In [1]:
import pandas as pd

# Load the three season files into s2015 / s2016 / s2017. Each frame has its
# first five rows discarded and its index renumbered from zero.
# NOTE(review): the reason for skipping five rows is not visible here — confirm
# against the CSV contents.
s2015, s2016, s2017 = (
    pd.read_csv(f'national_league_{start}_{start + 1}.csv', parse_dates=['date'])
    .iloc[5:]
    .reset_index(drop=True)
    for start in (2015, 2016, 2017)
)

In [2]:
def extract_results(season):
    """Read one season's CSV and return two records per fixture.

    The file ``national_league_{season}_{season+1}.csv`` is read from the
    working directory and its first five rows are discarded. Every remaining
    row yields a home-side record followed by an away-side record, each a dict
    with the keys ``season``, ``date``, ``team``, ``opponent``, ``venue``
    ('H' or 'A'), ``goals_for`` and ``goals_against``.

    Parameters
    ----------
    season : int
        Starting year of the season (e.g. 2015 for 2015/16).

    Returns
    -------
    list[dict]
        Records in file order, home record before away record for each row.
    """
    csv_path = f'national_league_{season}_{season+1}.csv'
    fixtures = pd.read_csv(csv_path, parse_dates=['date']).iloc[5:]

    def team_view(match, side, other, venue):
        # Describe one fixture from the point of view of the `side` team.
        return {
            'season': season,
            'date': match['date'],
            'team': match[f'{side}_team'],
            'opponent': match[f'{other}_team'],
            'venue': venue,
            'goals_for': match[f'{side}_score'],
            'goals_against': match[f'{other}_score'],
        }

    perspectives = (('home', 'away', 'H'), ('away', 'home', 'A'))
    return [
        team_view(match, side, other, venue)
        for match in fixtures.to_dict('records')
        for side, other, venue in perspectives
    ]

In [3]:
# Flatten the per-team fixture records of the 2015, 2016 and 2017 seasons
# into a single list, season by season.
all_seasons = [
    record
    for season in range(2015, 2018)
    for record in extract_results(season)
]

In [4]:
# Tranmere's fixtures (season + date), one dict per match; these dates drive
# which league tables get built further down.
# Built as a single chain with .assign() so no column is ever set on a frame
# derived from a filtered selection (which can raise SettingWithCopyWarning),
# and so `fix` is only ever bound to the final list of records.
# `prev_day` is kept so the record shape is unchanged, although no later cell
# visible in this notebook reads it.
tranmere_games = (
    pd.DataFrame(all_seasons)
    .query('team == "Tranmere"')
    .loc[:, ['season', 'date']]
    .assign(prev_day=lambda games: games["date"] - pd.Timedelta(days=1))
)
fix = tranmere_games.to_dict('records')

In [5]:
# One row per team per fixture (home and away perspectives), all seasons combined.
df = pd.DataFrame(all_seasons)

In [6]:
# Classify every row from the team's own point of view ("W", "D" or "L").
# Boolean masks replace the Python-level row-wise apply. Rows where neither
# comparison is true (level scores, or a missing score) keep the "D" default,
# which is what the original else-branch produced.
df["outcome"] = "D"
df.loc[df["goals_for"] > df["goals_against"], "outcome"] = "W"
df.loc[df["goals_for"] < df["goals_against"], "outcome"] = "L"

In [7]:
# League points per row: 3 for a win, 1 for a draw, 0 for anything else.
# A vectorised lookup replaces the row-wise apply; outcomes that are not in
# the mapping become NaN and are then filled with 0, and the cast restores
# the integer dtype.
df["points"] = df["outcome"].map({"W": 3, "D": 1}).fillna(0).astype("int64")

In [8]:
def make_tab(ssn, date, results=None):
    """Build the league table for one season as it stood on a given date.

    Parameters
    ----------
    ssn : int
        Season identifier, matched against the ``season`` column.
    date : datetime-like
        Cut-off date. The comparison is inclusive (``date <= cut-off``), so
        fixtures played on ``date`` itself are counted even though the output
        column is named ``pregame_date``.
    results : pandas.DataFrame, optional
        Per-team match rows with ``season``, ``date``, ``team``, ``outcome``,
        ``goals_for``, ``goals_against`` and ``points`` columns. When omitted,
        the notebook-level ``df`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per team with the columns ``team``, ``played``, ``won``,
        ``drawn``, ``lost``, ``goals_for``, ``goals_against``, ``goal_diff``,
        ``points``, ``season`` and ``pregame_date``, ordered by points, then
        goal difference, then goals scored (all descending), with a fresh
        0-based index.
    """
    source = df if results is None else results
    in_scope = source[(source["season"] == ssn) & (source["date"] <= date)]

    # Turn each outcome into a 0/1 indicator so that one vectorised
    # groupby-sum yields every tally. This avoids three Python-level
    # groupby.apply calls and the `include_groups` keyword they required.
    tallies = in_scope.assign(
        played=1,
        won=(in_scope["outcome"] == "W").astype("int64"),
        drawn=(in_scope["outcome"] == "D").astype("int64"),
        lost=(in_scope["outcome"] == "L").astype("int64"),
    )
    summed_cols = ["played", "won", "drawn", "lost", "goals_for", "goals_against", "points"]
    summary = tallies.groupby("team")[summed_cols].sum()

    summary["goal_diff"] = summary["goals_for"] - summary["goals_against"]
    summary["season"] = ssn
    summary["pregame_date"] = date

    # Restore the column order callers already rely on.
    summary = summary[[
        "played", "won", "drawn", "lost",
        "goals_for", "goals_against", "goal_diff",
        "points", "season", "pregame_date",
    ]]

    return summary.sort_values(by=["points", "goal_diff", "goals_for"], ascending=False).reset_index(drop=False)

In [9]:
# Build one league table per Tranmere fixture date.
all_tabs = []

for fixture in fix:
    ssn = fixture['season']
    date = fixture['date']
    try:
        all_tabs.append(make_tab(ssn, date))
    except Exception as err:
        # Still best-effort: a failing date is reported and skipped. Catching
        # Exception rather than using a bare `except:` lets KeyboardInterrupt
        # and SystemExit through, and the message now says what went wrong.
        print(f"Season {ssn} up to {date} failed: {err!r}")

In [10]:
# Stack every per-date table into one frame (each table keeps its own 0-based
# index at this point) and relabel the date column as "game_date".
stacked_tabs = pd.concat(all_tabs)
all_tabs_df = stacked_tabs.rename(columns={"pregame_date": "game_date"})
all_tabs_df

Unnamed: 0,team,played,won,drawn,lost,goals_for,goals_against,goal_diff,points,season,game_date
0,Cheltenham,46,30,11,5,87,30,57,101,2015,2016-04-30
1,Forest Green,46,26,11,9,69,42,27,89,2015,2016-04-30
2,Braintree,46,23,12,11,56,38,18,81,2015,2016-04-30
3,Grimsby,46,22,14,10,82,45,37,80,2015,2016-04-30
4,Dover,46,23,11,12,75,53,22,80,2015,2016-04-30
...,...,...,...,...,...,...,...,...,...,...,...
17,Gateshead,1,0,0,1,1,2,-1,0,2017,2017-08-05
18,Hartlepool,1,0,0,1,0,1,-1,0,2017,2017-08-05
19,Wrexham,1,0,0,1,0,1,-1,0,2017,2017-08-05
20,FC Halifax,1,0,0,1,0,2,-2,0,2017,2017-08-05


In [11]:
# All-zero table rows for Chester and Solihull Moors, season 2017, dated
# 2017-08-07 — presumably because neither club had a result by then (confirm).
# NOTE(review): positions are later assigned per game_date, so these rows only
# join an existing table if 2017-08-07 is one of the table dates; otherwise
# they form a two-row group of their own. Worth checking against the fixtures.
zero_stats = dict.fromkeys(
    ["played", "won", "drawn", "lost", "goals_for", "goals_against", "goal_diff", "points"],
    0,
)
missing = pd.DataFrame([
    {
        "team": club,
        **zero_stats,
        "season": 2017,
        "game_date": pd.Timestamp("2017-08-07"),
    }
    for club in ("Chester", "Solihull Moors")
])

# Append the extra rows, then order everything newest-first and, within a
# date, best-first (points, goal difference, goals scored).
all_tabs_df = (
    pd.concat([all_tabs_df, missing])
    .sort_values(by=["season", "game_date", "points", "goal_diff", "goals_for"], ascending=False)
    .reset_index(drop=True)
)

In [12]:
# Intermediate export of the sorted tables (before "pos" is added and the
# columns are renamed); the final cell writes to this same path again.
all_tabs_df.to_csv('national_league_tabs.csv', index=False)

In [13]:
# League position: a 1-based running count within each game_date. This relies
# on the rows already being ordered best-first inside every date.
all_tabs_df["pos"] = all_tabs_df.groupby("game_date").cumcount().add(1)
all_tabs_df

Unnamed: 0,team,played,won,drawn,lost,goals_for,goals_against,goal_diff,points,season,game_date,pos
0,Macclesfield,46,27,11,8,67,46,21,92,2017,2018-04-28,1
1,Tranmere,46,24,10,12,78,46,32,82,2017,2018-04-28,2
2,Sutton,46,23,10,13,67,53,14,79,2017,2018-04-28,3
3,Boreham Wood,46,20,15,11,64,47,17,75,2017,2018-04-28,4
4,Aldershot,46,20,15,11,64,52,12,75,2017,2018-04-28,5
...,...,...,...,...,...,...,...,...,...,...,...,...
3307,Macclesfield,1,0,0,1,0,1,-1,0,2015,2015-08-08,20
3308,Woking,1,0,0,1,0,1,-1,0,2015,2015-08-08,21
3309,FC Halifax,1,0,0,1,1,3,-2,0,2015,2015-08-08,22
3310,Wrexham,1,0,0,1,1,3,-2,0,2015,2015-08-08,23


In [14]:
# all_tabs_df["game_date"] = all_tabs_df["game_date"] + pd.Timedelta(days=1)

In [15]:
# Switch to the short display headings and keep only the published columns
# (the "season" column is dropped by this selection).
display_names = {
    "team": "Team",
    "played": "Pld",
    "won": "W",
    "drawn": "D",
    "lost": "L",
    "goals_for": "GF",
    "goals_against": "GA",
    "goal_diff": "GD",
    "points": "Pts",
}
published_cols = ["pos", "Team", "Pld", "W", "D", "L", "GF", "GA", "GD", "Pts", "game_date"]

all_tabs_df = all_tabs_df.rename(columns=display_names).loc[:, published_cols]

In [16]:
# Download the results file and keep only the National League rows.
results_url = "https://raw.githubusercontent.com/petebrown/data-updater/main/data/results.csv"
res = (
    pd.read_csv(results_url, parse_dates=["game_date"])
    .query("competition == 'National League'")
)

In [17]:
# Reduce the results to the match date and the game number within the
# season's competition, exposing the latter as "game_no".
kept_cols = ["game_date", "ssn_comp_game_no"]
res = res.loc[:, kept_cols].rename(columns={"ssn_comp_game_no": "game_no"})

res

Unnamed: 0,game_date,game_no
4430,2015-08-08,1
4431,2015-08-11,2
4432,2015-08-15,3
4433,2015-08-18,4
4434,2015-08-22,5
...,...,...
4581,2018-04-14,42
4582,2018-04-17,43
4583,2018-04-21,44
4584,2018-04-24,45


In [18]:
# Display the renamed, column-trimmed tables for a visual check.
all_tabs_df

Unnamed: 0,pos,Team,Pld,W,D,L,GF,GA,GD,Pts,game_date
0,1,Macclesfield,46,27,11,8,67,46,21,92,2018-04-28
1,2,Tranmere,46,24,10,12,78,46,32,82,2018-04-28
2,3,Sutton,46,23,10,13,67,53,14,79,2018-04-28
3,4,Boreham Wood,46,20,15,11,64,47,17,75,2018-04-28
4,5,Aldershot,46,20,15,11,64,52,12,75,2018-04-28
...,...,...,...,...,...,...,...,...,...,...,...
3307,20,Macclesfield,1,0,0,1,0,1,-1,0,2015-08-08
3308,21,Woking,1,0,0,1,0,1,-1,0,2015-08-08
3309,22,FC Halifax,1,0,0,1,1,3,-2,0,2015-08-08
3310,23,Wrexham,1,0,0,1,1,3,-2,0,2015-08-08


In [19]:
# Short club name as used in the season CSVs -> full club name.
# Names absent from this mapping are left exactly as they are.
name_fixes = {
    'AFC Fylde': 'AFC Fylde',
    'Aldershot': 'Aldershot Town',
    'Braintree': 'Braintree Town',
    'Cheltenham': 'Cheltenham Town',
    'Dag & Red': 'Dagenham & Redbridge',
    'Dover': 'Dover Athletic',
    'Ebbsfleet': 'Ebbsfleet United',
    'FC Halifax': 'FC Halifax Town',
    'Forest Green': 'Forest Green Rovers',
    'Grimsby': 'Grimsby Town',
    'Guiseley': 'Guiseley',
    'Hartlepool': 'Hartlepool United',
    'Kidderminster': 'Kidderminster Harriers',
    'Lincoln': 'Lincoln City',
    'Macclesfield': 'Macclesfield Town',
    'Maidenhead': 'Maidenhead United',
    'Maidstone': 'Maidstone United',
    'Sutton': 'Sutton United',
    'Torquay': 'Torquay United',
    'Tranmere': 'Tranmere Rovers',
    'Welling': 'Welling United',
}

# Whole-value lookup with the original name as the fallback.
all_tabs_df["Team"] = all_tabs_df["Team"].map(lambda short: name_fixes.get(short, short))

In [20]:
# Final export: ranked tables with display column headings and full club
# names; this overwrites the file written earlier in the notebook.
all_tabs_df.to_csv('national_league_tabs.csv', index=False)