In [1]:
import pandas as pd
import numpy as np

In [2]:
def process_dataframe(df, away_team):
    # Drop specified columns
    df = df.drop(['www.bulibox.de', 'Unnamed: 5'], axis=1, errors='ignore')
    
    # Rename columns
    df = df.rename(columns={'Unnamed: 1': 'date', 
                            'Unnamed: 2':'matchday_nr', 
                            'Markiere Verein: ': 'home_team_name', 
                            away_team: 'away_team_name', 
                            'Unnamed: 6': 'hist_home_team_win', 
                            'Unnamed: 7': 'hist_draw', 
                            'Unnamed: 8': 'hist_away_team_win', 
                            'Unnamed: 9': 'hist_goal_dif'})
    
    # Drop first 3 rows
    df = df.drop(df.index[:3])
    
    # Drop last 10 rows
    df = df.drop(df.tail(10).index)
    
    # Convert 'matchday_nr', 'hist_home_team_win', 'hist_draw', 'hist_away_team_win' to integer
    cols_to_convert = ['matchday_nr', 'hist_home_team_win', 'hist_draw', 'hist_away_team_win']
    for col in cols_to_convert:
        df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
    
    return df

In [3]:
def process_dataframe_2(df, away_team):
    # Drop specified columns
    df = df.drop(['www.bulibox.de', 'Unnamed: 5'], axis=1)
    
    # Rename columns
    df = df.rename(columns={'Unnamed: 1': 'date', 
                            'Unnamed: 2': 'matchday_nr', 
                            'Markiere Verein: ': 'home_team_name', 
                            away_team: 'away_team_name', 
                            'Unnamed: 6': 'hist_home_team_win', 
                            'Unnamed: 7': 'hist_draw', 
                            'Unnamed: 8': 'hist_away_team_win', 
                            'Unnamed: 9': 'hist_goal_dif'})
    
    # Drop first 3 rows
    df = df.drop(df.index[:3])
    
    # Convert 'matchday_nr', 'hist_home_team_win', 'hist_draw', 'hist_away_team_win' to integer
    cols_to_convert = ['matchday_nr', 'hist_home_team_win', 'hist_draw', 'hist_away_team_win']
    for col in cols_to_convert:
        df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
    
    return df

In [4]:
def calculate_historical_goal_difference(df, column_name):
    def calc_diff(x):
        try:
            if isinstance(x, str):  # Check if x is a string
                return int(x.split(':')[0]) - int(x.split(':')[1])
            else:
                return np.nan  # If not a string, return NaN
        except ValueError:
            return np.nan  # return NaN for rows where conversion to int fails

    df[column_name] = df[column_name].apply(calc_diff)
    return df

In [5]:
def extract_year(df, date_column):
    # Convert the date column to strings
    df[date_column] = df[date_column].astype(str)

    # Extract the year from the date column
    df['season'] = df[date_column].str.split('\D').apply(lambda x: [i for i in x if len(i) == 4][0] if [i for i in x if len(i) == 4] else None)

    # Convert the 'year' column to integers
    df['season'] = df['season'].astype(float).astype('Int64')
    
    # Drop the date column
    df = df.drop(columns=[date_column])
    
    return df

# Plan 22/23

In [6]:
df0 = pd.read_excel('data/plan_22_23.xls')
df0

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,SV Werder Bremen,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2022/2023,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2022-08-05 20:30:00,1,Eintracht Frankfurt,FC Bayern München,,24,22,56,125:205
4,,06./07.08.2022,1,Borussia Dortmund,Bayer 04 Leverkusen,,36,22,30,151:132
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [7]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df0 = process_dataframe(df0, away_team)
df0

Please enter the name of the away team: SV Werder Bremen


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2022-08-05 20:30:00,1,Eintracht Frankfurt,FC Bayern München,24,22,56,125:205
4,06./07.08.2022,1,Borussia Dortmund,Bayer 04 Leverkusen,36,22,30,151:132
5,06./07.08.2022,1,1. FC Union Berlin,Hertha BSC,4,3,3,15:15
6,06./07.08.2022,1,1. FC Köln,FC Schalke 04,39,22,27,155:128
7,06./07.08.2022,1,Borussia Mönchengladbach,TSG Hoffenheim,8,12,10,55:49
...,...,...,...,...,...,...,...,...
304,2023-05-27 15:30:00,34,Borussia Mönchengladbach,FC Augsburg,9,8,7,40:32
305,2023-05-27 15:30:00,34,Eintracht Frankfurt,Sport-Club Freiburg,17,9,14,56:46
306,2023-05-27 15:30:00,34,VfL Wolfsburg,Hertha BSC,19,16,21,73:78
307,2023-05-27 15:30:00,34,VfL Bochum 1848,Bayer 04 Leverkusen,16,13,25,75:94


In [8]:
df0 = calculate_historical_goal_difference(df0, 'hist_goal_dif')
df0

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2022-08-05 20:30:00,1,Eintracht Frankfurt,FC Bayern München,24,22,56,-80
4,06./07.08.2022,1,Borussia Dortmund,Bayer 04 Leverkusen,36,22,30,19
5,06./07.08.2022,1,1. FC Union Berlin,Hertha BSC,4,3,3,0
6,06./07.08.2022,1,1. FC Köln,FC Schalke 04,39,22,27,27
7,06./07.08.2022,1,Borussia Mönchengladbach,TSG Hoffenheim,8,12,10,6
...,...,...,...,...,...,...,...,...
304,2023-05-27 15:30:00,34,Borussia Mönchengladbach,FC Augsburg,9,8,7,8
305,2023-05-27 15:30:00,34,Eintracht Frankfurt,Sport-Club Freiburg,17,9,14,10
306,2023-05-27 15:30:00,34,VfL Wolfsburg,Hertha BSC,19,16,21,-5
307,2023-05-27 15:30:00,34,VfL Bochum 1848,Bayer 04 Leverkusen,16,13,25,-19


In [9]:
df0 = extract_year(df0, 'date')
df0

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,Eintracht Frankfurt,FC Bayern München,24,22,56,-80,2022
4,1,Borussia Dortmund,Bayer 04 Leverkusen,36,22,30,19,2022
5,1,1. FC Union Berlin,Hertha BSC,4,3,3,0,2022
6,1,1. FC Köln,FC Schalke 04,39,22,27,27,2022
7,1,Borussia Mönchengladbach,TSG Hoffenheim,8,12,10,6,2022
...,...,...,...,...,...,...,...,...
304,34,Borussia Mönchengladbach,FC Augsburg,9,8,7,8,2023
305,34,Eintracht Frankfurt,Sport-Club Freiburg,17,9,14,10,2023
306,34,VfL Wolfsburg,Hertha BSC,19,16,21,-5,2023
307,34,VfL Bochum 1848,Bayer 04 Leverkusen,16,13,25,-19,2023


# Plan 21/22

In [10]:
df1 = pd.read_excel('data/plan_21_22.xls')
df1

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,VfL Bochum,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2021/2022,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2021-08-13 20:30:00,1,Borussia Mönchengladbach,FC Bayern München,,26,29,51,134:206
4,,14./15.08.2021,1,Borussia Dortmund,Eintracht Frankfurt,,45,20,31,180:128
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [11]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df1 = process_dataframe(df1, away_team)
df1

Please enter the name of the away team: VfL Bochum


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2021-08-13 20:30:00,1,Borussia Mönchengladbach,FC Bayern München,26,29,51,134:206
4,14./15.08.2021,1,Borussia Dortmund,Eintracht Frankfurt,45,20,31,180:128
5,14./15.08.2021,1,VfL Wolfsburg,VfL Bochum,9,4,11,35:38
6,14./15.08.2021,1,1. FC Union Berlin,Bayer 04 Leverkusen,1,1,2,4:6
7,14./15.08.2021,1,VfB Stuttgart,SpVgg Greuther Fürth,5,1,4,12:8
...,...,...,...,...,...,...,...,...
304,2022-05-14 15:30:00,34,Borussia Mönchengladbach,TSG Hoffenheim,7,11,10,49:47
305,2022-05-14 15:30:00,34,VfB Stuttgart,1. FC Köln,30,25,35,149:145
306,2022-05-14 15:30:00,34,1. FSV Mainz 05,Eintracht Frankfurt,10,15,9,45:44
307,2022-05-14 15:30:00,34,FC Augsburg,SpVgg Greuther Fürth,8,8,10,37:37


In [12]:
df1 = calculate_historical_goal_difference(df1, 'hist_goal_dif')
df1

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2021-08-13 20:30:00,1,Borussia Mönchengladbach,FC Bayern München,26,29,51,-72
4,14./15.08.2021,1,Borussia Dortmund,Eintracht Frankfurt,45,20,31,52
5,14./15.08.2021,1,VfL Wolfsburg,VfL Bochum,9,4,11,-3
6,14./15.08.2021,1,1. FC Union Berlin,Bayer 04 Leverkusen,1,1,2,-2
7,14./15.08.2021,1,VfB Stuttgart,SpVgg Greuther Fürth,5,1,4,4
...,...,...,...,...,...,...,...,...
304,2022-05-14 15:30:00,34,Borussia Mönchengladbach,TSG Hoffenheim,7,11,10,2
305,2022-05-14 15:30:00,34,VfB Stuttgart,1. FC Köln,30,25,35,4
306,2022-05-14 15:30:00,34,1. FSV Mainz 05,Eintracht Frankfurt,10,15,9,1
307,2022-05-14 15:30:00,34,FC Augsburg,SpVgg Greuther Fürth,8,8,10,0


In [13]:
df1 = extract_year(df1, 'date')
df1

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,Borussia Mönchengladbach,FC Bayern München,26,29,51,-72,2021
4,1,Borussia Dortmund,Eintracht Frankfurt,45,20,31,52,2021
5,1,VfL Wolfsburg,VfL Bochum,9,4,11,-3,2021
6,1,1. FC Union Berlin,Bayer 04 Leverkusen,1,1,2,-2,2021
7,1,VfB Stuttgart,SpVgg Greuther Fürth,5,1,4,4,2021
...,...,...,...,...,...,...,...,...
304,34,Borussia Mönchengladbach,TSG Hoffenheim,7,11,10,2,2022
305,34,VfB Stuttgart,1. FC Köln,30,25,35,4,2022
306,34,1. FSV Mainz 05,Eintracht Frankfurt,10,15,9,1,2022
307,34,FC Augsburg,SpVgg Greuther Fürth,8,8,10,0,2022


# Plan 20/21

In [14]:
df2 = pd.read_excel('data/plan_20_21.xls')
df2

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,DSC Arminia Bielefeld,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2020/2021,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,18.-21.09.2020,1,FC Bayern München,FC Schalke 04,,54,28,18,214:104
4,,18.-21.09.2020,1,Borussia Dortmund,Borussia Mönchengladbach,,39,28,29,167:151
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [15]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df2 = process_dataframe(df2, away_team)
df2

Please enter the name of the away team: DSC Arminia Bielefeld


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,18.-21.09.2020,1,FC Bayern München,FC Schalke 04,54,28,18,214:104
4,18.-21.09.2020,1,Borussia Dortmund,Borussia Mönchengladbach,39,28,29,167:151
5,18.-21.09.2020,1,RB Leipzig,1. FSV Mainz 05,5,2,1,28:12
6,18.-21.09.2020,1,VfL Wolfsburg,Bayer 04 Leverkusen,18,7,23,76:89
7,18.-21.09.2020,1,Eintracht Frankfurt,DSC Arminia Bielefeld,11,9,8,42:38
...,...,...,...,...,...,...,...,...
304,2021-05-22 15:30:00,34,Eintracht Frankfurt,Sport-Club Freiburg,15,8,13,48:41
305,2021-05-22 15:30:00,34,1. FC Union Berlin,RB Leipzig,1,1,4,6:15
306,2021-05-22 15:30:00,34,1. FC Köln,FC Schalke 04,37,22,27,152:127
307,2021-05-22 15:30:00,34,SV Werder Bremen,Borussia Mönchengladbach,38,25,39,154:172


In [16]:
df2 = calculate_historical_goal_difference(df2, 'hist_goal_dif')
df2

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,18.-21.09.2020,1,FC Bayern München,FC Schalke 04,54,28,18,110
4,18.-21.09.2020,1,Borussia Dortmund,Borussia Mönchengladbach,39,28,29,16
5,18.-21.09.2020,1,RB Leipzig,1. FSV Mainz 05,5,2,1,16
6,18.-21.09.2020,1,VfL Wolfsburg,Bayer 04 Leverkusen,18,7,23,-13
7,18.-21.09.2020,1,Eintracht Frankfurt,DSC Arminia Bielefeld,11,9,8,4
...,...,...,...,...,...,...,...,...
304,2021-05-22 15:30:00,34,Eintracht Frankfurt,Sport-Club Freiburg,15,8,13,7
305,2021-05-22 15:30:00,34,1. FC Union Berlin,RB Leipzig,1,1,4,-9
306,2021-05-22 15:30:00,34,1. FC Köln,FC Schalke 04,37,22,27,25
307,2021-05-22 15:30:00,34,SV Werder Bremen,Borussia Mönchengladbach,38,25,39,-18


In [17]:
df2 = extract_year(df2, 'date')
df2

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,FC Schalke 04,54,28,18,110,2020
4,1,Borussia Dortmund,Borussia Mönchengladbach,39,28,29,16,2020
5,1,RB Leipzig,1. FSV Mainz 05,5,2,1,16,2020
6,1,VfL Wolfsburg,Bayer 04 Leverkusen,18,7,23,-13,2020
7,1,Eintracht Frankfurt,DSC Arminia Bielefeld,11,9,8,4,2020
...,...,...,...,...,...,...,...,...
304,34,Eintracht Frankfurt,Sport-Club Freiburg,15,8,13,7,2021
305,34,1. FC Union Berlin,RB Leipzig,1,1,4,-9,2021
306,34,1. FC Köln,FC Schalke 04,37,22,27,25,2021
307,34,SV Werder Bremen,Borussia Mönchengladbach,38,25,39,-18,2021


# Plan 19/20

In [18]:
df3 = pd.read_excel('data/plan_19_20.xls')
df3

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Union Berlin,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2019/2020,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2019-08-16 20:30:00,1,FC Bayern München,Hertha BSC,,39,19,10,153:73
4,,17./18.08.2019,1,Borussia Dortmund,FC Augsburg,,9,5,2,38:19
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [19]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df3 = process_dataframe(df3, away_team)
df3

Please enter the name of the away team: 1. FC Union Berlin


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2019-08-16 20:30:00,1,FC Bayern München,Hertha BSC,39,19,10,153:73
4,17./18.08.2019,1,Borussia Dortmund,FC Augsburg,9,5,2,38:19
5,17./18.08.2019,1,Bayer 04 Leverkusen,SC Paderborn 07,1,1,0,5:2
6,17./18.08.2019,1,Borussia Mönchengladbach,FC Schalke 04,38,27,27,152:109
7,17./18.08.2019,1,VfL Wolfsburg,1. FC Köln,12,9,5,52:29
...,...,...,...,...,...,...,...,...
304,2020-05-16 15:30:00,34,Eintracht Frankfurt,SC Paderborn 07,1,1,2,7:7
305,2020-05-16 15:30:00,34,SV Werder Bremen,1. FC Köln,34,24,34,148:155
306,2020-05-16 15:30:00,34,Sport-Club Freiburg,FC Schalke 04,17,8,23,56:72
307,2020-05-16 15:30:00,34,FC Augsburg,RB Leipzig,1,3,2,4:6


In [20]:
df3 = calculate_historical_goal_difference(df3, 'hist_goal_dif')
df3

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2019-08-16 20:30:00,1,FC Bayern München,Hertha BSC,39,19,10,80.0
4,17./18.08.2019,1,Borussia Dortmund,FC Augsburg,9,5,2,19.0
5,17./18.08.2019,1,Bayer 04 Leverkusen,SC Paderborn 07,1,1,0,3.0
6,17./18.08.2019,1,Borussia Mönchengladbach,FC Schalke 04,38,27,27,43.0
7,17./18.08.2019,1,VfL Wolfsburg,1. FC Köln,12,9,5,23.0
...,...,...,...,...,...,...,...,...
304,2020-05-16 15:30:00,34,Eintracht Frankfurt,SC Paderborn 07,1,1,2,0.0
305,2020-05-16 15:30:00,34,SV Werder Bremen,1. FC Köln,34,24,34,-7.0
306,2020-05-16 15:30:00,34,Sport-Club Freiburg,FC Schalke 04,17,8,23,-16.0
307,2020-05-16 15:30:00,34,FC Augsburg,RB Leipzig,1,3,2,-2.0


In [21]:
df3 = extract_year(df3, 'date')
df3

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Hertha BSC,39,19,10,80.0,2019
4,1,Borussia Dortmund,FC Augsburg,9,5,2,19.0,2019
5,1,Bayer 04 Leverkusen,SC Paderborn 07,1,1,0,3.0,2019
6,1,Borussia Mönchengladbach,FC Schalke 04,38,27,27,43.0,2019
7,1,VfL Wolfsburg,1. FC Köln,12,9,5,23.0,2019
...,...,...,...,...,...,...,...,...
304,34,Eintracht Frankfurt,SC Paderborn 07,1,1,2,0.0,2020
305,34,SV Werder Bremen,1. FC Köln,34,24,34,-7.0,2020
306,34,Sport-Club Freiburg,FC Schalke 04,17,8,23,-16.0,2020
307,34,FC Augsburg,RB Leipzig,1,3,2,-2.0,2020


# Plan 18/19

In [22]:
df4 = pd.read_excel('data/plan_18_19.xls')
df4

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Nürnberg,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2018/2019,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2018-08-24 20:30:00,1,FC Bayern München,TSG 1899 Hoffenheim,,13,5,2,44:17
4,,25./26.08.2018,1,Borussia Dortmund,RB Leipzig,,1,1,2,4:5
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [23]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df4 = process_dataframe(df4, away_team)
df4

Please enter the name of the away team: 1. FC Nürnberg


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2018-08-24 20:30:00,1,FC Bayern München,TSG 1899 Hoffenheim,13,5,2,44:17
4,25./26.08.2018,1,Borussia Dortmund,RB Leipzig,1,1,2,4:5
5,25./26.08.2018,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,19,26,27,106:137
6,25./26.08.2018,1,Hertha BSC,1. FC Nürnberg,13,8,17,53:54
7,25./26.08.2018,1,SV Werder Bremen,Hannover 96,31,16,13,134:76
...,...,...,...,...,...,...,...,...
304,2019-05-18 15:30:00,34,SV Werder Bremen,RB Leipzig,1,1,2,5:6
305,2019-05-18 15:30:00,34,Sport-Club Freiburg,1. FC Nürnberg,11,5,12,39:43
306,2019-05-18 15:30:00,34,1. FSV Mainz 05,TSG 1899 Hoffenheim,7,6,7,33:36
307,2019-05-18 15:30:00,34,VfL Wolfsburg,FC Augsburg,3,5,6,10:14


In [24]:
df4 = calculate_historical_goal_difference(df4, 'hist_goal_dif')
df4

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2018-08-24 20:30:00,1,FC Bayern München,TSG 1899 Hoffenheim,13,5,2,27
4,25./26.08.2018,1,Borussia Dortmund,RB Leipzig,1,1,2,-1
5,25./26.08.2018,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,19,26,27,-31
6,25./26.08.2018,1,Hertha BSC,1. FC Nürnberg,13,8,17,-1
7,25./26.08.2018,1,SV Werder Bremen,Hannover 96,31,16,13,58
...,...,...,...,...,...,...,...,...
304,2019-05-18 15:30:00,34,SV Werder Bremen,RB Leipzig,1,1,2,-1
305,2019-05-18 15:30:00,34,Sport-Club Freiburg,1. FC Nürnberg,11,5,12,-4
306,2019-05-18 15:30:00,34,1. FSV Mainz 05,TSG 1899 Hoffenheim,7,6,7,-3
307,2019-05-18 15:30:00,34,VfL Wolfsburg,FC Augsburg,3,5,6,-4


In [25]:
df4 = extract_year(df4, 'date')
df4

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,TSG 1899 Hoffenheim,13,5,2,27,2018
4,1,Borussia Dortmund,RB Leipzig,1,1,2,-1,2018
5,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,19,26,27,-31,2018
6,1,Hertha BSC,1. FC Nürnberg,13,8,17,-1,2018
7,1,SV Werder Bremen,Hannover 96,31,16,13,58,2018
...,...,...,...,...,...,...,...,...
304,34,SV Werder Bremen,RB Leipzig,1,1,2,-1,2019
305,34,Sport-Club Freiburg,1. FC Nürnberg,11,5,12,-4,2019
306,34,1. FSV Mainz 05,TSG 1899 Hoffenheim,7,6,7,-3,2019
307,34,VfL Wolfsburg,FC Augsburg,3,5,6,-4,2019


# Plan 17/18

In [26]:
df5 = pd.read_excel('data/plan_17_18.xls')
df5

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,VfB Stuttgart,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2017/2018,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2017-08-18 20:30:00,1,FC Bayern München,Bayer 04 Leverkusen,,44,16,16,141:88
4,,19./20.08.2017,1,1. FSV Mainz 05,Hannover 96,,14,11,13,46:47
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [27]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df5 = process_dataframe(df5, away_team)
df5

Please enter the name of the away team: VfB Stuttgart


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2017-08-18 20:30:00,1,FC Bayern München,Bayer 04 Leverkusen,44,16,16,141:88
4,19./20.08.2017,1,1. FSV Mainz 05,Hannover 96,14,11,13,46:47
5,19./20.08.2017,1,1899 Hoffenheim,Werder Bremen,2,7,9,29:38
6,19./20.08.2017,1,Borussia Mönchengladbach,1. FC Köln,48,19,23,179:118
7,19./20.08.2017,1,FC Schalke 04,RB Leipzig,0,1,1,2:3
...,...,...,...,...,...,...,...,...
304,2018-05-12 15:30:00,34,FC Schalke 04,Eintracht Frankfurt,32,23,31,126:125
305,2018-05-12 15:30:00,34,Hamburger SV,Borussia Mönchengladbach,39,26,33,152:150
306,2018-05-12 15:30:00,34,Hertha BSC,RB Leipzig,0,0,2,1:6
307,2018-05-12 15:30:00,34,SC Freiburg,FC Augsburg,12,6,4,32:20


In [28]:
df5 = calculate_historical_goal_difference(df5, 'hist_goal_dif')
df5

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2017-08-18 20:30:00,1,FC Bayern München,Bayer 04 Leverkusen,44,16,16,53.0
4,19./20.08.2017,1,1. FSV Mainz 05,Hannover 96,14,11,13,-1.0
5,19./20.08.2017,1,1899 Hoffenheim,Werder Bremen,2,7,9,-9.0
6,19./20.08.2017,1,Borussia Mönchengladbach,1. FC Köln,48,19,23,61.0
7,19./20.08.2017,1,FC Schalke 04,RB Leipzig,0,1,1,-1.0
...,...,...,...,...,...,...,...,...
304,2018-05-12 15:30:00,34,FC Schalke 04,Eintracht Frankfurt,32,23,31,1.0
305,2018-05-12 15:30:00,34,Hamburger SV,Borussia Mönchengladbach,39,26,33,2.0
306,2018-05-12 15:30:00,34,Hertha BSC,RB Leipzig,0,0,2,-5.0
307,2018-05-12 15:30:00,34,SC Freiburg,FC Augsburg,12,6,4,12.0


In [29]:
df5 = extract_year(df5, 'date')
df5

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Bayer 04 Leverkusen,44,16,16,53.0,2017
4,1,1. FSV Mainz 05,Hannover 96,14,11,13,-1.0,2017
5,1,1899 Hoffenheim,Werder Bremen,2,7,9,-9.0,2017
6,1,Borussia Mönchengladbach,1. FC Köln,48,19,23,61.0,2017
7,1,FC Schalke 04,RB Leipzig,0,1,1,-1.0,2017
...,...,...,...,...,...,...,...,...
304,34,FC Schalke 04,Eintracht Frankfurt,32,23,31,1.0,2018
305,34,Hamburger SV,Borussia Mönchengladbach,39,26,33,2.0,2018
306,34,Hertha BSC,RB Leipzig,0,0,2,-5.0,2018
307,34,SC Freiburg,FC Augsburg,12,6,4,12.0,2018


# Plan 16/17

In [30]:
df6 = pd.read_excel('data/plan_16_17.xls')
df6

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,Werder Bremen,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2016/2017,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2016-08-26 20:30:00,1,FC Bayern München,Werder Bremen,,49,25,26,194:119
4,,27./28.08.2016,1,Borussia Dortmund,1. FSV Mainz 05,,11,6,3,33:17
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [31]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df6 = process_dataframe(df6, away_team)
df6

Please enter the name of the away team: Werder Bremen


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2016-08-26 20:30:00,1,FC Bayern München,Werder Bremen,49,25,26,194:119
4,27./28.08.2016,1,Borussia Dortmund,1. FSV Mainz 05,11,6,3,33:17
5,27./28.08.2016,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,17,26,25,100:127
6,27./28.08.2016,1,Hertha BSC,Sport-Club Freiburg,12,14,8,42:36
7,27./28.08.2016,1,1. FC Köln,SV Darmstadt 98,4,2,0,12:5
...,...,...,...,...,...,...,...,...
304,2017-05-20 15:30:00,34,1. FC Köln,1. FSV Mainz 05,7,5,8,21:28
305,2017-05-20 15:30:00,34,Hamburger SV,VfL Wolfsburg,9,14,15,48:62
306,2017-05-20 15:30:00,34,FC Ingolstadt 04,FC Schalke 04,1,1,0,4:1
307,2017-05-20 15:30:00,34,1899 Hoffenheim,FC Augsburg 1907,6,3,3,19:13


In [32]:
df6 = calculate_historical_goal_difference(df6, 'hist_goal_dif')
df6

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2016-08-26 20:30:00,1,FC Bayern München,Werder Bremen,49,25,26,75.0
4,27./28.08.2016,1,Borussia Dortmund,1. FSV Mainz 05,11,6,3,16.0
5,27./28.08.2016,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,17,26,25,-27.0
6,27./28.08.2016,1,Hertha BSC,Sport-Club Freiburg,12,14,8,6.0
7,27./28.08.2016,1,1. FC Köln,SV Darmstadt 98,4,2,0,7.0
...,...,...,...,...,...,...,...,...
304,2017-05-20 15:30:00,34,1. FC Köln,1. FSV Mainz 05,7,5,8,-7.0
305,2017-05-20 15:30:00,34,Hamburger SV,VfL Wolfsburg,9,14,15,-14.0
306,2017-05-20 15:30:00,34,FC Ingolstadt 04,FC Schalke 04,1,1,0,3.0
307,2017-05-20 15:30:00,34,1899 Hoffenheim,FC Augsburg 1907,6,3,3,6.0


In [33]:
df6 = extract_year(df6, 'date')
df6

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Werder Bremen,49,25,26,75.0,2016
4,1,Borussia Dortmund,1. FSV Mainz 05,11,6,3,16.0,2016
5,1,Borussia Mönchengladbach,Bayer 04 Leverkusen,17,26,25,-27.0,2016
6,1,Hertha BSC,Sport-Club Freiburg,12,14,8,6.0,2016
7,1,1. FC Köln,SV Darmstadt 98,4,2,0,7.0,2016
...,...,...,...,...,...,...,...,...
304,34,1. FC Köln,1. FSV Mainz 05,7,5,8,-7.0,2017
305,34,Hamburger SV,VfL Wolfsburg,9,14,15,-14.0,2017
306,34,FC Ingolstadt 04,FC Schalke 04,1,1,0,3.0,2017
307,34,1899 Hoffenheim,FC Augsburg 1907,6,3,3,6.0,2017


# Plan 15/16

In [34]:
df7 = pd.read_excel('data/plan_15_16.xls')
df7

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,SV Darmstadt 98,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2015/2016,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2015-08-14 20:30:00,1,FC Bayern München,Hamburger SV,,59,22,19,224:100
4,,15./16.08.2015,1,Borussia Dortmund,Borussia Mönchengladbach,,29,28,29,139:144
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [35]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df7 = process_dataframe(df7, away_team)
df7

Please enter the name of the away team: SV Darmstadt 98


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2015-08-14 20:30:00,1,FC Bayern München,Hamburger SV,59,22,19,224:100
4,15./16.08.2015,1,Borussia Dortmund,Borussia Mönchengladbach,29,28,29,139:144
5,15./16.08.2015,1,Bayer 04 Leverkusen,1899 Hoffenheim,12,1,1,34:11
6,15./16.08.2015,1,VfL Wolfsburg,Eintracht Frankfurt,13,11,4,46:35
7,15./16.08.2015,1,VfB Stuttgart,1. FC Köln,26,24,34,138:137
...,...,...,...,...,...,...,...,...
304,2016-05-14 15:30:00,34,FC Augsburg 1907,Hamburger SV,5,1,2,12:8
305,2016-05-14 15:30:00,34,Werder Bremen,Eintracht Frankfurt,39,19,32,143:131
306,2016-05-14 15:30:00,34,1899 Hoffenheim,FC Schalke 04,5,4,5,18:25
307,2016-05-14 15:30:00,34,1. FSV Mainz 05,Hertha BSC,4,12,10,23:41


In [36]:
df7 = calculate_historical_goal_difference(df7, 'hist_goal_dif')
df7

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2015-08-14 20:30:00,1,FC Bayern München,Hamburger SV,59,22,19,124.0
4,15./16.08.2015,1,Borussia Dortmund,Borussia Mönchengladbach,29,28,29,-5.0
5,15./16.08.2015,1,Bayer 04 Leverkusen,1899 Hoffenheim,12,1,1,23.0
6,15./16.08.2015,1,VfL Wolfsburg,Eintracht Frankfurt,13,11,4,11.0
7,15./16.08.2015,1,VfB Stuttgart,1. FC Köln,26,24,34,1.0
...,...,...,...,...,...,...,...,...
304,2016-05-14 15:30:00,34,FC Augsburg 1907,Hamburger SV,5,1,2,4.0
305,2016-05-14 15:30:00,34,Werder Bremen,Eintracht Frankfurt,39,19,32,12.0
306,2016-05-14 15:30:00,34,1899 Hoffenheim,FC Schalke 04,5,4,5,-7.0
307,2016-05-14 15:30:00,34,1. FSV Mainz 05,Hertha BSC,4,12,10,-18.0


In [37]:
df7 = extract_year(df7, 'date')
df7

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Hamburger SV,59,22,19,124.0,2015
4,1,Borussia Dortmund,Borussia Mönchengladbach,29,28,29,-5.0,2015
5,1,Bayer 04 Leverkusen,1899 Hoffenheim,12,1,1,23.0,2015
6,1,VfL Wolfsburg,Eintracht Frankfurt,13,11,4,11.0,2015
7,1,VfB Stuttgart,1. FC Köln,26,24,34,1.0,2015
...,...,...,...,...,...,...,...,...
304,34,FC Augsburg 1907,Hamburger SV,5,1,2,4.0,2016
305,34,Werder Bremen,Eintracht Frankfurt,39,19,32,12.0,2016
306,34,1899 Hoffenheim,FC Schalke 04,5,4,5,-7.0,2016
307,34,1. FSV Mainz 05,Hertha BSC,4,12,10,-18.0,2016


# Plan 14/15

In [38]:
df8 = pd.read_excel('data/plan_14_15.xls')
df8

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Köln,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2014/2015,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,2014-08-22 20:30:00,1,FC Bayern München,VfL Wolfsburg,,26,5,3,75:27
4,,22.-24.08.2014,1,Borussia Dortmund,Bayer 04 Leverkusen,,27,20,25,112:106
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [39]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df8 = process_dataframe(df8, away_team)
df8

Please enter the name of the away team: 1. FC Köln


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2014-08-22 20:30:00,1,FC Bayern München,VfL Wolfsburg,26,5,3,75:27
4,22.-24.08.2014,1,Borussia Dortmund,Bayer 04 Leverkusen,27,20,25,112:106
5,22.-24.08.2014,1,Borussia Mönchengladbach,VfB Stuttgart,24,27,37,112:151
6,22.-24.08.2014,1,1899 Hoffenheim,FC Augsburg,3,3,2,11:8
7,22.-24.08.2014,1,Hannover 96,FC Schalke 04,15,14,29,67:102
...,...,...,...,...,...,...,...,...
304,2015-05-23 15:30:00,34,Hannover 96,Sport-Club Freiburg,7,12,11,38:45
305,2015-05-23 15:30:00,34,Eintracht Frankfurt,Bayer 04 Leverkusen,21,12,25,84:96
306,2015-05-23 15:30:00,34,Hamburger SV,FC Schalke 04,35,22,35,145:133
307,2015-05-23 15:30:00,34,1. FC Köln,VfL Wolfsburg,3,5,10,21:42


In [40]:
df8 = calculate_historical_goal_difference(df8, 'hist_goal_dif')
df8

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,2014-08-22 20:30:00,1,FC Bayern München,VfL Wolfsburg,26,5,3,48.0
4,22.-24.08.2014,1,Borussia Dortmund,Bayer 04 Leverkusen,27,20,25,6.0
5,22.-24.08.2014,1,Borussia Mönchengladbach,VfB Stuttgart,24,27,37,-39.0
6,22.-24.08.2014,1,1899 Hoffenheim,FC Augsburg,3,3,2,3.0
7,22.-24.08.2014,1,Hannover 96,FC Schalke 04,15,14,29,-35.0
...,...,...,...,...,...,...,...,...
304,2015-05-23 15:30:00,34,Hannover 96,Sport-Club Freiburg,7,12,11,-7.0
305,2015-05-23 15:30:00,34,Eintracht Frankfurt,Bayer 04 Leverkusen,21,12,25,-12.0
306,2015-05-23 15:30:00,34,Hamburger SV,FC Schalke 04,35,22,35,12.0
307,2015-05-23 15:30:00,34,1. FC Köln,VfL Wolfsburg,3,5,10,-21.0


In [41]:
df8 = extract_year(df8, 'date')
df8

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,VfL Wolfsburg,26,5,3,48.0,2014
4,1,Borussia Dortmund,Bayer 04 Leverkusen,27,20,25,6.0,2014
5,1,Borussia Mönchengladbach,VfB Stuttgart,24,27,37,-39.0,2014
6,1,1899 Hoffenheim,FC Augsburg,3,3,2,3.0,2014
7,1,Hannover 96,FC Schalke 04,15,14,29,-35.0,2014
...,...,...,...,...,...,...,...,...
304,34,Hannover 96,Sport-Club Freiburg,7,12,11,-7.0,2015
305,34,Eintracht Frankfurt,Bayer 04 Leverkusen,21,12,25,-12.0,2015
306,34,Hamburger SV,FC Schalke 04,35,22,35,12.0,2015
307,34,1. FC Köln,VfL Wolfsburg,3,5,10,-21.0,2015


# Plan 13/14

In [42]:
df9 = pd.read_excel('data/plan_13_14.xls')
df9

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,Hertha BSC Berlin,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2013/2014,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,09.08.2013 - Fr 20:30,1,FC Bayern München,Borussia Mönchengladbach,,43,27,20,174:114
4,,10./11.08.2013,1,FC Schalke 04,Hamburger SV,,34,21,35,127:142
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [43]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df9 = process_dataframe(df9, away_team)
df9

Please enter the name of the away team: Hertha BSC Berlin


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,09.08.2013 - Fr 20:30,1,FC Bayern München,Borussia Mönchengladbach,43,27,20,174:114
4,10./11.08.2013,1,FC Schalke 04,Hamburger SV,34,21,35,127:142
5,10./11.08.2013,1,Bayer 04 Leverkusen,Sport-Club Freiburg,13,8,7,49:32
6,10./11.08.2013,1,Hannover 96,VfL Wolfsburg,13,6,15,53:53
7,10./11.08.2013,1,1899 Hoffenheim,1. FC Nürnberg,4,2,2,14:10
...,...,...,...,...,...,...,...,...
304,10.05.2014 - Sa 15:30,34,VfL Wolfsburg,Borussia Mönchengladbach,15,3,8,45:31
305,10.05.2014 - Sa 15:30,34,1899 Hoffenheim,Eintracht Braunschweig,,,,-
306,10.05.2014 - Sa 15:30,34,1. FSV Mainz 05,Hamburger SV,3,5,6,13:16
307,10.05.2014 - Sa 15:30,34,FC Augsburg 1907,Eintracht Frankfurt,1,0,1,4:4


In [44]:
df9 = calculate_historical_goal_difference(df9, 'hist_goal_dif')
df9

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,09.08.2013 - Fr 20:30,1,FC Bayern München,Borussia Mönchengladbach,43,27,20,60.0
4,10./11.08.2013,1,FC Schalke 04,Hamburger SV,34,21,35,-15.0
5,10./11.08.2013,1,Bayer 04 Leverkusen,Sport-Club Freiburg,13,8,7,17.0
6,10./11.08.2013,1,Hannover 96,VfL Wolfsburg,13,6,15,0.0
7,10./11.08.2013,1,1899 Hoffenheim,1. FC Nürnberg,4,2,2,4.0
...,...,...,...,...,...,...,...,...
304,10.05.2014 - Sa 15:30,34,VfL Wolfsburg,Borussia Mönchengladbach,15,3,8,14.0
305,10.05.2014 - Sa 15:30,34,1899 Hoffenheim,Eintracht Braunschweig,,,,
306,10.05.2014 - Sa 15:30,34,1. FSV Mainz 05,Hamburger SV,3,5,6,-3.0
307,10.05.2014 - Sa 15:30,34,FC Augsburg 1907,Eintracht Frankfurt,1,0,1,0.0


In [45]:
df9 = extract_year(df9, 'date')
df9

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Borussia Mönchengladbach,43,27,20,60.0,2013
4,1,FC Schalke 04,Hamburger SV,34,21,35,-15.0,2013
5,1,Bayer 04 Leverkusen,Sport-Club Freiburg,13,8,7,17.0,2013
6,1,Hannover 96,VfL Wolfsburg,13,6,15,0.0,2013
7,1,1899 Hoffenheim,1. FC Nürnberg,4,2,2,4.0,2013
...,...,...,...,...,...,...,...,...
304,34,VfL Wolfsburg,Borussia Mönchengladbach,15,3,8,14.0,2014
305,34,1899 Hoffenheim,Eintracht Braunschweig,,,,,2014
306,34,1. FSV Mainz 05,Hamburger SV,3,5,6,-3.0,2014
307,34,FC Augsburg 1907,Eintracht Frankfurt,1,0,1,0.0,2014


# Plan 12/13

In [46]:
df10 = pd.read_excel('data/plan_12_13.xls')
df10

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,Borussia Dortmund,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2012/2013,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,24.08.2012 20:30 - FR,1,Borussia Dortmund,SV Werder Bremen,,33,16,39,128:149
4,,25./26.08.2012,1,Borussia Mönchengladbach,1899 Hoffenheim,,1,3,6,12:18
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [47]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df10 = process_dataframe(df10, away_team)
df10

Please enter the name of the away team: Borussia Dortmund


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,24.08.2012 20:30 - FR,1,Borussia Dortmund,SV Werder Bremen,33,16,39,128:149
4,25./26.08.2012,1,Borussia Mönchengladbach,1899 Hoffenheim,1,3,6,12:18
5,25./26.08.2012,1,VfB Stuttgart,VfL Wolfsburg,15,5,10,46:42
6,25./26.08.2012,1,Hannover 96,FC Schalke 04,14,13,27,59:92
7,25./26.08.2012,1,Sport-Club Freiburg,1. FSV Mainz 05,9,8,7,22:25
...,...,...,...,...,...,...,...,...
304,18.05.2013 15:30 - SA,34,1. FC Nürnberg,SV Werder Bremen,15,15,28,80:105
305,18.05.2013 15:30 - SA,34,Sport-Club Freiburg,FC Schalke 04,13,5,18,46:61
306,18.05.2013 15:30 - SA,34,FC Augsburg,SpVgg Greuther Fürth,7,7,10,33:35
307,18.05.2013 15:30 - SA,34,Hamburger SV,Bayer 04 Leverkusen,25,19,22,85:92


In [48]:
df10 = calculate_historical_goal_difference(df10, 'hist_goal_dif')
df10

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,24.08.2012 20:30 - FR,1,Borussia Dortmund,SV Werder Bremen,33,16,39,-21.0
4,25./26.08.2012,1,Borussia Mönchengladbach,1899 Hoffenheim,1,3,6,-6.0
5,25./26.08.2012,1,VfB Stuttgart,VfL Wolfsburg,15,5,10,4.0
6,25./26.08.2012,1,Hannover 96,FC Schalke 04,14,13,27,-33.0
7,25./26.08.2012,1,Sport-Club Freiburg,1. FSV Mainz 05,9,8,7,-3.0
...,...,...,...,...,...,...,...,...
304,18.05.2013 15:30 - SA,34,1. FC Nürnberg,SV Werder Bremen,15,15,28,-25.0
305,18.05.2013 15:30 - SA,34,Sport-Club Freiburg,FC Schalke 04,13,5,18,-15.0
306,18.05.2013 15:30 - SA,34,FC Augsburg,SpVgg Greuther Fürth,7,7,10,-2.0
307,18.05.2013 15:30 - SA,34,Hamburger SV,Bayer 04 Leverkusen,25,19,22,-7.0


In [49]:
df10 = extract_year(df10, 'date')
df10

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,Borussia Dortmund,SV Werder Bremen,33,16,39,-21.0,2012
4,1,Borussia Mönchengladbach,1899 Hoffenheim,1,3,6,-6.0,2012
5,1,VfB Stuttgart,VfL Wolfsburg,15,5,10,4.0,2012
6,1,Hannover 96,FC Schalke 04,14,13,27,-33.0,2012
7,1,Sport-Club Freiburg,1. FSV Mainz 05,9,8,7,-3.0,2012
...,...,...,...,...,...,...,...,...
304,34,1. FC Nürnberg,SV Werder Bremen,15,15,28,-25.0,2013
305,34,Sport-Club Freiburg,FC Schalke 04,13,5,18,-15.0,2013
306,34,FC Augsburg,SpVgg Greuther Fürth,7,7,10,-2.0,2013
307,34,Hamburger SV,Bayer 04 Leverkusen,25,19,22,-7.0,2013


# Plan 11/12

In [50]:
df11 = pd.read_excel('data/plan_11_12.xls')
df11

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,Borussia Dortmund,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2011/2012,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,05.08.2011 - Fr,1,Borussia Dortmund,Hamburger SV,,30,27,31,150:159
4,,06./07.08.2011,1,SV Werder Bremen,1. FC Kaiserslautern,,31,21,32,137:130
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [51]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df11 = process_dataframe(df11, away_team)
df11

Please enter the name of the away team: Borussia Dortmund


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,05.08.2011 - Fr,1,Borussia Dortmund,Hamburger SV,30,27,31,150:159
4,06./07.08.2011,1,SV Werder Bremen,1. FC Kaiserslautern,31,21,32,137:130
5,06./07.08.2011,1,Hannover 96,1899 Hoffenheim,1,1,4,7:14
6,06./07.08.2011,1,1. FSV Mainz 05,Bayer 04 Leverkusen,4,2,4,14:15
7,06./07.08.2011,1,VfB Stuttgart,FC Schalke 04,38,14,30,133:98
...,...,...,...,...,...,...,...,...
304,05.05.2012 - Sa,34,1. FC Nürnberg,Bayer 04 Leverkusen,11,13,20,48:78
305,05.05.2012 - Sa,34,VfB Stuttgart,VfL Wolfsburg,14,5,9,43:39
306,05.05.2012 - Sa,34,1. FC Köln,FC Bayern München,23,22,35,113:150
307,05.05.2012 - Sa,34,FC Augsburg,Hamburger SV,,,,-


In [52]:
df11 = calculate_historical_goal_difference(df11, 'hist_goal_dif')
df11

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,05.08.2011 - Fr,1,Borussia Dortmund,Hamburger SV,30,27,31,-9.0
4,06./07.08.2011,1,SV Werder Bremen,1. FC Kaiserslautern,31,21,32,7.0
5,06./07.08.2011,1,Hannover 96,1899 Hoffenheim,1,1,4,-7.0
6,06./07.08.2011,1,1. FSV Mainz 05,Bayer 04 Leverkusen,4,2,4,-1.0
7,06./07.08.2011,1,VfB Stuttgart,FC Schalke 04,38,14,30,35.0
...,...,...,...,...,...,...,...,...
304,05.05.2012 - Sa,34,1. FC Nürnberg,Bayer 04 Leverkusen,11,13,20,-30.0
305,05.05.2012 - Sa,34,VfB Stuttgart,VfL Wolfsburg,14,5,9,4.0
306,05.05.2012 - Sa,34,1. FC Köln,FC Bayern München,23,22,35,-37.0
307,05.05.2012 - Sa,34,FC Augsburg,Hamburger SV,,,,


In [53]:
df11 = extract_year(df11, 'date')
df11

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,Borussia Dortmund,Hamburger SV,30,27,31,-9.0,2011
4,1,SV Werder Bremen,1. FC Kaiserslautern,31,21,32,7.0,2011
5,1,Hannover 96,1899 Hoffenheim,1,1,4,-7.0,2011
6,1,1. FSV Mainz 05,Bayer 04 Leverkusen,4,2,4,-1.0,2011
7,1,VfB Stuttgart,FC Schalke 04,38,14,30,35.0,2011
...,...,...,...,...,...,...,...,...
304,34,1. FC Nürnberg,Bayer 04 Leverkusen,11,13,20,-30.0,2012
305,34,VfB Stuttgart,VfL Wolfsburg,14,5,9,4.0,2012
306,34,1. FC Köln,FC Bayern München,23,22,35,-37.0,2012
307,34,FC Augsburg,Hamburger SV,,,,,2012


# Plan 10/11

In [54]:
df12 = pd.read_excel('data/plan_10_11.xls')
df12

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Kaiserslautern,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2010/2011,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,20. Aug 2010 - Fr,1,FC Bayern München,VfL Wolfsburg,,19,4,3,57:24
4,,21./22.08.2010,1,Borussia Dortmund,Bayer 04 Leverkusen,,23,18,23,100:98
...,...,...,...,...,...,...,...,...,...,...
314,,,,,,,,,,
315,,Unter diesem Link hast Du die Möglichkeit den ...,,,,,,,,
316,,sowie die einzelnen Spielpläne der Mannschafte...,,,,,,,,
317,,,,,,,,,,


In [55]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df12 = process_dataframe(df12, away_team)
df12

Please enter the name of the away team: 1. FC Kaiserslautern


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,20. Aug 2010 - Fr,1,FC Bayern München,VfL Wolfsburg,19,4,3,57:24
4,21./22.08.2010,1,Borussia Dortmund,Bayer 04 Leverkusen,23,18,23,100:98
5,21./22.08.2010,1,Hamburger SV,FC Schalke 04,32,21,31,133:116
6,21./22.08.2010,1,1. FSV Mainz 05,VfB Stuttgart,2,3,5,15:18
7,21./22.08.2010,1,1899 Hoffenheim,SV Werder Bremen,0,1,3,4:8
...,...,...,...,...,...,...,...,...
304,14. Mai 2011 - Sa,34,1899 Hoffenheim,VfL Wolfsburg,1,0,3,4:12
305,14. Mai 2011 - Sa,34,1. FC Köln,FC Schalke 04,31,18,23,128:103
306,14. Mai 2011 - Sa,34,SC Freiburg,Bayer 04 Leverkusen,6,6,10,28:43
307,14. Mai 2011 - Sa,34,Hannover 96,1. FC Nürnberg,11,14,13,60:64


In [56]:
df12 = calculate_historical_goal_difference(df12, 'hist_goal_dif')
df12

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,20. Aug 2010 - Fr,1,FC Bayern München,VfL Wolfsburg,19,4,3,33
4,21./22.08.2010,1,Borussia Dortmund,Bayer 04 Leverkusen,23,18,23,2
5,21./22.08.2010,1,Hamburger SV,FC Schalke 04,32,21,31,17
6,21./22.08.2010,1,1. FSV Mainz 05,VfB Stuttgart,2,3,5,-3
7,21./22.08.2010,1,1899 Hoffenheim,SV Werder Bremen,0,1,3,-4
...,...,...,...,...,...,...,...,...
304,14. Mai 2011 - Sa,34,1899 Hoffenheim,VfL Wolfsburg,1,0,3,-8
305,14. Mai 2011 - Sa,34,1. FC Köln,FC Schalke 04,31,18,23,25
306,14. Mai 2011 - Sa,34,SC Freiburg,Bayer 04 Leverkusen,6,6,10,-15
307,14. Mai 2011 - Sa,34,Hannover 96,1. FC Nürnberg,11,14,13,-4


In [57]:
df12 = extract_year(df12, 'date')
df12

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,VfL Wolfsburg,19,4,3,33,2010
4,1,Borussia Dortmund,Bayer 04 Leverkusen,23,18,23,2,2010
5,1,Hamburger SV,FC Schalke 04,32,21,31,17,2010
6,1,1. FSV Mainz 05,VfB Stuttgart,2,3,5,-3,2010
7,1,1899 Hoffenheim,SV Werder Bremen,0,1,3,-4,2010
...,...,...,...,...,...,...,...,...
304,34,1899 Hoffenheim,VfL Wolfsburg,1,0,3,-8,2011
305,34,1. FC Köln,FC Schalke 04,31,18,23,25,2011
306,34,SC Freiburg,Bayer 04 Leverkusen,6,6,10,-15,2011
307,34,Hannover 96,1. FC Nürnberg,11,14,13,-4,2011


# Plan 09/10

In [58]:
df13 = pd.read_excel('data/plan_09_10.xls')
df13

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Köln,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2009/2010,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,"07.08.2009, 20.30",1,VfL Wolfsburg,VfB Stuttgart,,7,4,13,33:39
4,,07.-09.08.2009,1,Borussia Dortmund,1. FC Köln,,28,19,25,109:110
...,...,...,...,...,...,...,...,...,...,...
304,,"08.05.2010, 15.30",34,FSV Mainz 05,Schalke 04,,2,2,6,8:21
305,,"08.05.2010, 15.30",34,Borussia Mönchengladbach,Bayer Leverkusen,,13,21,20,74:98
306,,"08.05.2010, 15.30",34,VfL Bochum,Hannover 96,,14,9,11,52:53
307,,"08.05.2010, 15.30",34,1. FC Nürnberg,1. FC Köln,,14,10,22,59:89


In [59]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df13 = process_dataframe_2(df13, away_team)
df13

Please enter the name of the away team: 1. FC Köln


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,"07.08.2009, 20.30",1,VfL Wolfsburg,VfB Stuttgart,7,4,13,33:39
4,07.-09.08.2009,1,Borussia Dortmund,1. FC Köln,28,19,25,109:110
5,07.-09.08.2009,1,1. FC Nürnberg,Schalke 04,12,12,20,43:64
6,07.-09.08.2009,1,SC Freiburg,Hamburger SV,3,7,10,15:39
7,07.-09.08.2009,1,Werder Bremen,Eintracht Frankfurt,38,15,27,135:113
...,...,...,...,...,...,...,...,...
304,"08.05.2010, 15.30",34,FSV Mainz 05,Schalke 04,2,2,6,8:21
305,"08.05.2010, 15.30",34,Borussia Mönchengladbach,Bayer Leverkusen,13,21,20,74:98
306,"08.05.2010, 15.30",34,VfL Bochum,Hannover 96,14,9,11,52:53
307,"08.05.2010, 15.30",34,1. FC Nürnberg,1. FC Köln,14,10,22,59:89


In [60]:
df13 = calculate_historical_goal_difference(df13, 'hist_goal_dif')
df13

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,"07.08.2009, 20.30",1,VfL Wolfsburg,VfB Stuttgart,7,4,13,-6.0
4,07.-09.08.2009,1,Borussia Dortmund,1. FC Köln,28,19,25,-1.0
5,07.-09.08.2009,1,1. FC Nürnberg,Schalke 04,12,12,20,-21.0
6,07.-09.08.2009,1,SC Freiburg,Hamburger SV,3,7,10,-24.0
7,07.-09.08.2009,1,Werder Bremen,Eintracht Frankfurt,38,15,27,22.0
...,...,...,...,...,...,...,...,...
304,"08.05.2010, 15.30",34,FSV Mainz 05,Schalke 04,2,2,6,-13.0
305,"08.05.2010, 15.30",34,Borussia Mönchengladbach,Bayer Leverkusen,13,21,20,-24.0
306,"08.05.2010, 15.30",34,VfL Bochum,Hannover 96,14,9,11,-1.0
307,"08.05.2010, 15.30",34,1. FC Nürnberg,1. FC Köln,14,10,22,-30.0


In [61]:
df13 = extract_year(df13, 'date')
df13

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,VfL Wolfsburg,VfB Stuttgart,7,4,13,-6.0,2009
4,1,Borussia Dortmund,1. FC Köln,28,19,25,-1.0,2009
5,1,1. FC Nürnberg,Schalke 04,12,12,20,-21.0,2009
6,1,SC Freiburg,Hamburger SV,3,7,10,-24.0,2009
7,1,Werder Bremen,Eintracht Frankfurt,38,15,27,22.0,2009
...,...,...,...,...,...,...,...,...
304,34,FSV Mainz 05,Schalke 04,2,2,6,-13.0,2010
305,34,Borussia Mönchengladbach,Bayer Leverkusen,13,21,20,-24.0,2010
306,34,VfL Bochum,Hannover 96,14,9,11,-1.0,2010
307,34,1. FC Nürnberg,1. FC Köln,14,10,22,-30.0,2010


# Plan 08/09

In [62]:
df14 = pd.read_excel('data/plan_08_09.xls')
df14

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,Werder Bremen,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2008/2009,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,15.08.2008,1,FC Bayern München,Hamburger SV,,51,18,17,182:91
4,,16.08.-17.08.2008,1,Bayer 04 Leverkusen,Borussia Dortmund,,23,16,21,94:92
...,...,...,...,...,...,...,...,...,...,...
304,,23.05.2009,34,FC Bayern München,VfB Stuttgart,,48,18,16,162:97
305,,23.05.2009,34,FC Energie Cottbus,Bayer 04 Leverkusen,,3,2,5,15:16
306,,23.05.2009,34,FC Schalke 04,1899 Hoffenheim,,-,-,-,-
307,,23.05.2009,34,Karlsruher SC,Hertha BSC Berlin,,6,6,8,28:30


In [63]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df14 = process_dataframe_2(df14, away_team)
df14

Please enter the name of the away team: Werder Bremen


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,15.08.2008,1,FC Bayern München,Hamburger SV,51,18,17,182:91
4,16.08.-17.08.2008,1,Bayer 04 Leverkusen,Borussia Dortmund,23,16,21,94:92
5,16.08.-17.08.2008,1,Borussia Mönchengladbach,VfB Stuttgart,22,24,30,100:128
6,16.08.-17.08.2008,1,DSC Arminia Bielefeld,Werder Bremen,8,3,19,39:67
7,16.08.-17.08.2008,1,Eintracht Frankfurt,Hertha BSC Berlin,15,11,22,69:81
...,...,...,...,...,...,...,...,...
304,23.05.2009,34,FC Bayern München,VfB Stuttgart,48,18,16,162:97
305,23.05.2009,34,FC Energie Cottbus,Bayer 04 Leverkusen,3,2,5,15:16
306,23.05.2009,34,FC Schalke 04,1899 Hoffenheim,,,,-
307,23.05.2009,34,Karlsruher SC,Hertha BSC Berlin,6,6,8,28:30


In [64]:
df14 = calculate_historical_goal_difference(df14, 'hist_goal_dif')
df14

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,15.08.2008,1,FC Bayern München,Hamburger SV,51,18,17,91.0
4,16.08.-17.08.2008,1,Bayer 04 Leverkusen,Borussia Dortmund,23,16,21,2.0
5,16.08.-17.08.2008,1,Borussia Mönchengladbach,VfB Stuttgart,22,24,30,-28.0
6,16.08.-17.08.2008,1,DSC Arminia Bielefeld,Werder Bremen,8,3,19,-28.0
7,16.08.-17.08.2008,1,Eintracht Frankfurt,Hertha BSC Berlin,15,11,22,-12.0
...,...,...,...,...,...,...,...,...
304,23.05.2009,34,FC Bayern München,VfB Stuttgart,48,18,16,65.0
305,23.05.2009,34,FC Energie Cottbus,Bayer 04 Leverkusen,3,2,5,-1.0
306,23.05.2009,34,FC Schalke 04,1899 Hoffenheim,,,,
307,23.05.2009,34,Karlsruher SC,Hertha BSC Berlin,6,6,8,-2.0


In [65]:
df14 = extract_year(df14, 'date')
df14

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,FC Bayern München,Hamburger SV,51,18,17,91.0,2008
4,1,Bayer 04 Leverkusen,Borussia Dortmund,23,16,21,2.0,2008
5,1,Borussia Mönchengladbach,VfB Stuttgart,22,24,30,-28.0,2008
6,1,DSC Arminia Bielefeld,Werder Bremen,8,3,19,-28.0,2008
7,1,Eintracht Frankfurt,Hertha BSC Berlin,15,11,22,-12.0,2008
...,...,...,...,...,...,...,...,...
304,34,FC Bayern München,VfB Stuttgart,48,18,16,65.0,2009
305,34,FC Energie Cottbus,Bayer 04 Leverkusen,3,2,5,-1.0,2009
306,34,FC Schalke 04,1899 Hoffenheim,,,,,2009
307,34,Karlsruher SC,Hertha BSC Berlin,6,6,8,-2.0,2009


# Plan 07/08

In [66]:
df15 = pd.read_excel('data/plan_07_08.xls')
df15

Unnamed: 0,www.bulibox.de,Unnamed: 1,Unnamed: 2,Markiere Verein:,1. FC Nürnberg,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,,
1,,Spielplan 1. Bundesliga 2007/2008,,,,,Direkter Vergleich\n(Alle 1.+2. BuLi-Spiele),,,
2,,Datum,Spieltag,Heimmannschaft,Gastmannschaft,,S,U,N,Tore
3,,10.08.2007 20.30,1,VfB Stuttgart,FC Schalke 04,,35,12,27,121:85
4,,11.08.-12.08.2007,1,FC Bayern München,FC Hansa Rostock,,14,2,6,46:21
...,...,...,...,...,...,...,...,...,...,...
304,,17.05.2008 15.30,34,Hannover 96,FC Energie Cottbus,,4,0,4,9:10
305,,17.05.2008 15.30,34,Borussia Dortmund,VfL Wolfsburg,,12,7,3,41:23
306,,17.05.2008 15.30,34,Hamburger SV,Karlsruher SC,,15,17,12,71:57
307,,17.05.2008 15.30,34,VfL Bochum,FC Hansa Rostock,,7,4,9,22:25


In [67]:
away_team = input("Please enter the name of the away team: ")  # Get user input for away team
df15 = process_dataframe_2(df15, away_team)
df15

Please enter the name of the away team: 1. FC Nürnberg


Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,10.08.2007 20.30,1,VfB Stuttgart,FC Schalke 04,35,12,27,121:85
4,11.08.-12.08.2007,1,FC Bayern München,FC Hansa Rostock,14,2,6,46:21
5,11.08.-12.08.2007,1,Bayer 04 Leverkusen,FC Energie Cottbus,4,1,3,13:13
6,11.08.-12.08.2007,1,1. FC Nürnberg,Karlsruher SC,18,11,11,67:49
7,11.08.-12.08.2007,1,VfL Bochum,Werder Bremen,9,13,38,60:123
...,...,...,...,...,...,...,...,...
304,17.05.2008 15.30,34,Hannover 96,FC Energie Cottbus,4,0,4,9:10
305,17.05.2008 15.30,34,Borussia Dortmund,VfL Wolfsburg,12,7,3,41:23
306,17.05.2008 15.30,34,Hamburger SV,Karlsruher SC,15,17,12,71:57
307,17.05.2008 15.30,34,VfL Bochum,FC Hansa Rostock,7,4,9,22:25


In [68]:
df15 = calculate_historical_goal_difference(df15, 'hist_goal_dif')
df15

Unnamed: 0,date,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif
3,10.08.2007 20.30,1,VfB Stuttgart,FC Schalke 04,35,12,27,36
4,11.08.-12.08.2007,1,FC Bayern München,FC Hansa Rostock,14,2,6,25
5,11.08.-12.08.2007,1,Bayer 04 Leverkusen,FC Energie Cottbus,4,1,3,0
6,11.08.-12.08.2007,1,1. FC Nürnberg,Karlsruher SC,18,11,11,18
7,11.08.-12.08.2007,1,VfL Bochum,Werder Bremen,9,13,38,-63
...,...,...,...,...,...,...,...,...
304,17.05.2008 15.30,34,Hannover 96,FC Energie Cottbus,4,0,4,-1
305,17.05.2008 15.30,34,Borussia Dortmund,VfL Wolfsburg,12,7,3,18
306,17.05.2008 15.30,34,Hamburger SV,Karlsruher SC,15,17,12,14
307,17.05.2008 15.30,34,VfL Bochum,FC Hansa Rostock,7,4,9,-3


In [69]:
df15 = extract_year(df15, 'date')
df15

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,VfB Stuttgart,FC Schalke 04,35,12,27,36,2007
4,1,FC Bayern München,FC Hansa Rostock,14,2,6,25,2007
5,1,Bayer 04 Leverkusen,FC Energie Cottbus,4,1,3,0,2007
6,1,1. FC Nürnberg,Karlsruher SC,18,11,11,18,2007
7,1,VfL Bochum,Werder Bremen,9,13,38,-63,2007
...,...,...,...,...,...,...,...,...
304,34,Hannover 96,FC Energie Cottbus,4,0,4,-1,2008
305,34,Borussia Dortmund,VfL Wolfsburg,12,7,3,18,2008
306,34,Hamburger SV,Karlsruher SC,15,17,12,14,2008
307,34,VfL Bochum,FC Hansa Rostock,7,4,9,-3,2008


# Merging the data sets

In [70]:
merged_df = pd.concat([df0, df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15])
merged_df

Unnamed: 0,matchday_nr,home_team_name,away_team_name,hist_home_team_win,hist_draw,hist_away_team_win,hist_goal_dif,season
3,1,Eintracht Frankfurt,FC Bayern München,24,22,56,-80.0,2022
4,1,Borussia Dortmund,Bayer 04 Leverkusen,36,22,30,19.0,2022
5,1,1. FC Union Berlin,Hertha BSC,4,3,3,0.0,2022
6,1,1. FC Köln,FC Schalke 04,39,22,27,27.0,2022
7,1,Borussia Mönchengladbach,TSG Hoffenheim,8,12,10,6.0,2022
...,...,...,...,...,...,...,...,...
304,34,Hannover 96,FC Energie Cottbus,4,0,4,-1.0,2008
305,34,Borussia Dortmund,VfL Wolfsburg,12,7,3,18.0,2008
306,34,Hamburger SV,Karlsruher SC,15,17,12,14.0,2008
307,34,VfL Bochum,FC Hansa Rostock,7,4,9,-3.0,2008


In [71]:
def missing_data(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum()/data.isnull().count()*100).sort_values(ascending = False)
    return pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data(merged_df)

Unnamed: 0,Total,Percent
hist_goal_dif,155,3.16585
hist_home_team_win,154,3.145425
hist_draw,154,3.145425
hist_away_team_win,154,3.145425
matchday_nr,0,0.0
home_team_name,0,0.0
away_team_name,0,0.0
season,0,0.0


In [73]:
merged_df.to_csv('merged_df.csv', index=False)