In [242]:
import pandas as pd

#Read the csv File
#Try Common Encodings: As a last resort, you can try common encodings like
#'utf-8', 'latin1', 'ISO-8859-1', etc., and see if any of them successfully decode the file without errors.

# Adjust display options
pd.set_option('display.max_columns', None)  # Show all columns

df = pd.read_csv('Bundesligadaten.txt', delimiter='\t', encoding='ISO-8859-1', header=None)

df.columns = ['date', 'time', 'localTeam', 'awayTeam', 'score', 'score-HT']


In [243]:
#df['date'].isnull().any()

In [244]:
for column in ('date', 'time'):
     # Check if the column contains NaN values
    if df[column].isnull().any():
        # Fill NaN values with corresponding values from the last row using "forward fill"
        df[column] = df[column].fillna(method='ffill')

In [245]:

# Split the date Value to create a Day of the Week       
df['DoW'] = df['date'].str.split(',').str[0]
df['date'] = df['date'].str.split(',').str[1]

# Split the score
split_values = df['score'].str.split(':')
df['localTeam-Score'] = split_values.str[0].str[-1]
df['awayTeam-Score'] = split_values.str[1].str[0]

# Split the score to half time
split_values = df['score-HT'].str.split(':')
df['localTeam-SHT'] = split_values.str[0].str[-1]
df['awayTeam-SHT'] = split_values.str[1].str[0]


# Reorder columns
df = df[['DoW', 'date', 'time', 'localTeam', 'awayTeam', 'score', 'localTeam-Score', 'awayTeam-Score', 'score-HT', 'localTeam-SHT', 'awayTeam-SHT']]

In [246]:
df.head(1)

Unnamed: 0,DoW,date,time,localTeam,awayTeam,score,localTeam-Score,awayTeam-Score,score-HT,localTeam-SHT,awayTeam-SHT
0,Fr,05.08.2022,20:30,Eintracht Frankfurt,- FC Bayern München,1:6,1,6,(0:5),0,5


In [247]:
# Triming Columns
df['score-HT'] = df['score-HT'].str.removeprefix('(').str.removesuffix(')')
df['awayTeam'] = df['awayTeam'].str.removeprefix('-')

In [248]:
df.head(1)

Unnamed: 0,DoW,date,time,localTeam,awayTeam,score,localTeam-Score,awayTeam-Score,score-HT,localTeam-SHT,awayTeam-SHT
0,Fr,05.08.2022,20:30,Eintracht Frankfurt,FC Bayern München,1:6,1,6,0:5,0,5


In [249]:
    
# Generate 'NewField' based on comparison of 'Field1' and 'Field2'
df['points-Local'] = df.apply(lambda row: 3 if row['localTeam-Score'] > row['awayTeam-Score'] else (1 if row['localTeam-Score'] == row['awayTeam-Score'] else 0), axis=1)
df['points-Away'] = df.apply(lambda row: 3 if row['points-Local'] ==  0 else (1 if row['points-Local'] == 1 else 0), axis=1)

# Reorder columns
df = df[['DoW', 'date', 'time', 'localTeam', 'points-Local', 'awayTeam', 'points-Away', 'score', 'localTeam-Score', 'awayTeam-Score', 'score-HT', 'localTeam-SHT', 'awayTeam-SHT']]

In [250]:
df.head(15)

Unnamed: 0,DoW,date,time,localTeam,points-Local,awayTeam,points-Away,score,localTeam-Score,awayTeam-Score,score-HT,localTeam-SHT,awayTeam-SHT
0,Fr,05.08.2022,20:30,Eintracht Frankfurt,0,FC Bayern München,3,1:6,1,6,0:5,0,5
1,Sa,06.08.2022,15:30,1. FC Union Berlin,3,Hertha BSC,0,3:1,3,1,1:0,1,0
2,Sa,06.08.2022,15:30,Bor. Mönchengladbach,3,TSG Hoffenheim,0,3:1,3,1,1:1,1,1
3,Sa,06.08.2022,15:30,FC Augsburg,0,SC Freiburg,3,0:4,0,4,0:0,0,0
4,Sa,06.08.2022,15:30,VfL Bochum,0,FSV Mainz 05,3,1:2,1,2,1:1,1,1
5,Sa,06.08.2022,15:30,VfL Wolfsburg,1,Werder Bremen,1,2:2,2,2,1:2,1,2
6,Sa,06.08.2022,18:30,Borussia Dortmund,3,Bayer 04 Leverkusen,0,1:0,1,0,1:0,1,0
7,So,07.08.2022,15:30,VfB Stuttgart,1,Rasenballsport Leipzig,1,1:1,1,1,1:1,1,1
8,So,07.08.2022,17:30,1. FC Köln,3,FC Schalke 04,0,3:1,3,1,0:0,0,0
9,Fr,12.08.2022,20:30,SC Freiburg,0,Borussia Dortmund,3,1:3,1,3,1:0,1,0


In [252]:
blTable = df.groupby('localTeam')['points-Local'].sum()

blTable.sort_values(ascending=False)


localTeam
Rasenballsport Leipzig    19
Bor. Mönchengladbach      18
FC Bayern München         17
1. FC Union Berlin        17
Borussia Dortmund         16
SC Freiburg               16
Eintracht Frankfurt       13
VfL Wolfsburg             12
1. FC Köln                11
Bayer 04 Leverkusen       11
VfB Stuttgart             11
TSG Hoffenheim            10
VfL Bochum                10
Werder Bremen             10
Hertha BSC                10
FC Schalke 04              7
FSV Mainz 05               7
FC Augsburg                5
Name: points-Local, dtype: int64

In [253]:
blTable_away = df.groupby('awayTeam')['points-Away'].sum()

blTable_away.sort_values(ascending=False)

awayTeam
 Eintracht Frankfurt       44
 FC Bayern München         44
 Werder Bremen             41
 SC Freiburg               41
 FC Augsburg               40
 FSV Mainz 05              39
 TSG Hoffenheim            38
 VfL Wolfsburg             38
 1. FC Union Berlin        37
 Bayer 04 Leverkusen       37
 Borussia Dortmund         36
 Rasenballsport Leipzig    36
 Bor. Mönchengladbach      34
 Hertha BSC                34
 VfB Stuttgart             33
 1. FC Köln                33
 FC Schalke 04             32
 VfL Bochum                30
Name: points-Away, dtype: int64