In [2]:
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv("../data/results.csv")

In [4]:
df.head()

Unnamed: 0,home_team,away_team,home_goals,away_goals,result,season
0,Sheffield United,Liverpool,1.0,1.0,D,2006-2007
1,Arsenal,Aston Villa,1.0,1.0,D,2006-2007
2,Everton,Watford,2.0,1.0,H,2006-2007
3,Newcastle United,Wigan Athletic,2.0,1.0,H,2006-2007
4,Portsmouth,Blackburn Rovers,3.0,0.0,H,2006-2007


### Odredi bodove i gol razliku klubova po utakmici: "home_points", "away_points", "home_goal_diff", "away_goal_diff"

In [5]:
df["home_points"] = df["result"].map({"H": 3, "D": 1, "A": 0})
df["away_points"] = df["result"].map({"H": 0, "D": 1, "A": 3})

In [6]:
df["home_goal_diff"] = df["home_goals"] - df["away_goals"]
df["away_goal_diff"] = df["away_goals"] - df["home_goals"]

In [7]:
df.head()

Unnamed: 0,home_team,away_team,home_goals,away_goals,result,season,home_points,away_points,home_goal_diff,away_goal_diff
0,Sheffield United,Liverpool,1.0,1.0,D,2006-2007,1,1,0.0,0.0
1,Arsenal,Aston Villa,1.0,1.0,D,2006-2007,1,1,0.0,0.0
2,Everton,Watford,2.0,1.0,H,2006-2007,3,0,1.0,-1.0
3,Newcastle United,Wigan Athletic,2.0,1.0,H,2006-2007,3,0,1.0,-1.0
4,Portsmouth,Blackburn Rovers,3.0,0.0,H,2006-2007,3,0,3.0,-3.0


### Podijeli dataframe po sezonama

In [8]:
unique_seasons = df["season"].unique()
unique_seasons

array(['2006-2007', '2007-2008', '2008-2009', '2009-2010', '2010-2011',
       '2011-2012', '2012-2013', '2013-2014', '2014-2015', '2015-2016',
       '2016-2017', '2017-2018'], dtype=object)

In [9]:
season_dict = {}

for season in unique_seasons:
    season_df = df[df["season"] == season]
    season_dict[season] = season_df

print(season_dict.keys())
print("Number of seasons: " + str(len(season_dict.keys())))

dict_keys(['2006-2007', '2007-2008', '2008-2009', '2009-2010', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015', '2015-2016', '2016-2017', '2017-2018'])
Number of seasons: 12


### Stvori novi dataframe s kolima - varijable "fixture_id", "team", "points", "goal_diff"

In [10]:
season_with_fixtures = dict()

for season, season_df in season_dict.items():
    season_df = season_df.reset_index(drop=True)
    fixture_data = []
    fixture_id = 1
    for idx, row in season_df.iterrows():
        fixture_data.append({
            "fixture_id": fixture_id,
            "team": row["home_team"],
            "points": row["home_points"],
            "goal_diff": row["home_goal_diff"]
        })

        fixture_data.append({
            "fixture_id": fixture_id,
            "team": row["away_team"],
            "points": row["away_points"],
            "goal_diff": row["away_goal_diff"]
        })

        if idx % 10 == 9:
            fixture_id += 1

    season_with_fixtures[season] = pd.DataFrame(fixture_data)

In [11]:
print(season_with_fixtures.keys())
print("Number of seasons: " + str(len(season_with_fixtures.keys())))

dict_keys(['2006-2007', '2007-2008', '2008-2009', '2009-2010', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015', '2015-2016', '2016-2017', '2017-2018'])
Number of seasons: 12


In [12]:
season_with_fixtures["2006-2007"].head(n=30)

Unnamed: 0,fixture_id,team,points,goal_diff
0,1,Sheffield United,1,0.0
1,1,Liverpool,1,0.0
2,1,Arsenal,1,0.0
3,1,Aston Villa,1,0.0
4,1,Everton,3,1.0
5,1,Watford,0,-1.0
6,1,Newcastle United,3,1.0
7,1,Wigan Athletic,0,-1.0
8,1,Portsmouth,3,3.0
9,1,Blackburn Rovers,0,-3.0


### Izračunaj rang po kolu

Sample

In [13]:
sample_df = season_with_fixtures["2006-2007"]

In [14]:
sample_df["team"] = sample_df.groupby("fixture_id")["team"].transform(lambda x: sorted(x))

In [15]:
sample_df["cumulative_points"] = sample_df.groupby("team")["points"].cumsum()
sample_df.head(n=60)

Unnamed: 0,fixture_id,team,points,goal_diff,cumulative_points
0,1,Arsenal,1,0.0,1
1,1,Aston Villa,1,0.0,1
2,1,Blackburn Rovers,1,0.0,1
3,1,Bolton Wanderers,1,0.0,1
4,1,Charlton Athletic,3,1.0,3
5,1,Chelsea,0,-1.0,0
6,1,Everton,3,1.0,3
7,1,Fulham,0,-1.0,0
8,1,Liverpool,3,3.0,3
9,1,Manchester City,0,-3.0,0


### Spremi po sezoni

In [16]:
for season, season_df in season_with_fixtures.items():
    season_df["team"] = season_df.groupby("fixture_id")["team"].transform(lambda x: sorted(x))
    season_df["cumulative_points"] = season_df.groupby("team")["points"].cumsum()
    season_csv_filename = f'season_{season}.csv'
    season_df.to_csv("../data/task5/" + season_csv_filename, index=False)
    print(f"Saved {season_csv_filename}")

Saved task5_season_2006-2007.csv
Saved task5_season_2007-2008.csv
Saved task5_season_2008-2009.csv
Saved task5_season_2009-2010.csv
Saved task5_season_2010-2011.csv
Saved task5_season_2011-2012.csv
Saved task5_season_2012-2013.csv
Saved task5_season_2013-2014.csv
Saved task5_season_2014-2015.csv
Saved task5_season_2015-2016.csv
Saved task5_season_2016-2017.csv
Saved task5_season_2017-2018.csv
