### 0. Libraries

In [1]:
import numpy as np
import pandas as pd
import glob
from pathlib import Path
import os
import scipy.stats as stats

### 1. Transfers DataFrame

In [2]:
transfers = []
for year in range(1996, 2021):
    path = "data/transfers/" + str(year) + "/"
    for file in os.listdir(path):
        if not file.startswith('.'):
            df = pd.read_csv(path + file, sep=',')
            transfers.append(df)

transfers_df = pd.concat(transfers)

#### 1.1 Cleaning: transfers_df

In [3]:
transfers_df.replace(to_replace='Premier Liga', value='Premier League', inplace=True)
transfers_df.replace(to_replace='Primera Division', value='La Liga', inplace=True)
transfers_df.replace(to_replace='1 Bundesliga', value='Bundesliga', inplace=True)
transfers_df.replace(to_replace='Defender', value='Centre-Back', inplace=True)
transfers_df.replace(to_replace='Sweeper', value='Centre-Back', inplace=True)
transfers_df.replace(to_replace='Midfielder', value='Central Midfield', inplace=True)
transfers_df.replace(to_replace='Left Midfield', value='Left Winger', inplace=True)
transfers_df.replace(to_replace='Right Midfield', value='Right Winger', inplace=True)
transfers_df.replace(to_replace='Forward', value='Second Striker', inplace=True)
transfers_df.replace(to_replace='Centre-Forward', value='Striker', inplace=True)
transfers_df['fee_cleaned'] = transfers_df['fee_cleaned'].fillna(0)

#### 1.2 New DF: transfers_in

In [4]:
transfers_in = transfers_df[transfers_df['transfer_movement'].isin(['in'])]

#### 1.3 New DF: transfers_in_not_free

In [5]:
transfers_in_not_free = transfers_in[(transfers_in != 0).all(1)]

#### 1.4 Convert £ to € and apply inflation

In [6]:
change_inflation = pd.read_csv('../your-project/data/change_inflation/inflation_change.csv', sep=';')
change_inflation.columns = ['year', 'change', 'inflation']
#change_inflation

In [57]:
transfers_in_converted = pd.merge(transfers_in_not_free, change_inflation, how='left', left_on=['year'], right_on =['year'])
transfers_in_converted["fee_converted"] = transfers_in_converted["fee_cleaned"] * transfers_in_converted["change"] * transfers_in_converted["inflation"]
transfers_in_converted = transfers_in_converted[['club_name', 'player_name', 'age', 'position', 'club_involved_name',
       'fee', 'fee_converted', 'transfer_movement', 'transfer_period', 'league_name', 'year', 'season']]
#transfers_in_converted

In [58]:
transfers_in_converted.to_csv(r'../your-project/data/tableau_dfs/transfers_in_converted.csv')

#### 1.5 Transfers Statistics

In [112]:
transfers_statistics = transfers_in_converted[['position', 'fee_converted']]
# AGRUPAR POSICIONS PER ZONES DEL CAMP
transfers_statistics.replace(to_replace='Centre-Back', value='Defenders', inplace=True)
transfers_statistics.replace(to_replace='Left-Back', value='Defenders', inplace=True)
transfers_statistics.replace(to_replace='Right-Back', value='Defenders', inplace=True)
transfers_statistics.replace(to_replace='Defensive Midfield', value='Midfielders', inplace=True)
transfers_statistics.replace(to_replace='Central Midfield', value='Midfielders', inplace=True)
transfers_statistics.replace(to_replace='Left Winger', value='Midfielders', inplace=True)
transfers_statistics.replace(to_replace='Right Winger', value='Midfielders', inplace=True)
transfers_statistics.replace(to_replace='Attacking Midfield', value='Attackers', inplace=True)
transfers_statistics.replace(to_replace='Second Striker', value='Attackers', inplace=True)
transfers_statistics.replace(to_replace='Striker', value='Attackers', inplace=True)
#transfers_statistics

SyntaxError: unmatched ')' (<ipython-input-112-8204c91c5f6d>, line 14)

In [115]:
position_count = transfers_statistics.groupby('position').count().reset_index()
position_count.to_csv(r'../your-project/data/tableau_dfs/position_count.csv')

In [116]:
position_mean = transfers_statistics.groupby('position').mean().reset_index()
position_mean.to_csv(r'../your-project/data/tableau_dfs/position_mean.csv')

In [9]:
GK = transfers_statistics[transfers_statistics['position']=='Goalkeeper']
GK = GK['fee_converted'].reset_index(drop=True)
#GK

In [10]:
DF = transfers_statistics[transfers_statistics['position']=='Defenders']
DF = DF['fee_converted'].reset_index(drop=True)
#DF

In [11]:
MF = transfers_statistics[transfers_statistics['position']=='Midfielders']
MF = MF['fee_converted'].reset_index(drop=True)
#MF

In [12]:
AT = transfers_statistics[transfers_statistics['position']=='Attackers']
AT = AT['fee_converted'].reset_index(drop=True)
#AT

#### 1.6 P-Values

In [13]:
# MEGA NULL HIPOSTESIS = TOTES LES ZONES DEL CAMP SON IGUAL D'IMPORTANTS I, PER TANT, S'HAURIEN DE PAGAR IGUAL
# 6 NULL HIPOTESIS = 

# SI TOTS ELS P-VALUES (6 COMBINACIONS) SURTEN MENORS DE 0.05 VOL DIR QUE LA COMPARATIVA ENTRE ZONES DEL CAMP SON ESTADISTICAMENT DIFERENTS, ES A DIR,
# QUE SI ES PAGA MES PER UNS QUE PELS ALTRES NO ES PER CASUALITAT.

In [14]:
ttest, pvalue_gk_vs_df = stats.ttest_ind(GK, DF, equal_var=False)
ttest, pvalue_gk_vs_df

(-3.002030732360958, 0.002744739934721077)

In [15]:
ttest, pvalue_gk_vs_mf = stats.ttest_ind(GK, MF, equal_var=False)
ttest, pvalue_gk_vs_mf

(-7.827720439063066, 9.882662761524437e-15)

In [16]:
ttest, pvalue_gk_vs_at = stats.ttest_ind(GK, AT, equal_var=False)
ttest, pvalue_gk_vs_at

(-9.812522754896259, 4.1806054341417954e-22)

In [17]:
ttest, pvalue_df_vs_mf = stats.ttest_ind(DF, MF, equal_var=False)
ttest, pvalue_df_vs_mf

(-7.573218452685393, 4.103226117023789e-14)

In [18]:
ttest, pvalue_df_vs_at = stats.ttest_ind(DF, AT, equal_var=False)
ttest, pvalue_df_vs_at

(-10.21814510984094, 2.6103340504176492e-24)

In [19]:
ttest, pvalue_mf_vs_at = stats.ttest_ind(MF, AT, equal_var=False)
ttest, pvalue_mf_vs_at

(-2.9789454649519493, 0.002902198973509399)

#### 1.7 Transfers pct_change

In [54]:
transfers_year_leagues_pctch = transfers_in_converted.groupby(['year', 'league_name'])['fee_converted'].sum().reset_index()
transfers_year_leagues_pctch['pct_change'] = transfers_year_leagues_pctch.groupby(['league_name'])['fee_converted'].pct_change()
#transfers_year_leagues_pctch.head(50)

In [59]:
transfers_year_leagues_pctch.to_csv(r'../your-project/data/tableau_dfs/transfers_pctch_df.csv')

#### 1.8 Football Fun Facts

In [21]:
transfers_year_leagues_mean = transfers_in_converted.groupby(['year', 'league_name'])['fee_converted'].mean().reset_index()
#transfers_year_leagues_mean

In [22]:
transfers_year_leagues_globalsum = transfers_in_converted.groupby(['year'])['fee_converted'].sum().reset_index()
transfers_year_leagues_globalsum['pct_change'] = transfers_year_leagues_globalsum['fee_converted'].pct_change()
#transfers_year_leagues_globalsum

In [23]:
transfers_year_leagues_globalmean = transfers_in_converted.groupby(['year'])['fee_converted'].mean()
#transfers_year_leagues_globalmean

In [24]:
transfers_league_age_mean = transfers_in_converted.groupby(['year', 'league_name'])['age'].mean()
#transfers_league_age_mean

In [25]:
transfers_league_age_mean_global = transfers_in_converted.groupby(['year'])['age'].mean()
#transfers_league_age_mean_global

In [26]:
transfers_league_players_count = transfers_in_converted.groupby(['year', 'league_name'])['player_name'].count()
#transfers_league_players_count

In [27]:
transfers_league_players_globalcount = transfers_in_converted.groupby(['year'])['player_name'].count()
#transfers_league_players_globalcount

In [28]:
transfers_league_positions_sum = transfers_in_converted.groupby(['year', 'league_name', 'position'])['fee_converted'].sum()
#transfers_league_positions_sum

In [29]:
transfers_league_positions_globalsum = transfers_in_converted.groupby(['year', 'position'])['fee_converted'].sum()
#transfers_league_positions_globalsum

In [30]:
transfers_league_positions_globalmean = transfers_in_converted.groupby(['year', 'position'])['fee_converted'].mean()
#transfers_league_positions_globalmean

In [31]:
transfers_league_positions_historicsum = transfers_in_converted.groupby(['position'], sort=True)['fee_converted'].sum()
#transfers_league_positions_historicsum

In [32]:
transfers_league_positions_historicmean = transfers_in_converted.groupby(['position'])['fee_converted'].mean()
#transfers_league_positions_historicmean

In [33]:
transfers_league_positions_count = transfers_in_converted.groupby(['year', 'league_name', 'position'])['player_name'].count()
#transfers_league_positions_count

In [34]:
transfers_league_positions_historiccount = transfers_in_converted.groupby(['position'])['player_name'].count()
#transfers_league_positions_historiccount

### Economy DataFrame

In [35]:
path = r'/Users/arnauangerri/Desktop/IronHack/Week5/Project-Week-5-Your-Own-Project/your-project/data/economy/'
all_files = glob.glob(path + "/*.csv")

economy = []

for filename in all_files:
    dataframes = pd.read_csv(filename, index_col=None, header=0, sep=';')
    economy.append(dataframes)

economy_df = pd.concat(economy, axis=0, ignore_index=True)
economy_df
economy_df.columns = ['Year', 'Country', 'Anual GDP (M.€)', 'Inc. GDP (%)', 'GDP Per Capita (m.€)',
       'Annual Inc. GDP Per Capita (%)', 'Total Debt (M.€)', 'Debt (% GDP)',
       'Debt Per Capita (m.€)']
economy_df = economy_df.replace({'Francia':'France'}, regex=True)
economy_df = economy_df.replace({'Reino Unido':'UK'}, regex=True)
economy_df = economy_df.replace({'Italia':'Italy'}, regex=True)
economy_df = economy_df.replace({'España':'Spain'}, regex=True)
economy_df = economy_df.replace({'Alemania':'Germany'}, regex=True)
economy_df = economy_df.replace({'%':''}, regex=True)
economy_df = economy_df.replace({'€':''}, regex=True)
economy_df = economy_df.replace({'M.':''}, regex=True)
economy_df['Anual GDP (M.€)'] = economy_df['Anual GDP (M.€)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['Anual GDP (M.€)'] = economy_df['Anual GDP (M.€)'] * 1000000
economy_df['Inc. GDP (%)'] = economy_df['Inc. GDP (%)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['GDP Per Capita (m.€)'] = economy_df['GDP Per Capita (m.€)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['Annual Inc. GDP Per Capita (%)'] = economy_df['Annual Inc. GDP Per Capita (%)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['Total Debt (M.€)'] = economy_df['Total Debt (M.€)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['Total Debt (M.€)'] = economy_df['Total Debt (M.€)'] * 1000000
economy_df['Debt (% GDP)'] = economy_df['Debt (% GDP)'].str.replace('.', '').str.replace(',', '.').astype(float)
economy_df['Debt Per Capita (m.€)'] = economy_df['Debt Per Capita (m.€)'].str.replace('.', '').str.replace(',', '.').astype(float)

In [36]:
economy_df_germany = economy_df[economy_df['Country'].isin(['Germany'])].sort_values('Year')
economy_df_france = economy_df[economy_df['Country'].isin(['France'])].sort_values('Year')
economy_df_spain = economy_df[economy_df['Country'].isin(['Spain'])].sort_values('Year')
economy_df_italy = economy_df[economy_df['Country'].isin(['Italy'])].sort_values('Year')
economy_df_uk = economy_df[economy_df['Country'].isin(['UK'])].sort_values('Year')

In [37]:
economy_df_germany_pctch = economy_df_germany[['Anual GDP (M.€)', 'GDP Per Capita (m.€)', 'Total Debt (M.€)', 'Debt (% GDP)', 'Debt Per Capita (m.€)']]
economy_df_germany_pctch = economy_df_germany_pctch.pct_change()
economy_df_germany_pctch.columns = ['Var. GDP(%)', 'Var. GDPxC(%)', 'Var. TDebt(%)', 'Var. Debt (% GDP)', 'Var. DebtxC (%)']
economy_df_germany_pctch = pd.merge(economy_df_germany, economy_df_germany_pctch, how='left', left_index=True, right_index=True)
economy_df_germany_pctch = economy_df_germany_pctch[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)',
                                                     'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]
#economy_df_germany_pctch
#economy_df_germany_pctch.to_csv(r'../your-project/data/economy/merged_clean_data/economy_df_germany_pctch.csv')

In [38]:
economy_df_france_pctch = economy_df_france[['Anual GDP (M.€)', 'GDP Per Capita (m.€)', 'Total Debt (M.€)', 'Debt (% GDP)', 'Debt Per Capita (m.€)']]
economy_df_france_pctch = economy_df_france_pctch.pct_change()
economy_df_france_pctch.columns = ['Var. GDP(%)', 'Var. GDPxC(%)', 'Var. TDebt(%)', 'Var. Debt (% GDP)', 'Var. DebtxC (%)']
economy_df_france_pctch = pd.merge(economy_df_france, economy_df_france_pctch, how='left', left_index=True, right_index=True)
economy_df_france_pctch = economy_df_france_pctch[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)',
                                                     'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]
#economy_df_france_pctch
#economy_df_france_pctch.to_csv(r'../your-project/data/economy/merged_clean_data/economy_df_france_pctch.csv')

In [39]:
economy_df_spain_pctch = economy_df_spain[['Anual GDP (M.€)', 'GDP Per Capita (m.€)', 'Total Debt (M.€)', 'Debt (% GDP)', 'Debt Per Capita (m.€)']]
economy_df_spain_pctch = economy_df_spain_pctch.pct_change()
economy_df_spain_pctch.columns = ['Var. GDP(%)', 'Var. GDPxC(%)', 'Var. TDebt(%)', 'Var. Debt (% GDP)', 'Var. DebtxC (%)']
economy_df_spain_pctch = pd.merge(economy_df_spain, economy_df_spain_pctch, how='left', left_index=True, right_index=True)
economy_df_spain_pctch = economy_df_spain_pctch[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)',
                                                     'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]
#economy_df_spain_pctch
#economy_df_spain_pctch.to_csv(r'../your-project/data/economy/merged_clean_data/economy_df_spain_pctch.csv')

In [40]:
economy_df_italy_pctch = economy_df_italy[['Anual GDP (M.€)', 'GDP Per Capita (m.€)', 'Total Debt (M.€)', 'Debt (% GDP)', 'Debt Per Capita (m.€)']]
economy_df_italy_pctch = economy_df_italy_pctch.pct_change()
economy_df_italy_pctch.columns = ['Var. GDP(%)', 'Var. GDPxC(%)', 'Var. TDebt(%)', 'Var. Debt (% GDP)', 'Var. DebtxC (%)']
economy_df_italy_pctch = pd.merge(economy_df_italy, economy_df_italy_pctch, how='left', left_index=True, right_index=True)
economy_df_italy_pctch = economy_df_italy_pctch[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)',
                                                     'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]
#economy_df_italy_pctch
#economy_df_italy_pctch.to_csv(r'../your-project/data/economy/merged_clean_data/economy_df_italy_pctch.csv')

In [41]:
economy_df_uk_pctch = economy_df_uk[['Anual GDP (M.€)', 'GDP Per Capita (m.€)', 'Total Debt (M.€)', 'Debt (% GDP)', 'Debt Per Capita (m.€)']]
economy_df_uk_pctch = economy_df_uk_pctch.pct_change()
economy_df_uk_pctch.columns = ['Var. GDP(%)', 'Var. GDPxC(%)', 'Var. TDebt(%)', 'Var. Debt (% GDP)', 'Var. DebtxC (%)']
economy_df_uk_pctch = pd.merge(economy_df_uk, economy_df_uk_pctch, how='left', left_index=True, right_index=True)
economy_df_uk_pctch = economy_df_uk_pctch[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)',
                                                     'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]
#economy_df_uk_pctch
#economy_df_uk_pctch.to_csv(r'../your-project/data/economy/merged_clean_data/economy_df_uk_pctch.csv')

In [90]:
path = r'/Users/arnauangerri/Desktop/IronHack/Week5/Project-Week-5-Your-Own-Project/your-project/data/economy/merged_clean_data/'
all_files = glob.glob(path + "/*.csv")

economy = []

for filename in all_files:
    dataframes = pd.read_csv(filename, index_col=None, header=0)
    economy.append(dataframes)

economy_df = pd.concat(economy, axis=0, ignore_index=True)

economy_df = economy_df[['Year', 'Country', 'Anual GDP (M.€)', 'Var. GDP(%)', 'GDP Per Capita (m.€)', 'Var. GDPxC(%)', 'Total Debt (M.€)', 'Var. TDebt(%)', 
                         'Debt (% GDP)', 'Var. Debt (% GDP)', 'Debt Per Capita (m.€)', 'Var. DebtxC (%)']]

economy_df
#economy_df.to_csv(r'../your-project/data/tableau_dfs/economy_df.csv')

Unnamed: 0,Year,Country,Anual GDP (M.€),Var. GDP(%),GDP Per Capita (m.€),Var. GDPxC(%),Total Debt (M.€),Var. TDebt(%),Debt (% GDP),Var. Debt (% GDP),Debt Per Capita (m.€),Var. DebtxC (%)
0,1996,Germany,1.967956e+12,,24160.0,,1.115747e+12,,57.8,,13605.0,
1,1997,Germany,1.952618e+12,-0.007794,23960.0,-0.008278,1.142493e+12,0.023971,58.9,0.019031,13923.0,0.023374
2,1998,Germany,2.000816e+12,0.024684,24570.0,0.025459,1.199279e+12,0.049704,59.5,0.010187,14619.0,0.049989
3,1999,Germany,2.059480e+12,0.029320,25290.0,0.029304,1.238559e+12,0.032753,60.1,0.010084,15074.0,0.031124
4,2000,Germany,2.109090e+12,0.024089,25890.0,0.023725,1.245750e+12,0.005806,59.1,-0.016639,15144.0,0.004644
...,...,...,...,...,...,...,...,...,...,...,...,...
115,2015,Italy,1.655355e+12,0.017174,27260.0,0.018304,2.239409e+12,0.016528,135.3,-0.000739,36914.0,0.018711
116,2016,Italy,1.695787e+12,0.024425,27970.0,0.026045,2.285619e+12,0.020635,134.8,-0.003695,37723.0,0.021916
117,2017,Italy,1.736593e+12,0.024063,28690.0,0.025742,2.329553e+12,0.019222,134.1,-0.005193,38515.0,0.020995
118,2018,Italy,1.766168e+12,0.017030,29210.0,0.018125,2.380942e+12,0.022060,134.8,0.005220,39446.0,0.024172


### National Average Salary

In [103]:
national_avg_salary = pd.read_csv('../your-project/data/economy/national_avarage_salary/national_average_salary.csv', sep=';')
national_avg_salary['NAS_Inc/Dec_Rate'] = national_avg_salary.groupby(['Country'])['National Average Salary'].pct_change()
#national_avg_salary
national_avg_salary.to_csv(r'../your-project/data/tableau_dfs/national_avg_salary.csv')

### Unemployement Rate

In [110]:
unemployement_rate = pd.read_csv('../your-project/data/economy/unemployement_rate/unemployement_rate.csv', sep=';')
unemployement_rate = unemployement_rate.replace({'%':''}, regex=True)
unemployement_rate['Unemployment Rate (%)'] = unemployement_rate['Unemployment Rate (%)'].astype(float) 
unemployement_rate['Annual Change'] = unemployement_rate.groupby(['Country'])['Unemployment Rate (%)'].pct_change()
#unemployement_rate
unemployement_rate.to_csv(r'../your-project/data/tableau_dfs/unemployement_rate.csv')

### TV Revenue DataFrame

In [72]:
tv_revenue_df = pd.read_csv('../your-project/data/tv_stadiums/tv_big_5.csv', sep=';', thousands='.')
tv_revenue_inflation = pd.merge(tv_revenue_df, change_inflation, how='left', left_on=['Year'], right_on =['year'])
tv_revenue_inflation = tv_revenue_inflation[['Year', 'inflation', 'England', 'Italy', 'Germany', 'Spain', 'France']]
tv_revenue_inflation.columns
tv_revenue_inflation["England_infl"] = tv_revenue_inflation["England"] * tv_revenue_inflation["inflation"]
tv_revenue_inflation["Italy_infl"] = tv_revenue_inflation["Italy"] * tv_revenue_inflation["inflation"]
tv_revenue_inflation["Germany_infl"] = tv_revenue_inflation["Germany"] * tv_revenue_inflation["inflation"]
tv_revenue_inflation["Spain_infl"] = tv_revenue_inflation["Spain"] * tv_revenue_inflation["inflation"]
tv_revenue_inflation["France_infl"] = tv_revenue_inflation["France"] * tv_revenue_inflation["inflation"]
tv_revenue_inflation = tv_revenue_inflation[['Year', 'inflation', 'England', 'England_infl', 'Italy', 'Italy_infl', 
                                             'Germany', 'Germany_infl', 'Spain', 'Spain_infl', 'France', 'France_infl']]
tv_revenue_inflation['Big5_TV'] = tv_revenue_inflation_pct[['England_infl', 'Italy_infl', 'Germany_infl', 'Spain_infl', 'France_infl']].sum(axis=1)

In [73]:
tv_revenue_inflation_pctch = tv_revenue_inflation[['England_infl', 'Italy_infl', 'Germany_infl', 'Spain_infl', 'France_infl', 'Big5_TV']]
tv_revenue_inflation_pctch = tv_revenue_inflation_pctch.pct_change()
tv_revenue_inflation_pctch.columns = ['Premier_%', 'SerieA_%', 'Bundesliga_%', 'LaLiga_%', 'Ligue1_%', 'Big5_%'] 

In [77]:
tv_revenue_inflation_pct = pd.merge(tv_revenue_inflation, tv_revenue_inflation_pctch, how='left', left_index=True, right_index=True)
tv_revenue_inflation_pct = tv_revenue_inflation_pct[['Year', 'inflation', 'England', 'England_infl', 'Premier_%', 'Italy', 'Italy_infl', 'SerieA_%', 'Germany', 
                                                     'Germany_infl', 'Bundesliga_%', 'Spain', 'Spain_infl', 'LaLiga_%', 'France', 'France_infl', 'Ligue1_%', 'Big5_TV', 'Big5_%']]

tv_revenue_inflation_pct.to_csv(r'../your-project/data/tableau_dfs/tv_revenue_df.csv')
tv_revenue_inflation_pct

Unnamed: 0,Year,inflation,England,England_infl,Premier_%,Italy,Italy_infl,SerieA_%,Germany,Germany_infl,Bundesliga_%,Spain,Spain_infl,LaLiga_%,France,France_infl,Ligue1_%,Big5_TV,Big5_%
0,1996,1.5021,685,1028.9385,,551,827.6571,,444,666.9324,,524,787.1004,,293,440.1153,,3750.7437,
1,1997,1.4698,895,1315.471,0.278474,650,955.37,0.154307,569,836.3162,0.253974,513,754.0074,-0.042044,323,474.7454,0.078684,4335.91,0.156013
2,1998,1.4466,1024,1481.3184,0.126075,714,1032.8724,0.081123,577,834.6882,-0.001947,612,885.3192,0.174152,393,568.5138,0.197513,4802.712,0.10766
3,1999,1.4346,1219,1748.7774,0.180555,954,1368.6084,0.325051,681,976.9626,0.170452,722,1035.7812,0.169952,607,870.8022,0.531717,6000.9318,0.249488
4,2000,1.4089,1557,2193.6573,0.254395,1027,1446.9403,0.057235,880,1239.832,0.269068,676,952.4164,-0.080485,644,907.3316,0.041949,6740.1776,0.123189
5,2001,1.3737,1747,2399.8539,0.093997,1017,1397.0529,-0.034478,1043,1432.7691,0.155616,776,1065.9912,0.119249,643,883.2891,-0.026498,7178.9562,0.065099
6,2002,1.3455,1791,2409.7905,0.004141,1024,1377.792,-0.013787,1108,1490.814,0.040512,847,1139.6385,0.069088,689,927.0495,0.049543,7345.0845,0.023141
7,2003,1.3151,1977,2599.9527,0.078912,1052,1383.4852,0.004132,1058,1391.3758,-0.066701,953,1253.2903,0.099726,655,861.3905,-0.070826,7489.4945,0.019661
8,2004,1.289,1975,2545.775,-0.020838,1219,1571.291,0.135748,1236,1593.204,0.145057,1029,1326.381,0.058319,696,897.144,0.041507,7933.795,0.059323
9,2005,1.2593,1995,2512.3035,-0.013148,1277,1608.1261,0.023443,1195,1504.8635,-0.055448,1158,1458.2694,0.099435,910,1145.963,0.277346,8229.5255,0.037275


### Stadium Attendance DataFrame

In [78]:
attendance_df = pd.read_csv('../your-project/data/tv_stadiums/stadiums_attendance.csv', sep=';')
attendance_df = attendance_df.fillna(0)
attendance_df = attendance_df.replace({'%':''}, regex=True)
attendance_df['Average Capacity Ocupation'] = attendance_df['Average Capacity Ocupation'].str.replace(',', '.').astype(float)
attendance_df['Total Stadiums Capacity'] = attendance_df['Total Stadiums Capacity'].astype(float) 
attendance_df['Average Stadiums Capacity'] = attendance_df['Average Stadiums Capacity'].astype(float)

In [79]:
attendance_df_bundes = attendance_df[attendance_df['League'].isin(['Bundesliga'])]
attendance_df_premier = attendance_df[attendance_df['League'].isin(['Premier League'])]
attendance_df_laliga = attendance_df[attendance_df['League'].isin(['La Liga'])]
attendance_df_ligue1 = attendance_df[attendance_df['League'].isin(['Ligue 1'])]
attendance_df_seriea = attendance_df[attendance_df['League'].isin(['Serie A'])]

In [82]:
attendance_df_bundes_pctch = attendance_df_bundes[['Total Stadiums Capacity', 'Average Stadiums Capacity', 'Total Attendance', 'Average Attendance', 'Average Capacity Ocupation']]
attendance_df_bundes_pctch = attendance_df_bundes_pctch.pct_change()
attendance_df_bundes_pctch.columns = ['TSC_%', 'ASC_%', 'TA_%', 'AA_%', 'ACO_%']
attendance_df_bundes_pctch = pd.merge(attendance_df_bundes, attendance_df_bundes_pctch, how='left', left_index=True, right_index=True)
attendance_df_bundes_pctch = attendance_df_bundes_pctch[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                                                         'Total Attendance', 'TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#attendance_df_bundes_pctch
attendance_df_bundes_pctch.to_csv(r'../your-project/data/tv_stadiums/merged_cleaned_data/attendance_df_bundes_pctch.csv')

In [83]:
attendance_df_premier_pctch = attendance_df_premier[['Total Stadiums Capacity', 'Average Stadiums Capacity', 'Total Attendance', 'Average Attendance', 'Average Capacity Ocupation']]
attendance_df_premier_pctch = attendance_df_premier_pctch.pct_change()
attendance_df_premier_pctch.columns = ['TSC_%', 'ASC_%', 'TA_%', 'AA_%', 'ACO_%']
attendance_df_premier_pctch = pd.merge(attendance_df_premier, attendance_df_premier_pctch, how='left', left_index=True, right_index=True)
attendance_df_premier_pctch = attendance_df_premier_pctch[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                                                         'Total Attendance', 'TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#attendance_df_premier_pctch
attendance_df_premier_pctch.to_csv(r'../your-project/data/tv_stadiums/merged_cleaned_data/attendance_df_premier_pctch.csv')

In [84]:
attendance_df_laliga_pctch = attendance_df_laliga[['Total Stadiums Capacity', 'Average Stadiums Capacity', 'Total Attendance', 'Average Attendance', 'Average Capacity Ocupation']]
attendance_df_laliga_pctch = attendance_df_laliga_pctch.pct_change()
attendance_df_laliga_pctch.columns = ['TSC_%', 'ASC_%', 'TA_%', 'AA_%', 'ACO_%']
attendance_df_laliga_pctch = pd.merge(attendance_df_laliga, attendance_df_laliga_pctch, how='left', left_index=True, right_index=True)
attendance_df_laliga_pctch = attendance_df_laliga_pctch[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                                                         'Total Attendance', 'TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#attendance_df_laliga_pctch
attendance_df_laliga_pctch.to_csv(r'../your-project/data/tv_stadiums/merged_cleaned_data/attendance_df_laliga_pctch.csv')

In [85]:
attendance_df_ligue1_pctch = attendance_df_ligue1[['Total Stadiums Capacity', 'Average Stadiums Capacity', 'Total Attendance', 'Average Attendance', 'Average Capacity Ocupation']]
attendance_df_ligue1_pctch = attendance_df_ligue1_pctch.pct_change()
attendance_df_ligue1_pctch.columns = ['TSC_%', 'ASC_%', 'TA_%', 'AA_%', 'ACO_%']
attendance_df_ligue1_pctch = pd.merge(attendance_df_ligue1, attendance_df_ligue1_pctch, how='left', left_index=True, right_index=True)
attendance_df_ligue1_pctch = attendance_df_ligue1_pctch[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                                                         'Total Attendance', 'TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#attendance_df_ligue1_pctch
attendance_df_ligue1_pctch.to_csv(r'../your-project/data/tv_stadiums/merged_cleaned_data/attendance_df_ligue1_pctch.csv')

In [86]:
attendance_df_seriea_pctch = attendance_df_seriea[['Total Stadiums Capacity', 'Average Stadiums Capacity', 'Total Attendance', 'Average Attendance', 'Average Capacity Ocupation']]
attendance_df_seriea_pctch = attendance_df_seriea_pctch.pct_change()
attendance_df_seriea_pctch.columns = ['TSC_%', 'ASC_%', 'TA_%', 'AA_%', 'ACO_%']
attendance_df_seriea_pctch = pd.merge(attendance_df_seriea, attendance_df_seriea_pctch, how='left', left_index=True, right_index=True)
attendance_df_seriea_pctch = attendance_df_seriea_pctch[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                                                         'Total Attendance', 'TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#attendance_df_seriea_pctch
attendance_df_seriea_pctch.to_csv(r'../your-project/data/tv_stadiums/merged_cleaned_data/attendance_df_seriea_pctch.csv')

In [92]:
path = r'/Users/arnauangerri/Desktop/IronHack/Week5/Project-Week-5-Your-Own-Project/your-project/data/tv_stadiums/merged_cleaned_data/'
all_files = glob.glob(path + "/*.csv")

stadiums = []

for filename in all_files:
    dataframes = pd.read_csv(filename, index_col=None, header=0)
    stadiums.append(dataframes)

stadiums_df = pd.concat(stadiums, axis=0, ignore_index=True)

stadiums_df = stadiums_df[['Season End Year', 'League', 'Nº Teams', 'Total Stadiums Capacity', 'TSC_%', 'Average Stadiums Capacity', 'ASC_%', 
                           'Total Attendance','TA_%', 'Average Attendance', 'AA_%', 'Average Capacity Ocupation', 'ACO_%']]
#stadiums_df.head(50)
#stadiums_df.to_csv(r'../your-project/data/tableau_dfs/stadiums_df.csv')