## Importing necessary libraries

In [None]:
# Write a greeting to stdout; nothing else in the notebook depends on this cell.
print("Hi")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Reading CSV and manipulating DataFrames

In [None]:
# Load only the columns needed for the batting summary; Start_date is parsed
# into datetimes on read.
df = pd.read_csv(
    't20_batting_source.csv',
    usecols=['Player', 'Runs', '4s', '6s', 'Start_date'],
    parse_dates=['Start_date'],
)

# 'Player' is expected to look like "Name (XXX)": the text before the first
# " (" becomes the player name, the text inside the first pair of parentheses
# becomes the country code.
nplayer = [x.split(" (")[0] for x in df["Player"]]
country = [x.split("(")[1].split(")")[0] for x in df["Player"]]
df["Player"] = nplayer
df["Country"] = country

# Remove every literal '*' from Runs (presumably the not-out marker -- confirm
# against the source data). regex=False is explicit because '*' on its own is
# not a valid regular expression and the default of `regex` differs between
# pandas versions.
df['Runs'] = df['Runs'].str.replace('*', '', regex=False)

# Drop rows whose Runs value is one of the non-numeric placeholders below.
# errors='ignore' keeps this working when a given placeholder does not occur
# in the data (a plain drop() raises KeyError on a missing label).
df1 = df.set_index('Runs')
df2 = df1.drop(['DNB', 'TDNB', 'absent', 'sub'], errors='ignore').reset_index()

# Same treatment for rows whose extracted "country" is one of these labels.
cdf = df2.set_index('Country')
mdf = cdf.drop(['ICC', '1', '2', '3'], errors='ignore').reset_index()

# Round-trip through CSV so the numeric columns can be re-read as integers.
mdf.to_csv('t20_modified.csv', index=False)

ndf = pd.read_csv('t20_modified.csv', dtype={'Runs': int, '4s': int, '6s': int})

# 1 for an innings of 50-99 runs (both bounds inclusive), otherwise 0.
ndf['50s'] = ndf['Runs'].between(50, 99).astype(int)
ndf.head()

# Zero-run placeholder rows, one per country, all dated 1 January 1971.
#
# The dates are spelled as full ISO dates ('1971-01-01') instead of '1971-01'.
# Both parse to the same timestamp, but these rows are placed *first* when
# concatenated with the match data, and pandas >= 2.0 infers a single date
# format from the first value: a leading '1971-01' makes pd.to_datetime reject
# later full 'YYYY-MM-DD' values (which is presumably what the match rows
# carry -- confirm against t20_modified.csv).
ddf = pd.DataFrame([['0AA', 'INDIA', '1971-01-01', 0],
                    ['1AA', 'PAK', '1971-01-01', 0],
                    ['2AA', 'AUS', '1971-01-01', 0],
                    ['3AA', 'SL', '1971-01-01', 0],
                    ['4AA', 'WI', '1971-01-01', 0],
                    ['5AA', 'ENG', '1971-01-01', 0],
                    ['6AA', 'SA', '1971-01-01', 0],
                    ['7AA', 'NZ', '1971-01-01', 0],
                    ['8AA', 'BDESH', '1971-01-01', 0],
                    ['9AA', 'AFG', '1971-01-01', 0]],
                   columns=['Player', 'Country', 'Start_date', 'Runs'])

# Stack the placeholder rows on top of the cleaned innings and renumber the
# index from zero; column order follows first appearance (no sorting).
adf = pd.concat([ddf, ndf], sort=False, ignore_index=True)

# Attach the per-country columns from flags.csv. The default inner join keeps
# only rows whose Country also appears in that file.
fdf = pd.read_csv('flags.csv')
bigdata = adf.merge(fdf, on="Country")

# Dates become datetime64; every remaining missing value (e.g. the columns the
# placeholder rows do not define) becomes 0.
bigdata['Start_date'] = pd.to_datetime(bigdata['Start_date'])
bigdata = bigdata.fillna(value=0)
bigdata.head()

# Totals per (Player, Country), sorted by Runs from highest to lowest.
# The column subset is passed as a list: selecting several columns with a bare
# tuple (groupby(...)['a', 'b']) was deprecated in pandas 1.0 and raises in 2.0.
summary = (
    bigdata.groupby(['Player', 'Country'])[['Runs', '4s', '6s', '50s']]
    .sum()
    .sort_values('Runs', ascending=False)
)
# Turn the (Player, Country) index back into ordinary columns.
summary.reset_index(inplace=True)
summary.head(10)

## Visualizations

In [None]:
# Select seaborn's 'dark' theme for the plots drawn below.
sns.set_style('dark')

def plot_fig(col):
    """Plot the 15 rows of ``summary`` with the largest ``col`` as horizontal bars.

    A fresh 10x5 figure is opened on every call. ``col`` names a column of the
    module-level ``summary`` frame and is also inserted into the chart title,
    so it must be a string.
    """
    plt.figure(figsize=(10, 5))
    ranked = summary.sort_values(col, ascending=False)
    top15 = ranked.iloc[:15]
    sns.barplot(x=col, y='Player', data=top15, palette='muted')
    title_parts = ['Top 15 cricketers with highest ', col, ' in T20s']
    plt.title(''.join(title_parts))

In [None]:
# Draw one top-15 chart for each batting metric, in this order.
for col in ('Runs', '4s', '6s', '50s'):
    plot_fig(col)

## Generating batting records (CSV files)

In [None]:
def final_csv(col):
    """Write the running total of ``col`` per player to ``final_t20_<col>.csv``.

    Reads the module-level ``bigdata`` frame, which must provide the columns
    Player, Country, Flags, Start_date (datetime) and ``col``.

    Steps:
      1. Pivot to one row per (Player, Country, Flags) and one column per
         Start_date. Values sharing a row and date are *summed*; the
         pivot_table default (mean) would average several innings on one
         date and understate the running total.
      2. Treat dates without an entry as 0 and accumulate left to right.
      3. Relabel each date column with its year only, so several columns
         can share a label when a year contains more than one date.

    Args:
        col: Name of the numeric column to accumulate (e.g. 'Runs').

    Returns:
        The cumulative DataFrame that was written to disk.
    """
    pivot_df = bigdata.pivot_table(
        index=["Player", "Country", "Flags"],
        columns="Start_date",
        values=col,
        aggfunc="sum",
    )
    cum_df = pivot_df.fillna(value=0).cumsum(axis=1)
    cum_df.columns = cum_df.columns.year
    cum_df.to_csv('final_t20_' + col + '.csv')
    return cum_df

# Export one cumulative CSV per metric (Runs, 4s, 6s, 50s, in that order) and
# keep whatever each call returns under a descriptive name.
runs, fours, sixes, fifties = [
    final_csv(metric) for metric in ('Runs', '4s', '6s', '50s')
]