# Stage 1 : Data Exploration

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import numpy as np 
import seaborn as sns 

In [None]:
# Load the transfer records from the CSV file and preview the first rows.
csv_path = '../input/top-250-football-transfers-from-2000-to-2018/top250-00-19.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
df.describe()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Number of rows that are exact repeats of an earlier row.
df.duplicated().sum()

In [None]:
# Pairwise correlation of the numeric columns only. The frame also holds text
# columns (e.g. Position, League_to), and recent pandas versions raise instead
# of silently dropping them, so select the numeric columns explicitly.
df.select_dtypes(include='number').corr()

# Stage 2: Data Manipulation

In [None]:
# Collapse the detailed position labels into four broad groups.
# 'Sweeper' is a defensive role, so it belongs with the defenders.
Defender = ['Centre-Back', 'Right-Back', 'Left-Back', 'Defender', 'Sweeper']
Forward = ['Right Winger', 'Centre-Forward', 'Left Winger', 'Forward', 'Second Striker']
Midfielder = ['Central Midfield', 'Attacking Midfield', 'Defensive Midfield',
              'Left Midfield', 'Right Midfield', 'Midfielder']

# One boolean mask per group, in the same order as `values`.
conditions = [
    df['Position'].isin(Defender),
    df['Position'].isin(Forward),
    df['Position'].isin(Midfielder),
    df['Position'] == 'Goalkeeper',
]

values = ['Defender', 'Forward', 'Midfielder', 'Goalkeeper']

# np.select falls back to the integer 0 when no condition matches, which
# cannot be combined with string choices (it ends up as the text '0' or
# raises, depending on the NumPy version). Keep the original label for any
# position that is not listed above so nothing is silently mislabelled.
df['Position'] = np.select(conditions, values, default=df['Position'])

In [None]:
# Preview the first rows after regrouping the Position column.
df.head()

In [None]:
# Keep only the transfers whose destination league is one of these five.
top5 = ['Premier League', 'Serie A', 'LaLiga', 'Ligue 1', '1.Bundesliga']
into_top5 = df['League_to'].isin(top5)
df_top5 = df.loc[into_top5]

# Stage 3: Exploratory Data Analysis (EDA)

## A. General Stats

### The Most Requested Position in Top 5 Leagues

In [None]:
# Transfers per position group, most frequent first.
position_counts = df_top5['Position'].value_counts()
position_counts.sort_values(ascending=False)

In [None]:
# Bar chart of transfers per position, bars ordered from most to least frequent.
position_order = df_top5['Position'].value_counts().sort_values(ascending=False).index
sns.countplot(x='Position', data=df_top5, order=position_order)
plt.title('The Most Requested Position in Top 5 Leagues', ha='center', fontsize=15, style='oblique')
plt.show()

In [None]:
# Share of each position group as a pie chart with percentage labels.
position_share = df_top5['Position'].value_counts()
position_share.plot(kind='pie', figsize=(20, 10), autopct='%1.1f%%', shadow=True)
plt.title('The Positions Requested in Top 5 Leagues ( % ) ', ha='center', fontsize=15, style='oblique')
plt.ylabel('')  # hide the default axis label
plt.show()

In [None]:
# Transfers per position group, split by destination league.
sns.countplot(
    x='Position',
    hue='League_to',
    data=df_top5,
)
plt.title('Players Positions in Top 5 Leagues', fontsize=15, style='oblique')
plt.legend(title='League')
plt.show()

### How many transfers are made each season ?

In [None]:
# Number of transfers recorded in each season.
by_season = df_top5.groupby('Season')
by_season['Season'].count()

In [None]:
# Bar chart of the number of transfers per season.
sns.countplot(data=df_top5, x='Season')
plt.title('Transfers For Every Season in Top 5 Leagues (2000-2018)', fontsize=15, style='oblique')
plt.xticks(rotation=90)  # season labels are long; rotate them
plt.show()

In [None]:
# Number of transfers for every (destination league, season) pair.
by_league_season = df_top5.groupby(['League_to', 'Season'])
by_league_season['Season'].count()

In [None]:
# Transfers per season, split by destination league.
# The figure size must be configured BEFORE the plot is drawn: rc settings are
# only read when a figure is created, so setting them afterwards affects the
# next figure rather than this one.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x='Season',hue='League_to',data=df_top5)
plt.xticks(rotation=90)
plt.ylabel('')
plt.xlabel('')
plt.title('Transfers For Every Season in Top 5 Leagues (2000-2018) ',fontsize=15,style='oblique')
# Place the legend outside the axes so it does not cover the bars.
plt.legend(title='League', bbox_to_anchor=(1,1), loc='upper left')
plt.show()

### Which League Pays The Most Money in Transfer Market? 

In [None]:
# Total transfer fee per destination league, largest first.
fees_by_buyer = df_top5.groupby('League_to')['Transfer_fee'].sum()
League_buy = fees_by_buyer.sort_values(ascending=False)
League_buy

In [None]:
# Bar chart of the total transfer fee per destination league.
League_buy = (
    df_top5.groupby(['League_to'])['Transfer_fee']
    .sum()
    .sort_values(ascending=False)
)
League_buy.plot(kind='bar', lw=2, edgecolor='black', width=0.2)
plt.xlabel('League', fontsize=12)
# One tick every 2 billion, labelled in whole billions.
yticks = list(np.arange(0, 18000000000, 2000000000))
billion_labels = [f"{round(int(tick)/1000000000)} Billion $ " if tick > 0 else "0" for tick in yticks]
plt.yticks(ticks=yticks, labels=billion_labels)
plt.title('Which League Pays The Most Money in Transfer Market? ', fontsize=15)
plt.show()

In [None]:
# Share of the total spending per destination league.
League_buy.plot(
    kind='pie',
    figsize=(20, 10),
    autopct='%1.1f%%',
    shadow=True,
)
plt.ylabel('')  # hide the default axis label
plt.title('Which League Pays The Most Money in Transfer Market? ( % ) ', fontsize=15)
plt.show()

### Which League Gains The Most Money From Selling Players? 

In [None]:
# Total transfer fee per origin league, largest first.
# NOTE(review): this is computed on df_top5, so only sales to the five
# destination leagues are counted -- confirm that this scope is intended.
fees_by_seller = df_top5.groupby('League_from')['Transfer_fee'].sum()
League_sell = fees_by_seller.sort_values(ascending=False)
League_sell

In [None]:
# Bar chart of the five origin leagues with the largest total fee.
top_sellers_by_league = League_sell.head()
top_sellers_by_league.plot(kind='bar', lw=2, edgecolor='black', width=0.2)
plt.xlabel('League')
plt.title('Which League Gains The Most Money From Selling Players? ', fontsize=15)
# One tick every billion, labelled in whole billions.
yticks = list(np.arange(0, 9000000000, 1000000000))
billion_labels = [f"{round(int(tick)/1000000000)} Billion $ " if tick > 0 else "0" for tick in yticks]
plt.yticks(ticks=yticks, labels=billion_labels)
plt.show()

###  Do Top 5 leagues make profits or losses from buying and selling players?

In [None]:
# Net balance per buying league: fees received from sales minus fees spent.
# League_sell is indexed by every origin league while League_buy only holds the
# destination leagues, so a plain subtraction leaves NaN rows for every league
# that appears on one side only. Align explicitly on the buying leagues
# (a league with no recorded sales counts as 0) instead of relying on NaN
# values sorting last.
# NOTE(review): League_sell is derived from df_top5, so sales to clubs outside
# those leagues are not included -- confirm this is the intended definition.
profits = League_sell.reindex(League_buy.index, fill_value=0) - League_buy
profits = profits.sort_values()
# Show the balance in billions.
profits.head()/1000000000

In [None]:
# Bar chart of the five lowest balances in `profits`.
# x and y are passed directly as vectors, so no `data=` argument is needed
# (a Series is not a valid data source for long-form input in newer seaborn).
# The categorical axis is already labelled with the index values, so the tick
# labels do not have to be set manually.
lowest_balances = profits.head()
sns.barplot(x=lowest_balances.index, y=lowest_balances.values)
plt.title('Do Top 5 leagues make profits or losses from buying and selling players?',style='oblique',fontsize=15)
plt.xlabel('League')
plt.show()

###  Which League made the most transfers in? 

In [None]:
# Number of incoming transfers per destination league, most frequent first.
df_top5['League_to'].value_counts()

In [None]:
# Bar chart of incoming transfers per league, most active league first.
league_order = df_top5['League_to'].value_counts().index
sns.countplot(x='League_to', data=df_top5, order=league_order)
plt.xlabel('League')
plt.xticks(rotation=90)
plt.title('Which League Had the Most Transfers In?', fontsize=15, style='oblique')
plt.show()

In [None]:
# Share of incoming transfers per league.
league_share = df_top5['League_to'].value_counts()
league_share.plot(kind='pie', figsize=(12.5, 20), autopct='%1.1f%%', shadow=True)
plt.ylabel('')  # hide the default axis label
plt.title('Which League Had The Most Transfers In? ( % )', fontsize=15)
plt.show()

### The most number of transfers between leagues

In [None]:
# The five most frequent (destination league, origin league) pairs.
pair_counts = df_top5.groupby(['League_to', 'League_from'])['League_to'].count()
pair_counts.sort_values(ascending=False).head()

## B. Team Stats

### The teams with the largest transfer fee

In [None]:
# Total fee per buying team (with its league), largest first; show the top ten.
fees_by_team_to = df_top5.groupby(['Team_to', 'League_to'])['Transfer_fee'].sum()
top_buyers = fees_by_team_to.sort_values(ascending=False)
top_buyers.head(10)

In [None]:
# Bar chart of the ten biggest spenders, scaled to billions.
top_buyers_billions = top_buyers.head(10) / 1000000000
top_buyers_billions.plot(kind='bar', lw=2, edgecolor='black')
plt.ylabel('Billion $')
plt.xlabel(' ')
plt.show()

### The teams with the most gained money

In [None]:
# Total fee per selling team (with its league), largest first; show the top ten.
fees_by_team_from = df_top5.groupby(['Team_from', 'League_from'])['Transfer_fee'].sum()
top_sellers = fees_by_team_from.sort_values(ascending=False)
top_sellers.head(10)

In [None]:
# Bar chart of the ten teams with the largest total fee received.
top_sellers = (
    df_top5.groupby(['Team_from', 'League_from'])['Transfer_fee']
    .sum()
    .sort_values(ascending=False)
)
top_sellers.head(10).plot(kind='bar', lw=2, edgecolor='black')
# One tick every 100 million, labelled in whole millions.
yticks = list(np.arange(0, 1000000000, 100000000))
million_labels = [f"{round(int(tick)/1000000)} Million $ " if tick > 0 else "0" for tick in yticks]
plt.yticks(ticks=yticks, labels=million_labels)
plt.title('Which Team Gained The Most Money From Selling His Players? ', fontsize=15, style='oblique')
plt.xlabel(' ')
plt.show()

### The team with the most transfers in the top 5 Leagues

In [None]:
# The ten teams with the most incoming transfers.
team_counts = df_top5['Team_to'].value_counts()
team_counts.iloc[0:10]

In [None]:
# Incoming transfers for the 75 most active buying teams.
# Configure the figure size before drawing: rc settings are only read when a
# figure is created, so setting them after the plot has no effect on it.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x="Team_to", data=df_top5,order=df_top5['Team_to'].value_counts()[0:75].index)
plt.xticks(rotation=90,fontsize='12')
plt.title('Which Team Made The Most Number of Transfers In? ',fontsize=15)
plt.show()

### The team with the most transfers in 1.Bundesliga

In [None]:
# Incoming transfers per team for the 1.Bundesliga, most active team first.
v = df.loc[df['League_to'] == '1.Bundesliga']
team_order = v['Team_to'].value_counts().index
sns.countplot(x="Team_to", data=v, order=team_order)
plt.xticks(rotation=90, fontsize='12')
plt.xlabel(' ')
plt.title('1.Bundesliga Transfers in (2000-2018)', fontsize=15, style='oblique')
plt.show()

### The team with the most transfers in LaLiga

In [None]:
# Incoming transfers per team for LaLiga (the 28 most active teams).
v=df.loc[df.League_to=='LaLiga']
# Configure the figure size before drawing: rc settings are only read when a
# figure is created, so setting them after the plot has no effect on it.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x="Team_to", data=v,order=v['Team_to'].value_counts()[0:28].index)
plt.xlabel(' ')
plt.xticks(rotation=90,fontsize='12')
plt.title('LaLiga Transfers (2000-2018)',fontsize=15,style='oblique')
plt.show()

### The team with the most transfers in Ligue 1

In [None]:
# Incoming transfers per team for Ligue 1, most active team first.
v=df.loc[df.League_to=='Ligue 1']
# Configure the figure size before drawing: rc settings are only read when a
# figure is created, so setting them after the plot has no effect on it.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x="Team_to", data=v,order=v['Team_to'].value_counts().index)
plt.xlabel(' ')
plt.xticks(rotation=90,fontsize='12')
plt.title('Ligue 1 Transfers (2000-2018)',fontsize=15,style='oblique')
plt.show()

### The team with the most transfers in Premier League

In [None]:
# Incoming transfers per team for the Premier League, most active team first.
v=df.loc[df.League_to=='Premier League']
# Configure the figure size before drawing: rc settings are only read when a
# figure is created, so setting them after the plot has no effect on it.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x="Team_to", data=v,order=v['Team_to'].value_counts().index)
plt.xlabel(' ')
plt.xticks(rotation=90,fontsize='12')
plt.title('Premier League Transfers (2000-2018)',fontsize=15,style='oblique')
plt.show()

### The team with the most transfers in Serie A

In [None]:
# Incoming transfers per team for Serie A, most active team first.
v=df.loc[df.League_to=='Serie A']
# Configure the figure size before drawing: rc settings are only read when a
# figure is created, so setting them after the plot has no effect on it.
sns.set(rc={'figure.figsize':(20,5)})
sns.countplot(x="Team_to", data=v,order=v['Team_to'].value_counts().index)
plt.xticks(rotation=90,fontsize='12')
plt.xlabel(' ')
plt.title('Serie A Transfers (2000-2018)',fontsize=15,style='oblique')
plt.show()

## C. Players stats

### Players' Age distribution

In [None]:
# Number of transfers for each player age.
sns.countplot(data=df_top5, x='Age')
plt.title('Players Age Distribution', fontsize=15, style='oblique')
plt.show()

### Age VS Transfer fee stats

In [None]:
# For each age: number of non-null fees plus the smallest and largest fee.
df_top5.groupby(['Age'])['Transfer_fee'].agg(['count','min','max'])

### Age VS League Stats

In [None]:
# Youngest, oldest and mean age per destination league.
age_by_league = df_top5.groupby('League_to')['Age']
age_by_league.agg(['min', 'max', 'mean'])

### Age VS Position Stats

In [None]:
# Youngest, oldest and mean age per position group.
age_by_position = df_top5.groupby('Position')['Age']
age_by_position.agg(['min', 'max', 'mean'])

### Position VS Transfer fee Stats

In [None]:
# Smallest, largest and mean fee per position group.
fee_by_position = df_top5.groupby('Position')['Transfer_fee']
fee_by_position.agg(['min', 'max', 'mean'])

### Top 5 Leagues - The Youngest Player

In [None]:
# Row of the transfer with the lowest Age value.
youngest_label = df_top5['Age'].idxmin()
df_top5.loc[youngest_label]

### Top 5 Leagues - The Oldest Player

In [None]:
# Row of the transfer with the highest Age value.
oldest_label = df_top5['Age'].idxmax()
df_top5.loc[oldest_label]

### Top 5 Leagues - The Highest Transfer fee

In [None]:
# Row of the transfer with the highest Transfer_fee value.
highest_fee_label = df_top5['Transfer_fee'].idxmax()
df_top5.loc[highest_fee_label]

### Top 5 Leagues - The Highest Market Value

In [None]:
# Row of the transfer with the highest Market_value value.
highest_value_label = df_top5['Market_value'].idxmax()
df_top5.loc[highest_value_label]

### Top 10 Players ( Transfer Fee )

In [None]:
# The ten transfers with the highest fee, renumbered from 0.
by_fee = df_top5.sort_values('Transfer_fee', ascending=False)
by_fee.iloc[0:10].reset_index(drop=True)

### Top 10 Players ( Market Value )

In [None]:
# The ten transfers with the highest market value, renumbered from 0.
by_market_value = df_top5.sort_values('Market_value', ascending=False)
by_market_value.iloc[0:10].reset_index(drop=True)

### LaLiga - Top 10 Players (Transfer Fee)

In [None]:
# LaLiga arrivals ordered by fee (highest first); show the top ten.
laliga_rows = df_top5[df_top5['League_to'] == 'LaLiga']
spain = laliga_rows.sort_values('Transfer_fee', ascending=False).reset_index(drop=True)
spain.head(10)

In [None]:
# Buying teams among the ten most expensive LaLiga arrivals.
spain['Team_to'].head(10).value_counts()

### Ligue 1 - Top 10 Players (Transfer Fee)

In [None]:
# Ligue 1 arrivals ordered by fee (highest first); show the top ten.
ligue1_rows = df_top5[df_top5['League_to'] == 'Ligue 1']
france = ligue1_rows.sort_values('Transfer_fee', ascending=False).reset_index(drop=True)
france.head(10)

In [None]:
# Buying teams among the ten most expensive Ligue 1 arrivals.
france['Team_to'].head(10).value_counts()

### 1.Bundesliga - Top 10 Players (Transfer Fee)

In [None]:
# 1.Bundesliga arrivals ordered by fee (highest first); show the top ten.
bundesliga_rows = df_top5[df_top5['League_to'] == '1.Bundesliga']
Germany = bundesliga_rows.sort_values('Transfer_fee', ascending=False).reset_index(drop=True)
Germany.head(10)

In [None]:
# Buying teams among the ten most expensive 1.Bundesliga arrivals.
Germany['Team_to'].head(10).value_counts()

### Serie A - Top 10 Players (Transfer Fee)

In [None]:
# Serie A arrivals ordered by fee (highest first); show the top ten.
serie_a_rows = df_top5[df_top5['League_to'] == 'Serie A']
Italy = serie_a_rows.sort_values('Transfer_fee', ascending=False).reset_index(drop=True)
Italy.head(10)

In [None]:
# Buying teams among the ten most expensive Serie A arrivals.
Italy['Team_to'].head(10).value_counts()

### Premier League - Top 10 Players (Transfer Fee)

In [None]:
# Premier League arrivals ordered by fee (highest first); show the top ten.
premier_league_rows = df_top5[df_top5['League_to'] == 'Premier League']
England = premier_league_rows.sort_values('Transfer_fee', ascending=False).reset_index(drop=True)
England.head(10)

In [None]:
# Buying teams among the ten most expensive Premier League arrivals.
England['Team_to'].head(10).value_counts()

# Football transfers between 2000 - 2018 [EDA]