In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "ticks" theme to every plot drawn after this cell.
# color_codes=True asks seaborn to remap matplotlib's shorthand colour codes
# ("b", "g", "r", ...) to the colours of the seaborn palette.
sns.set_theme(style="ticks", color_codes=True)

In [None]:
# Location of the transfers dataset inside the Kaggle input directory.
transfers_csv = "/kaggle/input/the-most-expensive-football-transfers/football_players.csv"
# Load the raw table; every later cell reads from `df`.
df = pd.read_csv(transfers_csv)

In [None]:
# Peek at the first two rows to see the available columns.
df.head(n=2)

# Check for null values

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Summary statistics of the transfer fee column, transposed so that the
# statistics read across a single row.
df[['Fee(€ mln)']].describe().transpose()

# Distribution of transfer fees by selling club

In [None]:
# Box plot of the fee distribution for each selling club.
# NOTE: despite its name, `ax` holds the FacetGrid returned by catplot.
ax = sns.catplot(
    data=df,
    x="From(Club)",
    y="Fee(€ mln)",
    kind="box",
    height=8,
    aspect=15/8,
)
# Tilt the club names so the tick labels do not overlap.
plt.xticks(rotation=45)

# Total Transfers in a year

In [None]:
# Count the rows with a non-null 'Origin' per year (one row per transfer),
# busiest years first, and give the columns presentation-friendly names.
year_df = (
    df.groupby('Year')['Origin']
    .count()
    .reset_index()
    .sort_values(by='Origin', ascending=False)
    .rename(columns={'Year': 'Year of transfer', 'Origin': 'Total players'})
)

In [None]:
# Bar chart of the number of transfers recorded in each year.
plt.figure(figsize=(18, 15))
p = sns.barplot(
    data=year_df,
    x='Year of transfer',
    y='Total players',
    ax=plt.gca(),  # draw on the figure created just above
)
p.set_xlabel("Year of Transfer")
p.set_ylabel("# Player transfer")
p.set_title("Total transfers in a year")

# Transfer fees by position (Average)

In [None]:
# Mean transfer fee per playing position, most expensive position first.
postional_df = (
    df.groupby(['Position'])['Fee(€ mln)']
    .mean()
    .reset_index()
    .sort_values(by='Fee(€ mln)', ascending=False)
)
pie, ax = plt.subplots(figsize=[10, 6])
labels = postional_df['Position']
# The pie normalises its input, so each percentage is a position's share of the
# *sum of the per-position means*, not of the total money spent.
ax.pie(
    x=postional_df['Fee(€ mln)'],
    autopct="%.1f%%",
    # matplotlib requires one explode offset per wedge. Derive the count from the
    # data instead of hard-coding 5, which raised ValueError for any dataset that
    # did not have exactly five distinct positions.
    explode=[0.025] * len(postional_df),
    labels=labels,
    pctdistance=0.5,
)
ax.set_title("Transfer fee (breakdown by position)", fontsize=14);

# Maximum Transfer fee over various years

In [None]:
# Highest single transfer fee paid in each year, largest first.
years = (
    df.groupby('Year')['Fee(€ mln)']
    .max()
    .reset_index()
    .sort_values(by='Fee(€ mln)', ascending=False)
)
plt.figure(figsize=(18, 15))
p = sns.barplot(
    data=years,
    x='Year',
    y='Fee(€ mln)',
    ax=plt.gca(),  # draw on the figure created just above
)
p.set_xlabel("Year of Transfer")
p.set_ylabel("money")
p.set_title("Maximum Transfer amount by years")

# Top five countries with the most expensive players

In [None]:
# Total transfer fees per country of origin, highest total first.
country = (
    df.groupby('Origin', as_index=False, sort=True)['Fee(€ mln)']
    .sum()
    .sort_values('Fee(€ mln)', ascending=False)
)
country.head()

In [None]:
# Keep only the five countries with the largest fee totals (the frame is
# already sorted in descending order) and plot them.
country = country.iloc[:5]
plt.figure(figsize=(18, 15))
p = sns.barplot(
    data=country,
    x='Origin',
    y='Fee(€ mln)',
    ax=plt.gca(),  # draw on the figure created just above
)
p.set_xlabel("Name of the Country")
p.set_ylabel("sum of money")
p.set_title("top 5 countries where the most expensive players come from")

# Club performance by transfers in and transfers out

In [None]:
def _fees_per_club_and_year(club_column):
    """Sum the fees in `df` per (club, year) pair, largest totals first.

    `club_column` names the column of `df` that holds the club to group on.
    """
    totals = df.groupby([club_column, 'Year'])['Fee(€ mln)'].sum().reset_index()
    return totals.sort_values(by='Fee(€ mln)', ascending=False)


# Fees grouped by the selling club and by the buying club respectively.
transfer_out = _fees_per_club_and_year('From(Club)')
transfer_in = _fees_per_club_and_year('To(Club)')

## Transfers out of the club

In [None]:
# Bar chart of the per-year fee totals of each selling club; with several
# yearly rows per club, seaborn aggregates them into a single bar.
sns.catplot(
    data=transfer_out,
    x="From(Club)",
    y="Fee(€ mln)",
    kind="bar",
    height=8,
    aspect=15/8,
)
# Tilt the club names so the tick labels do not overlap.
plt.xticks(rotation=45)

## Transfers to the club

In [None]:
# Bar chart of the per-year fee totals of each buying club; with several
# yearly rows per club, seaborn aggregates them into a single bar.
sns.catplot(
    data=transfer_in,
    x="To(Club)",
    y="Fee(€ mln)",
    kind="bar",
    height=8,
    aspect=15/8,
)
# Tilt the club names so the tick labels do not overlap.
plt.xticks(rotation=45)

# Profits of clubs based on Transfers

In [None]:
# Empty per-club ledger; the columns are filled in by the following cells.
balance_columns = ['clubs', 'transfer_in', 'transfer_out', 'balance']
balance_df = pd.DataFrame(columns=balance_columns)

In [None]:
# Every club that appears as a seller followed by every club that appears as a
# buyer; each half is unique on its own and keeps its order of first appearance.
club_list = [*df['From(Club)'].unique(), *df['To(Club)'].unique()]

# De-duplicate clubs present on both sides. dict.fromkeys keeps insertion
# order, so `clubs` is identical on every run, whereas list(set(...)) ordered
# the names by string hash, which changes between kernel sessions.
clubs = list(dict.fromkeys(club_list))

In [None]:
# Total fees each club received for players sold and paid for players bought,
# computed with one groupby each instead of filtering `df` once per club.
fees_received = df.groupby('From(Club)')['Fee(€ mln)'].sum()
fees_paid = df.groupby('To(Club)')['Fee(€ mln)'].sum()

# Build the ledger from scratch on every run. Writing into the existing frame
# with .loc[i, ...] after it had been sorted in place paired club names
# (assigned by position) with sums (assigned by index label) incorrectly when
# the cell was executed a second time.
balance_df = pd.DataFrame({'clubs': clubs})
# A club that only bought or only sold is absent from one of the totals -> 0.
balance_df['transfer_in'] = balance_df['clubs'].map(fees_paid).fillna(0.0)
balance_df['transfer_out'] = balance_df['clubs'].map(fees_received).fillna(0.0)
# Positive balance: the club earned more from sales than it spent on purchases.
balance_df['balance'] = balance_df['transfer_out'] - balance_df['transfer_in']
# Most profitable clubs first, biggest net spenders last.
balance_df = balance_df.sort_values(by='balance', ascending=False)

## Top five clubs with profitable transfers

In [None]:
# `balance_df` is sorted by descending balance, so its first five rows are the
# clubs with the largest surplus of sales over purchases.
ftbl3 = balance_df.head(5)
plt.figure(figsize=(18, 15))
sns.set_style('darkgrid')
p = sns.barplot(
    data=ftbl3,
    x='clubs',
    y='balance',
    ax=plt.gca(),  # draw on the figure created just above
)
p.set_xlabel("Clubs")
p.set_ylabel("sum of money")
p.set_title("top 5 clubs which are profitable in transfers")

## Top five clubs that have spent the most on buying players

In [None]:
# `balance_df` is sorted by descending balance, so its last five rows are the
# clubs with the most negative balance, i.e. the biggest net spenders.
# .assign returns a new frame, which avoids the SettingWithCopyWarning raised
# by assigning a column on the slice returned by .tail(). The result is then
# ordered largest spender first so the bars read like the other "top 5" chart.
ftbl3 = (
    balance_df.tail()
    .assign(balance=lambda frame: frame['balance'].abs())
    .sort_values(by='balance', ascending=False)
)
plt.figure(figsize=(18,15))
sns.set_style('darkgrid')
# The bar height is the absolute net balance (purchases minus sales), not the
# gross amount spent on purchases.
p = sns.barplot(x='clubs',y='balance',data=ftbl3)
p.set( xlabel = "Clubs", ylabel = "sum of money")
p.set_title("top 5 clubs highest spenders")