In [None]:
#STANDARD IMPORT PACKAGES

import pandas as pd
import numpy as np
import seaborn as sns
import scipy as sp
import sklearn

from matplotlib import pyplot as plt
from scipy.stats import norm, skew
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import neighbors
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import neural_network
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import datetime
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import confusion_matrix
# Allow up to 500 columns to be shown when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 500)
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# which also hides pandas deprecation / chained-assignment warnings raised by
# the cells below — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Load the player table from the CSV in the input directory and preview
# its first five rows.
dataset = pd.read_csv(
    '../input/epldata_final.csv',
)
dataset.head(5)

In [None]:
# Drop eight columns from the table, then report the remaining (rows, columns).
dataset = dataset.drop(
    columns=[
        'position_cat',
        'page_views',
        'fpl_value',
        'fpl_sel',
        'fpl_points',
        'region',
        'new_foreign',
        'club_id',
    ],
)
dataset.shape

In [None]:
# Manchester City squad: the rows whose club equals "Manchester+City"
# (the club spellings can be listed with dataset['club'].value_counts()).
# An explicit copy is taken because later cells add columns to this frame;
# writing into a plain boolean slice of `dataset` is chained assignment.
man_city = dataset[dataset['club'] == "Manchester+City"].copy()

In [None]:
# Preview the first five rows of the Manchester City frame.
man_city.head(5)

In [None]:
# Label each player 'England' when nationality is 'England', otherwise 'Overseas'.
man_city['England_Flag'] = np.where(
    man_city['nationality'].eq('England'),
    'England',
    'Overseas',
)

In [None]:
# Graph 1 - colour layer: total market_value per position, with one column
# per England_Flag value.
plot_df = (
    man_city
    .groupby(['position', 'England_Flag'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 2 - count layer.
# Number the players inside each position (starting at 1) after ordering by
# England_Flag ascending and market_value descending; the assignment aligns
# the numbers back onto the frame by index.
man_city['Rank'] = (
    man_city
    .sort_values(['England_Flag', 'market_value'], ascending=[True, False])
    .groupby(['position'])
    .cumcount()
    + 1
)
# One column per Rank, so each stacked segment corresponds to one ranked slot.
plot_df2 = (
    man_city
    .groupby(['position', 'Rank'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 3 - overlay the two layers on one figure.
fig, ax = plt.subplots()
ax2 = ax.twiny()  # second axes sharing the y-axis with `ax`
# Semi-transparent coloured stacks (by England_Flag) on the twin axes.
plot_df.plot.bar(width=1, stacked=True, ax=ax2, alpha=0.5)
# White stacks with black edges (one segment per Rank) on the base axes.
plot_df2.plot.bar(
    width=1,
    stacked=True,
    edgecolor='black',
    legend=False,
    color='white',
    ax=ax,
)
plt.show()

In [None]:
# Graph 4 - colour layer: total market_value per position, with one column
# per age_cat value.
plot_df3 = (
    man_city
    .groupby(['position', 'age_cat'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 5 - count layer.
# Overwrite Rank: number the players inside each position (starting at 1)
# after ordering by age_cat ascending and market_value descending.
man_city['Rank'] = (
    man_city
    .sort_values(['age_cat', 'market_value'], ascending=[True, False])
    .groupby(['position'])
    .cumcount()
    + 1
)
plot_df4 = (
    man_city
    .groupby(['position', 'Rank'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
#Correcting the positions
def position_groups(series):
    """Map a single position code onto its merged group.

    'SS' becomes 'CF', 'LM' becomes 'LW' and 'RM' becomes 'RW'; any other
    value is returned unchanged. Despite the parameter name, the argument is
    one value (one element of the column), not a whole Series.
    """
    merged_codes = (('SS', 'CF'), ('LM', 'LW'), ('RM', 'RW'))
    for old_code, new_code in merged_codes:
        if series == old_code:
            return new_code
    return series

# Rewrite the position column element by element through position_groups.
# NOTE(review): `man_city` was sliced from `dataset` before this cell runs, so
# it presumably still carries the un-merged codes — confirm that is intended.
dataset['position'] = dataset['position'].map(position_groups)

In [None]:
# Graph 6 - market price per position from the top 6 teams.
# Rows with big_club == 1 are summed per (position, club); those club totals
# are then averaged per position, and position is moved back into a column.
top_6 = (
    dataset[dataset['big_club'] == 1]
    .groupby(['position', 'club'])
    .agg({'market_value': 'sum'})
    .groupby(['position'])
    .agg({'market_value': 'mean'})
    .reset_index(level=0)
)

In [None]:
# Graph 7 - overlay the age_cat colours, the per-player outlines and the
# top-6 per-position averages.
col = sns.color_palette("YlOrRd")
fig, ax = plt.subplots()
ax2 = ax.twiny()  # second axes sharing the y-axis with `ax`
# Semi-transparent coloured stacks (by age_cat) on the twin axes.
plot_df3.plot.bar(width=1, stacked=True, ax=ax2, alpha=0.5, color=col)
# White stacks with black edges (one segment per Rank) on the base axes.
plot_df4.plot.bar(
    width=1,
    stacked=True,
    edgecolor='black',
    legend=False,
    color='white',
    ax=ax,
)
# Dark-blue markers: top_6 market_value against position.
plt.scatter(top_6['position'], top_6['market_value'], color='darkblue')
plt.legend(loc=1, prop={'size': 5})

In [None]:
# ATTACKING MIDFIELDER: Manchester City rows with position "AM".
man_city_am = man_city.loc[man_city['position'].eq("AM")]
man_city_am

In [None]:
# League-wide "AM" candidates: age under 31, market_value under 30 and
# new_signing equal to 0.
dataset_am = dataset.loc[
    dataset['position'].eq("AM")
    & dataset['age'].lt(31)
    & dataset['market_value'].lt(30)
    & dataset['new_signing'].eq(0)
]
dataset_am

In [None]:
# CENTER BACK: Manchester City rows with position "CB".
man_city_cb = man_city.loc[man_city['position'].eq("CB")]
man_city_cb

In [None]:
# League-wide "CB" candidates: age under 29, market_value above 20 and
# new_signing equal to 0.
dataset_cb = dataset.loc[
    dataset['position'].eq("CB")
    & dataset['age'].lt(29)
    & dataset['market_value'].gt(20)
    & dataset['new_signing'].eq(0)
]
dataset_cb

In [None]:
# CENTER FORWARD: Manchester City rows with position "CF".
man_city_cf = man_city.loc[man_city['position'].eq("CF")]
man_city_cf

In [None]:
# CENTER MIDFIELDER: Manchester City rows with position "CM".
man_city_cm = man_city.loc[man_city['position'].eq("CM")]
man_city_cm

In [None]:
# League-wide "CM" candidates: age under 30, market_value above 20 and
# new_signing equal to 0.
dataset_cm = dataset.loc[
    dataset['position'].eq("CM")
    & dataset['age'].lt(30)
    & dataset['market_value'].gt(20)
    & dataset['new_signing'].eq(0)
]
dataset_cm

In [None]:
# DEFENSIVE MIDFIELDER: Manchester City rows with position "DM".
man_city_dm = man_city.loc[man_city['position'].eq("DM")]
man_city_dm

In [None]:
# League-wide "DM" candidates: age under 30, market_value above 10 and
# new_signing equal to 0.
dataset_dm = dataset.loc[
    dataset['position'].eq("DM")
    & dataset['age'].lt(30)
    & dataset['market_value'].gt(10)
    & dataset['new_signing'].eq(0)
]
dataset_dm

In [None]:
# GOALKEEPERS: Manchester City rows with position "GK".
man_city_gk = man_city.loc[man_city['position'].eq("GK")]
man_city_gk

In [None]:
# LEFT BACK: Manchester City rows with position "LB".
man_city_lb = man_city.loc[man_city['position'].eq("LB")]
man_city_lb

In [None]:
# League-wide "LB" candidates: age under 30 and new_signing equal to 0
# (no market_value bound for this position).
dataset_lb = dataset.loc[
    dataset['position'].eq("LB")
    & dataset['age'].lt(30)
    & dataset['new_signing'].eq(0)
]
dataset_lb

In [None]:
# LEFT WING: Manchester City rows with position "LW".
man_city_lw = man_city.loc[man_city['position'].eq("LW")]
man_city_lw

In [None]:
# Being greedy here: look for one strong pick and one backup pick.
# League-wide "LW" candidates: age under 30, market_value above 15 and
# new_signing equal to 0.
dataset_lw = dataset.loc[
    dataset['position'].eq("LW")
    & dataset['age'].lt(30)
    & dataset['market_value'].gt(15)
    & dataset['new_signing'].eq(0)
]
dataset_lw

In [None]:
# Marquee candidates at any position: age under 30, market_value above 40
# and new_signing equal to 0.
dataset_marquee = dataset.loc[
    dataset['age'].lt(30)
    & dataset['market_value'].gt(40)
    & dataset['new_signing'].eq(0)
]
dataset_marquee

In [None]:
# RIGHT BACK: Manchester City rows with position "RB".
man_city_rb = man_city.loc[man_city['position'].eq("RB")]
man_city_rb

In [None]:
# League-wide "RB" candidates: age under 25, market_value under 20 and
# new_signing equal to 0.
dataset_rb = dataset.loc[
    dataset['position'].eq("RB")
    & dataset['age'].lt(25)
    & dataset['market_value'].lt(20)
    & dataset['new_signing'].eq(0)
]
dataset_rb

In [None]:
# RIGHT WINGER: Manchester City rows with position "RW".
man_city_rw = man_city.loc[man_city['position'].eq("RW")]
man_city_rw

In [None]:
# League-wide "RW" candidates, high-value search: age under 30,
# market_value of at least 40 and new_signing equal to 0.
dataset_rw = dataset.loc[
    dataset['position'].eq("RW")
    & dataset['age'].lt(30)
    & dataset['market_value'].ge(40)
    & dataset['new_signing'].eq(0)
]
dataset_rw

In [None]:
# League-wide "RW" candidates, lower-value search (rebinds dataset_rw):
# age under 25, market_value under 20 and new_signing equal to 0.
dataset_rw = dataset.loc[
    dataset['position'].eq("RW")
    & dataset['age'].lt(25)
    & dataset['market_value'].lt(20)
    & dataset['new_signing'].eq(0)
]
dataset_rw

In [None]:
# MAN CITY TEAM VALUE BEFORE TRANSFERS: sum of market_value over the squad.
team_value = man_city.loc[:, 'market_value'].sum()
team_value

In [None]:
# TRANSFERS DRAFT 1 - outgoing: keep every Manchester City row whose name is
# not in the list below.
man_city_new = man_city.loc[
    ~man_city['name'].isin([
        'David Silva',
        'Nicolas Otamendi',
        'Vincent Kompany',
        'Yaya Toure',
        'Kelechi Iheanacho',
        'Fernandinho',
        'Fernando',
        'Aleksandar Kolarov',
    ])
]

In [None]:
# Draft 1 - incoming: rows of the full table whose name is in the list below.
dataset_man_city = dataset.loc[
    dataset['name'].isin([
        'Manuel Lanzini',
        'Virgil van Dijk',
        'Eric Dier',
        'Trent Alexander-Arnold',
        'Jordon Ibe',
        'Ben Davies',
        'Ben Chilwell',
        'Giannelli Imbula',
    ])
]

In [None]:
# Squad value after draft 1: retained players plus incoming players.
team_value_new = sum(
    frame['market_value'].sum()
    for frame in (man_city_new, dataset_man_city)
)
team_value_new

In [None]:
# Squad after draft 1: retained players followed by the incoming players.
# pd.concat is used because DataFrame.append was removed in pandas 2.0; the
# row order and index labels are the same as with append.
# The helper columns England_Flag and Rank exist only on the retained rows,
# so they are dropped from the combined frame.
man_city_2 = (
    pd.concat([man_city_new, dataset_man_city])
    .drop(['England_Flag', 'Rank'], axis=1)
)
man_city_2

In [None]:
# Graph 8 - colour layer for the draft-1 squad: total market_value per
# position, with one column per age_cat value.
plot_df5 = (
    man_city_2
    .groupby(['position', 'age_cat'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 9 - count layer for the draft-1 squad.
# Number the players inside each position (starting at 1) after ordering by
# age_cat ascending and market_value descending.
man_city_2['Rank'] = (
    man_city_2
    .sort_values(['age_cat', 'market_value'], ascending=[True, False])
    .groupby(['position'])
    .cumcount()
    + 1
)
plot_df6 = (
    man_city_2
    .groupby(['position', 'Rank'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 10 - draft-1 squad: overlay the age_cat colours, the per-player
# outlines and the top-6 per-position averages.
col = sns.color_palette("YlOrRd")
fig, ax = plt.subplots()
ax2 = ax.twiny()  # second axes sharing the y-axis with `ax`
# Semi-transparent coloured stacks (by age_cat) on the twin axes.
plot_df5.plot.bar(width=1, stacked=True, ax=ax2, alpha=0.5, color=col)
# White stacks with black edges (one segment per Rank) on the base axes.
plot_df6.plot.bar(
    width=1,
    stacked=True,
    edgecolor='black',
    legend=False,
    color='white',
    ax=ax,
)
# Dark-blue markers: top_6 market_value against position.
plt.scatter(top_6['position'], top_6['market_value'], color='darkblue')
plt.legend(loc=1, prop={'size': 5})

In [None]:
# Second "CB" search (rebinds dataset_cb): age under 25, market_value under 6
# and new_signing equal to 0.
dataset_cb = dataset.loc[
    dataset['position'].eq("CB")
    & dataset['age'].lt(25)
    & dataset['market_value'].lt(6)
    & dataset['new_signing'].eq(0)
]
dataset_cb

In [None]:
# TRANSFERS DRAFT 2
# Outgoing: keep every draft-1 squad row whose name is not in this list.
man_city_new_2 = man_city_2.loc[
    ~man_city_2['name'].isin([
        'Ben Chilwell',
        'Fabian Delph',
        'Raheem Sterling',
        'Eric Dier',
    ])
]
# Incoming: rows of the full table whose name is in this list.
dataset_man_city_2 = dataset.loc[
    dataset['name'].isin([
        'Luke Shaw',
        'Aaron Ramsey',
        'Son Heung-min',
        'Granit Xhaka',
        'Mason Holgate',
        'Jan Bednarek',
    ])
]

In [None]:
# Squad value after draft 2: retained players plus incoming players
# (rebinds team_value_new).
team_value_new = sum(
    frame['market_value'].sum()
    for frame in (man_city_new_2, dataset_man_city_2)
)
team_value_new

In [None]:
# Squad after draft 2: retained players followed by the incoming players.
# pd.concat is used because DataFrame.append was removed in pandas 2.0; the
# row order and index labels are the same as with append.
# The helper column Rank exists only on the retained rows, so it is dropped
# from the combined frame.
man_city_3 = (
    pd.concat([man_city_new_2, dataset_man_city_2])
    .drop(['Rank'], axis=1)
)
man_city_3

In [None]:
# Graph 11 - colour layer for the draft-2 squad: total market_value per
# position, with one column per age_cat value.
plot_df7 = (
    man_city_3
    .groupby(['position', 'age_cat'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 12 - count layer for the draft-2 squad.
# Number the players inside each position (starting at 1) after ordering by
# age_cat ascending and market_value descending.
man_city_3['Rank'] = (
    man_city_3
    .sort_values(['age_cat', 'market_value'], ascending=[True, False])
    .groupby(['position'])
    .cumcount()
    + 1
)
plot_df8 = (
    man_city_3
    .groupby(['position', 'Rank'])
    .agg({'market_value': 'sum'})
    .unstack()
)

In [None]:
# Graph 13 - draft-2 squad: overlay the age_cat colours, the per-player
# outlines and the top-6 per-position averages.
col = sns.color_palette("YlOrRd")
fig, ax = plt.subplots()
ax2 = ax.twiny()  # second axes sharing the y-axis with `ax`
# Semi-transparent coloured stacks (by age_cat) on the twin axes.
plot_df7.plot.bar(width=1, stacked=True, ax=ax2, alpha=0.5, color=col)
# White stacks with black edges (one segment per Rank) on the base axes.
plot_df8.plot.bar(
    width=1,
    stacked=True,
    edgecolor='black',
    legend=False,
    color='white',
    ax=ax,
)
# Dark-blue markers: top_6 market_value against position.
plt.scatter(top_6['position'], top_6['market_value'], color='darkblue')
plt.legend(loc=1, prop={'size': 5})