In [None]:
import pandas as pd
from source.classes import FPL_data
import itertools
import matplotlib.pyplot as plt
from sklearn.feature_selection import r_regression, f_regression, mutual_info_regression

In [None]:
# Create the project's FPL_data object; every later cell in this notebook
# reads from and writes to this single `gg` instance.
gg = FPL_data()

In [None]:
# SVC classifier sweep over kernels: one panel per kernel, showing the
# 'encoded' test targets (default colour) with the SVC predictions on top (red).
svc_kernels = ['rbf', 'linear', 'poly']  # 'sigmoid' intentionally left out of the sweep
fig, axes = plt.subplots(nrows=1, ncols=len(svc_kernels), sharey=True)

# zip pairs each kernel with its own axis, so no manual panel counter is needed.
for ax, k in zip(axes, svc_kernels):
    gg.build_svc(type_scaler = 'range', kernel_type = k, feature_selection = None)
    print('SVC ', k)
    gg.predict('svc')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['encoded'])
    ax.scatter(xx, gg.data_dict['y_predicted']['svc'], color = 'r')
    ax.set_title(f'SVC - {k}')

In [None]:
# KNN classifier sweep: each (weights, n_neighbors) pair gets its own panel,
# 'encoded' test targets in the default colour and predictions in red.
knn_clf_grid = itertools.product(['uniform','distance'], [5, 10, 15, 20])
fig, axes = plt.subplots(nrows=1, ncols=8, sharey=True)

for panel, (w, n) in zip(axes, knn_clf_grid):
    gg.build_knn_clf(type_scaler = 'range', weights = w, n_neighbors = n)
    print('KNN_CLF', w, n)
    gg.predict('knn_clf')
    targets = gg.data_dict['y_test']['encoded']
    predictions = gg.data_dict['y_predicted']['knn_clf']
    # 1-based sample index shared by both scatters
    sample_idx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    panel.scatter(sample_idx, targets)
    panel.scatter(sample_idx, predictions, color = 'r')
    panel.set_title(f'KNN CLF - {w}, {n}')

In [None]:
# Rebuild the KNN classifier without an explicit n_neighbors.
# The weighting is spelled out instead of being read from a loop variable left
# behind by a sweep cell, so this cell no longer depends on execution order;
# 'distance' is the value that variable holds once a weights sweep has finished.
gg.build_knn_clf(type_scaler = 'range', weights = 'distance')

In [None]:
# Decision-tree classifier sweep over split criteria: one panel per criterion,
# 'encoded' test targets (default colour) with the predictions on top (red).
dtc_criteria = ['gini', 'log_loss', 'entropy']
fig, axes = plt.subplots(nrows=1, ncols=len(dtc_criteria), sharey=True)

# zip pairs each criterion with its own axis, so no manual panel counter is needed.
for ax, c in zip(axes, dtc_criteria):
    gg.build_dtc(type_scaler = 'range', criterion = c)
    print('DTC', c)
    gg.predict('dtc')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['encoded'])
    ax.scatter(xx, gg.data_dict['y_predicted']['dtc'], color = 'r')
    ax.set_title(f'DTC - {c}')

In [None]:
# MLP classifier sweep over feature-selection scoring functions: one panel per
# scorer, 'encoded' test targets (default colour) with predictions on top (red).
feature_scorers = [r_regression, f_regression, mutual_info_regression]
fig, axes = plt.subplots(nrows=1, ncols=len(feature_scorers), sharey=True)

# zip pairs each scorer with its own axis, so no manual panel counter is needed.
for ax, fs in zip(axes, feature_scorers):
    gg.build_mlp_clf(type_scaler = 'range', feature_selection = fs)
    # Use the function's name: interpolating the function object itself yields
    # an unreadable '<function ... at 0x...>' string in the log and the title.
    print('MLP - CLF', fs.__name__)
    gg.predict('mlp_clf')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['encoded'])
    ax.scatter(xx, gg.data_dict['y_predicted']['mlp_clf'], color = 'r')
    ax.set_title(f'MLP-CLF - {fs.__name__}')

In [None]:
# Single-figure overlay: 'encoded' test targets vs. the currently stored SVC
# predictions (small red dots), plotted against a 1-based sample index.
svc_targets = gg.data_dict['y_test']['encoded']
svc_predictions = gg.data_dict['y_predicted']['svc']
xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
plt.scatter(xx, svc_targets)
plt.scatter(xx, svc_predictions, color = 'r', marker = '.')

In [None]:
# KNN regressor sweep: one panel per (weights, n) pair, raw test targets in the
# default colour and predictions in red.
# NOTE(review): `n` is iterated and shown in the log/title but is not passed to
# gg.build_knn, so every panel for a given `weights` presumably uses the same
# neighbour count. The classifier sweep passes n_neighbors = n; confirm whether
# build_knn accepts the same keyword and, if so, forward it here.
knn_grid = itertools.product(['uniform','distance'], [5, 10, 15, 20])
fig, axes = plt.subplots(nrows=1, ncols=8, sharey=True)

for panel, (w, n) in zip(axes, knn_grid):
    gg.build_knn(type_scaler = 'range', weights = w)
    print('KNN', w, n)
    gg.predict('knn')
    targets = gg.data_dict['y_test']['raw']
    predictions = gg.data_dict['y_predicted']['knn']
    # 1-based sample index shared by both scatters
    sample_idx = range(1, len(targets) + 1)
    panel.scatter(sample_idx, targets)
    panel.scatter(sample_idx, predictions, color = 'r')
    panel.set_title(f'KNN - {w}, {n}')

In [None]:
# Score gg.knn_regression on the test split: 'reduced' features against 'raw'
# targets. The call is the last expression so the notebook displays its value.
X_eval = gg.data_dict['X_test']['reduced']
y_eval = gg.data_dict['y_test']['raw']
gg.knn_regression.score(X_eval, y_eval)

In [None]:
# SVR sweep over kernels: one panel per kernel, raw test targets (default
# colour) with the SVR predictions on top (red).
svr_kernels = ['rbf', 'linear', 'poly']  # 'sigmoid' intentionally left out of the sweep
fig, axes = plt.subplots(nrows=1, ncols=len(svr_kernels), sharey=True)

# zip pairs each kernel with its own axis, so no manual panel counter is needed.
for ax, k in zip(axes, svr_kernels):
    gg.build_svr(type_scaler = 'range', kernel_type = k, feature_selection = r_regression)
    print('SVR ', k)
    gg.predict('svr')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['raw'])
    ax.scatter(xx, gg.data_dict['y_predicted']['svr'], color = 'r')
    ax.set_title(f'SVR - {k}')

In [None]:
# Side-by-side comparison of the regressors on one row of 8 panels:
# 3 SVR kernels, then 2 KNN weightings, then 3 decision-tree criteria.
def _plot_regressor_panel(ax, model_key, title):
    """Run gg.predict(model_key), then scatter the raw test targets and the
    stored predictions for that model (red) on `ax` and set its title."""
    gg.predict(model_key)
    # 1-based sample index used as the x coordinate for both scatters
    sample_idx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(sample_idx, gg.data_dict['y_test']['raw'])
    ax.scatter(sample_idx, gg.data_dict['y_predicted'][model_key], color = 'r')
    ax.set_title(title)


fig, axes = plt.subplots(nrows=1, ncols=8, sharey=True)
# One shared cursor over the axes so the three sweeps fill consecutive panels.
panels = iter(axes)

# SVR kernels ('sigmoid' intentionally left out)
for k in ['rbf', 'linear', 'poly']:
    gg.build_svr(type_scaler = 'range', kernel_type = k, feature_selection = r_regression)
    print('SVR ', k)
    _plot_regressor_panel(next(panels), 'svr', f'SVR - {k}')

# K-neighbours weightings
# NOTE(review): `n` appears in the log/title but is not passed to gg.build_knn;
# confirm whether build_knn takes an n_neighbors keyword and forward it if so.
for w, n in itertools.product(['uniform','distance'], [10]):
    gg.build_knn(type_scaler = 'range', weights = w)
    print('KNN', w, n)
    _plot_regressor_panel(next(panels), 'knn', f'KNN - {w}, {n}')

# Decision-tree criteria
for c in ['squared_error', 'friedman_mse', 'absolute_error']:
    gg.build_dtr(type_scaler = 'range', criterion = c)
    print('DTR', c)
    _plot_regressor_panel(next(panels), 'dtr', f'DTR - {c}')

In [None]:
# Decision-tree regressor sweep over split criteria: one panel per criterion,
# raw test targets (default colour) with the predictions on top (red).
dtr_criteria = ['squared_error', 'friedman_mse', 'absolute_error']
fig, axes = plt.subplots(nrows=1, ncols=len(dtr_criteria), sharey=True)

# zip pairs each criterion with its own axis, so no manual panel counter is needed.
for ax, c in zip(axes, dtr_criteria):
    gg.build_dtr(type_scaler = 'range', criterion = c)
    print('DTR', c)
    gg.predict('dtr')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['raw'])
    ax.scatter(xx, gg.data_dict['y_predicted']['dtr'], color = 'r')
    ax.set_title(f'DTR - {c}')

In [None]:
# MLP regressor sweep over feature-selection scoring functions: one panel per
# scorer, raw test targets (default colour) with predictions on top (red).
feature_scorers = [r_regression, f_regression, mutual_info_regression]
fig, axes = plt.subplots(nrows=1, ncols=len(feature_scorers), sharey=True)

# zip pairs each scorer with its own axis, so no manual panel counter is needed.
for ax, fs in zip(axes, feature_scorers):
    gg.build_mlp(type_scaler = 'range', feature_selection = fs)
    # Use the function's name: interpolating the function object itself yields
    # an unreadable '<function ... at 0x...>' string in the log and the title.
    print('MLP', fs.__name__)
    gg.predict('mlp')
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(gg.data_dict['y_test']['raw']) + 1)
    ax.scatter(xx, gg.data_dict['y_test']['raw'])
    ax.scatter(xx, gg.data_dict['y_predicted']['mlp'], color = 'r')
    ax.set_title(f'MLP - {fs.__name__}')

In [None]:
# Single-figure overlay: raw test targets vs. the currently stored
# decision-tree-regressor predictions (red), against a 1-based sample index.
dtr_targets = gg.data_dict['y_test']['raw']
dtr_predictions = gg.data_dict['y_predicted']['dtr']
xx = range(1, len(dtr_targets) + 1)
plt.scatter(xx, dtr_targets)
plt.scatter(xx, dtr_predictions, color = 'r')

In [None]:
from source.ML_analysis import prepare_model_input

# Tag every row of the input data with its player's 'element_type', looked up
# in gg.player_dict by the row's 'element' value.
gg.input_data['position'] = [
    gg.player_dict[player_id]['element_type'] for player_id in gg.input_data['element']
]


def _rows_for_position(position_code):
    """Return the gg.input_data rows whose 'position' equals position_code, re-indexed from 0."""
    is_position = gg.input_data['position'] == position_code
    return gg.input_data[is_position].reset_index(drop = True)


# Position codes as used below: 4 -> forwards, 1 -> keepers, 2 -> defenders, 3 -> mids.
# Each subset is stored on gg together with its prepare_model_input() result.
gg.forwards = _rows_for_position(4)
gg.forward_ML = prepare_model_input(gg.forwards)

gg.keepers = _rows_for_position(1)
gg.keeper_ML = prepare_model_input(gg.keepers)

gg.defenders = _rows_for_position(2)
gg.defender_ML = prepare_model_input(gg.defenders)

gg.mids = _rows_for_position(3)
gg.mid_ML = prepare_model_input(gg.mids)

In [None]:
from source.ML_analysis import build_mlp

# Fit one MLP per playing position (keepers, defenders, mids, forwards) and
# compare its predictions (red) with the raw test targets, one panel each.
# Author's note on the helper:
#   build_mlp(input_df, predicted_value = 'total_points', scaler_type = 'standard', max_iter = 500, feature_selection = r_regression)
#   outputs = regr, scaler, feature_selector, d_type, data
position_inputs = [
    ('keepers', gg.keeper_ML),
    ('defenders', gg.defender_ML),
    ('mids', gg.mid_ML),
    ('forwards', gg.forward_ML),
]
fig, axes = plt.subplots(nrows=1, ncols=len(position_inputs), sharey=True)

# regr / d_type / data / y_pred keep their names on purpose: after the loop they
# hold the last (forwards) model and are available to cells further down.
for ax, (label, pos) in zip(axes, position_inputs):
    regr, scaler, feature_selector, d_type, data = build_mlp(pos, max_iter = 1000, scaler_type = 'range', feature_selection = f_regression)
    # Label the score so the four printed numbers can be told apart.
    print(label, regr.score(data['X_test'][d_type], data['y_test']['raw']))
    y_pred = regr.predict(data['X_test'][d_type])
    # 1-based sample index used as the x coordinate for both scatters
    xx = range(1, len(data['y_test']['raw']) + 1)
    ax.scatter(xx, data['y_test']['raw'])
    ax.scatter(xx, y_pred, color = 'r')
    ax.set_title(f'MLP - {label}')

In [None]:
# Predict on the test split with the current `regr`, then finish on the score.
# A notebook only displays a cell's last expression, so the score has to come
# last to be shown; as a first statement its value is computed and thrown away.
y_pred = regr.predict(data['X_test'][d_type])
regr.score(data['X_test'][d_type], data['y_test']['raw'])

In [None]:
# Single-figure overlay: raw test targets from `data` vs. the current `y_pred`
# (red), plotted against a 1-based sample index.
raw_targets = data['y_test']['raw']
xx = range(1, len(raw_targets) + 1)
plt.scatter(xx, raw_targets)
plt.scatter(xx, y_pred, color = 'r')

In [None]:
# `preprocessing` is not imported by any of the visible cells, so bring it into
# scope here; without this the cell raises NameError.
from sklearn import preprocessing

# Fit a min-max scaler on the training targets held in gg.y_train.
# NOTE(review): MinMaxScaler.fit expects 2-D input of shape (n_samples, n_features);
# confirm gg.y_train exists and is a DataFrame / column vector rather than a 1-D Series.
y_scaler = preprocessing.MinMaxScaler()
y_scaler.fit(gg.y_train)

In [None]:
from sklearn.feature_selection import r_regression, f_regression, mutual_info_regression

In [None]:
# Scratch check: int() truncates toward zero rather than rounding, so this displays 6.
int(6.6)

In [None]:
# Scratch check: int() truncates toward zero, so this also displays 6.
int(6.2)