In [None]:
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from GA.utils import get_avg_fitness, entropy
from GA.algorithm import run_ga
from metrics.lin_reg import get_fitness, get_columns
from visualizers.plotters import plot_evolution

In [None]:
# Relative paths of the three pickle files read by the loading cell:
# the results dictionary, the feature sets and the target sets.
(results_path,
 features_path,
 targets_path) = (
    'data/results_dict.pickle',
    'data/feature_sets.pickle',
    'data/target_sets.pickle',
)

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in binary mode and closed again before returning.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file, so only point this at pickles that come from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One helper call per artifact instead of three copy-pasted open/load stanzas.
results = _load_pickle(results_path)
feature_sets = _load_pickle(features_path)
target_sets = _load_pickle(targets_path)


In [None]:
# Show one table per top-level key of `results`; the 'final_bitstring' row is
# dropped from each table before it is displayed.
for result_key, runs in results.items():
    print('\n', result_key)
    summary = pd.DataFrame(runs).drop('final_bitstring')
    display(summary)

In [None]:
# Echo the entire `results` dictionary as the cell output for inspection.
results

In [None]:
# Pick the feature set and the target that the cells below work on.
# NOTE(review): `features` is later indexed with `.iloc`, so it is presumably
# a DataFrame — confirm against the pickled data.
FEATURE_SET_NAME = 'two_season_soccermix_history'
TARGET_NAME = 'player_season_obv_90_next_season'

features = feature_sets[FEATURE_SET_NAME]
targets = target_sets[TARGET_NAME]

In [None]:
# Count, for every feature column, how many result entries selected it.
#
# `results` is nested as results[target][feature_set]['final_bitstring'] (see
# the explicit lookup of `_bitstring` in this notebook), so the bitstring has
# to be read from the per-feature-set entry; indexing
# results[key]['final_bitstring'] directly raises KeyError.
# NOTE(review): bitstrings of different feature sets presumably differ in
# length, so the tally is limited to the feature set loaded into `features`.
feature_set_name = 'two_season_soccermix_history'  # keep in sync with `features`

# One row per top-level key of `results`, one 0/1 entry per bitstring position.
columns_included = np.array([
    [int(bit) for bit in runs[feature_set_name]['final_bitstring']]
    for runs in results.values()
    if feature_set_name in runs
])

# res[i] is the number of rows whose bitstring has a 1 at position i.
res = np.sum(columns_included, axis=0)

In [None]:
not_used = features.iloc[:, col_indexes_not_used]
not_used.columns

In [None]:
# _bitstring = ''.join(['1' if bit > 1 else '0' for bit in res])
# _bitstring = final_population[0].genotype
_bitstring = results['player_season_obv_90_next_season']['two_season_soccermix_history']['final_bitstring']

In [None]:
_bitstring 

In [None]:
col_indexes_used = []
col_indexes_not_used = []
for i in range(len(_bitstring)):
    if _bitstring[i] == '1':
        col_indexes_used.append(i)
    else:
        col_indexes_not_used.append(i)

_features = features.iloc[:, col_indexes_used]

In [None]:
# Evaluate the columns picked by `_bitstring`. The result of get_fitness is
# unpacked as (mse, model); `model.coef_` is plotted in the cells below.
columns = get_columns(features, _bitstring)
fitness_result = get_fitness(columns, targets, num_runs=6)
mse, model = fitness_result

In [None]:
# Summary statistics (pandas `describe`) for one column of the selected features.
_features.player_season_ot_shots_faced_90.describe()

In [None]:
# Bar plot of the model coefficients, labelled with the selected feature names.
# NOTE(review): assumes `model.coef_` is 1-D and ordered like
# `_features.columns` — confirm against get_columns / get_fitness.
coeff_series = pd.Series(model.coef_, _features.columns)

# Features hidden from the plot. Which of them are present depends on
# `_bitstring`, so labels that were not selected are skipped
# (errors='ignore') instead of aborting the cell with a KeyError.
excluded_features = [
    # 'player_season_ot_shots_faced_ratio',
    'player_season_ot_shots_faced_90',
    'player_season_gsaa_ratio',
    # 'player_season_np_psxg_faced_90',
    'player_season_goals_faced_90',
    'player_season_clcaa',
    'player_season_gsaa_90',
    # 'player_season_npot_psxg_faced_90',
    # 'player_season_da_aggressive_distance',
    # 'player_season_xs_ratio',
    'league_market_value',
    'league_market_value_next_season',
    'league_market_value_diff',
]
coeff_series = coeff_series.drop(excluded_features, errors='ignore')

plt.rcParams["figure.figsize"] = (20, 70)  # set plot size
sns.barplot(x=coeff_series.values, y=coeff_series.index)

In [None]:
# Same coefficient plot, limited to the labels from 'pass_0_1' through
# 'bad_touch_4' (label slicing includes both endpoints).
all_coefficients = pd.Series(data=model.coef_, index=_features.columns)
coeff_series = all_coefficients.loc['pass_0_1':'bad_touch_4']

plt.rcParams["figure.figsize"] = (20, 30)  # set plot size
sns.barplot(x=coeff_series.values, y=coeff_series.index)

In [None]:
def _mse_of(fitness_result):
    """Return the MSE from a ``get_fitness`` result.

    Elsewhere in this notebook ``get_fitness`` is unpacked as ``(mse, model)``,
    so a tuple is reduced to its first element; any other value is returned
    unchanged.
    """
    if isinstance(fitness_result, tuple):
        return fitness_result[0]
    return fitness_result


# Compare all features against the bitstring-selected columns, one
# `random_state` per run.
num_runs = 100
max_baseline_mse = 100  # runs whose baseline MSE exceeds this are skipped

# The selected columns do not depend on the seed, so build them once.
selected_columns = get_columns(features, _bitstring)

counter = 0  # runs in which the selected columns beat the baseline
rel_changes = []  # (baseline - new) / baseline for every run that was kept
for seed in range(num_runs):
    baseline_mse = _mse_of(get_fitness(features, targets, random_state=seed))
    if baseline_mse > max_baseline_mse:
        continue
    new_mse = _mse_of(get_fitness(selected_columns, targets, random_state=seed))
    rel_changes.append((baseline_mse - new_mse) / baseline_mse)
    if new_mse < baseline_mse:
        counter += 1

# Skipped runs never had the chance to win, so the win rate is taken over the
# runs that were actually compared rather than over `num_runs`.
valid_runs = len(rel_changes)
if valid_runs:
    print(counter / valid_runs)
    print(np.mean(rel_changes))
else:
    print(f'all {num_runs} runs were skipped (baseline MSE > {max_baseline_mse})')