# Q5 Assumed Density Filtering
### Q5.1 Pre-processing and variables

In [1]:
# Q5 Assumed density filtering
# import preprocessing function and Gibbs sampler function
from Preprocessing_serieA_function import preprocess_serieA_no_draws
from Gibbs_sampler_function import gibbs_sampling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# load data
data = pd.read_csv('SerieA_dataset.csv')

# preprocess data (resulting columns, per the printed head: team1, team2, score_diff)
data_preprocessed = preprocess_serieA_no_draws(data, dataset_name='SerieA')
print(data_preprocessed.head())

# Prior (mean, variance) assigned to every team before any match is processed
PRIOR_MEAN = 25
PRIOR_VARIANCE = 64
# Fixed seed so the shuffled-order experiment gives the same result on every
# Restart & Run All instead of a different permutation each time
SHUFFLE_SEED = 42

# Create team dictionary test 1
# Teams are collected from BOTH columns so that a team which only ever appears
# as team2 still gets a prior entry. Order: first appearance in team1, then any
# remaining teams in order of first appearance in team2.
unique_teams_1 = pd.concat(
    [data_preprocessed['team1'], data_preprocessed['team2']]
).unique().tolist()
mean = [PRIOR_MEAN for _ in unique_teams_1]
variance = [PRIOR_VARIANCE for _ in unique_teams_1]

teams_dictionary = {team: [PRIOR_MEAN, PRIOR_VARIANCE] for team in unique_teams_1}

# Create team dictionary test 2 for shuffled data
# (a separate dict with its own [mean, variance] lists, so the two runs share no state)
shuffled_teams_dictionary = {team: [PRIOR_MEAN, PRIOR_VARIANCE] for team in unique_teams_1}

# shuffle data: sample() already returns a new frame, so no explicit copy is needed
shuffled_data = (
    data_preprocessed
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)
print(shuffled_data.head())

# First match of the season in the original order
game1 = data_preprocessed.iloc[0, :]

      team1     team2  score_diff
0    Chievo  Juventus          -1
1     Lazio    Napoli          -1
2    Torino      Roma          -1
3  Sassuolo     Inter           1
5    Empoli  Cagliari           2
      team1      team2  score_diff
0  Atalanta  Frosinone           4
1   Bologna     Empoli           2
2    Empoli   Juventus          -1
3      Roma     Empoli           1
4  Cagliari     Chievo           1


# Q6
### Q6.1 Prediction function

### Q5.2 Assumed Density Filtering (ADF) function

In [2]:
# Assumed density filtering (ADF)
from ADF_script import ADF

## Q5 & Q6: Run functions

In [3]:
# Run ADF on the matches in their original order
ordered_run = ADF(teams_dictionary, data_preprocessed)
dic1, predictions1 = ordered_run

# Run ADF on the shuffled matches, starting from the separate prior dictionary
shuffled_run = ADF(shuffled_teams_dictionary, shuffled_data)
dic_shuffled, predictions_shuffled = shuffled_run


Score difference: -1

Score difference: -1

Score difference: -1

Score difference: 1

Score difference: 2

Score difference: -1

Score difference: 4

Score difference: 2

Score difference: 1

Score difference: 1

Score difference: 1

Score difference: 1

Score difference: 5

Score difference: 1

Score difference: -3

Score difference: -1

Score difference: 1

Score difference: 1

Score difference: 2

Score difference: 3

Score difference: 1

Score difference: -1

Score difference: -1

Score difference: 1

Score difference: -5

Score difference: 1

Score difference: 1

Score difference: -1

Score difference: 2

Score difference: 2

Score difference: 2

Score difference: 3

Score difference: -1

Score difference: -2

Score difference: 3

Score difference: -2

Score difference: 2

Score difference: -2

Score difference: 1

Score difference: -1

Score difference: 4

Score difference: 3

Score difference: 2

Score difference: 2

Score difference: -2

Score difference: 2

Score difference:

# Results:
### Team ranking changes after shuffle
#### Prediction rate of one-step-ahead prediction = 0.64, better than random guessing (0.5)

In [None]:
# Rank the teams by mean skill and score the one-step-ahead predictions,
# first for the original match order and then for the shuffled order.
from numpy import sign
from sklearn.metrics import accuracy_score


def _ranked_skills(skills):
    """Turn a skill dictionary into a frame sorted by descending mean.

    `skills` is expected to map team name -> [mean, variance]; the result has
    one row per team with the columns 'mean' and 'variance'.
    """
    table = pd.DataFrame(skills, index=['mean', 'variance']).T
    return table.sort_values(by=['mean'], ascending=False)


def _match_outcomes(matches):
    """Return the sign of every 'score_diff' entry as a plain Python list."""
    return sign(matches['score_diff']).tolist()


# --- original match order ---------------------------------------------------
teams_df = _ranked_skills(dic1)
print('Teams after the season, no shuffle:')
print(teams_df)

# Prediction rate over all matches
y_true = _match_outcomes(data_preprocessed)
prediction_rate = accuracy_score(y_true, predictions1)
print(f'\nPrediction rate of One-Step-Ahead prediction: {prediction_rate}')

# Prediction rate over the last 60 matches only
y_true_last_60, predictions_last_60 = y_true[-60:], predictions1[-60:]
prediction_rate_last_60 = accuracy_score(y_true_last_60, predictions_last_60)
print(f'\nPrediction rate of One-Step-Ahead prediction for last 60 games: {prediction_rate_last_60}')


# --- shuffled match order ---------------------------------------------------
teams_df_shuffled = _ranked_skills(dic_shuffled)
print('\nTeams after the season, with shuffle:')
print(teams_df_shuffled)

# Prediction rate over all matches
y_true_shuffled = _match_outcomes(shuffled_data)
prediction_rate_shuffled = accuracy_score(y_true_shuffled, predictions_shuffled)
print(f'\nPrediction rate of One-Step-Ahead prediction: {prediction_rate_shuffled}')

# Prediction rate over the last 60 matches only
y_true_last_60_shuffled, predictions_last_60_shuffled = y_true_shuffled[-60:], predictions_shuffled[-60:]
prediction_rate_last_60_shuffled = accuracy_score(y_true_last_60_shuffled, predictions_last_60_shuffled)
print(f'\nPrediction rate of One-Step-Ahead prediction for last 60 games: {prediction_rate_last_60_shuffled}')



In [None]:
# Plot the pdf of the three top teams and the three bottom teams

def plot_top_bottom_pdfs(skill_table, title, n_teams=3, margin=10, n_points=1000):
    """Plot normal skill densities for the first and last `n_teams` rows.

    Parameters
    ----------
    skill_table : pandas.DataFrame
        One row per team, indexed by team name, with the columns 'mean' and
        'variance'. It must already be sorted by descending mean, because the
        first rows are treated as the top teams and the last rows as the bottom.
    title : str
        Title of the figure.
    n_teams : int
        Number of teams taken from each end of the table.
    margin : float
        Padding added on both sides of the [min mean, max mean] interval.
    n_points : int
        Number of x positions at which each density is evaluated.
    """
    # Derive the x-range from the table being plotted, so curves are not
    # clipped when the skills fall outside a hard-coded interval.
    means = skill_table['mean']
    x = np.linspace(means.min() - margin, means.max() + margin, n_points)

    fig, ax = plt.subplots(figsize=(10, 6))
    for i in range(n_teams):
        top = skill_table.iloc[i]
        bottom = skill_table.iloc[-i - 1]

        # The table stores variances; scipy's norm takes a standard deviation.
        y_top = stats.norm.pdf(x, top['mean'], np.sqrt(top['variance']))
        y_bottom = stats.norm.pdf(x, bottom['mean'], np.sqrt(bottom['variance']))

        ax.plot(x, y_top, label=f'{i+1}. {skill_table.index[i]}')
        ax.plot(x, y_bottom, label=f'{len(skill_table)-i}. {skill_table.index[-i-1]}')

    ax.set_title(title)
    ax.set_xlabel('skill level')
    ax.set_ylabel('probability density')
    ax.legend()
    plt.show()


plot_top_bottom_pdfs(teams_df, 'PDF of top 3 and bottom 3 teams after the season')

# Plot the pdf of the three top teams and the three bottom teams after shuffle
plot_top_bottom_pdfs(
    teams_df_shuffled,
    'PDF of top 3 and bottom 3 teams after the season with shuffle',
)
