In [2]:
# preliminaries
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

In [1]:
# paths to dataframe csv files
DF_FULL_PATH = 'Data/df_full.csv'

In [3]:
# load dataframes for each position
df_full = pd.read_csv(DF_FULL_PATH, index_col=0)
cb_df = df_full[df_full.position=='CB']
fb_df = df_full[df_full.position=='FB']
dm_df = df_full[df_full.position=='DM']
m_df = df_full[df_full.position=='M']
w_df = df_full[df_full.position=='W']
cf_df = df_full[df_full.position=='CF']

In [4]:
RAW_TRAITS = ['goals', 'shots', 'conversion', 'positioning', 'assists', 'crossing', 'dribbling', 'carries',
              'involvement', 'accuracy', 'intent', 'receiving', 'aerial', 'on_ball', 'off_ball', 'fouls']

In [5]:
# get raw traits per position as numpy array
cb_raw_traits = cb_df[RAW_TRAITS].to_numpy()
fb_raw_traits = fb_df[RAW_TRAITS].to_numpy()
dm_raw_traits = dm_df[RAW_TRAITS].to_numpy()
m_raw_traits = m_df[RAW_TRAITS].to_numpy()
w_raw_traits = w_df[RAW_TRAITS].to_numpy()
cf_raw_traits = cf_df[RAW_TRAITS].to_numpy()

### K-means

In [6]:
# create models
kmeans_cb = KMeans(n_clusters=cb_raw_traits.shape[0]/20)
kmeans_fb = KMeans(n_clusters=fb_raw_traits.shape[0]/20)
kmeans_dm = KMeans(n_clusters=dm_raw_traits.shape[0]/20)
kmeans_m = KMeans(n_clusters=m_raw_traits.shape[0]/20)
kmeans_w = KMeans(n_clusters=w_raw_traits.shape[0]/20)
kmeans_cf = KMeans(n_clusters=cf_raw_traits.shape[0]/20)

# fit models
kmeans_cb = kmeans_cb.fit(cb_raw_traits)
kmeans_fb = kmeans_fb.fit(fb_raw_traits)
kmeans_dm = kmeans_dm.fit(dm_raw_traits)
kmeans_m = kmeans_m.fit(m_raw_traits)
kmeans_w = kmeans_w.fit(w_raw_traits)
kmeans_cf = kmeans_cf.fit(cf_raw_traits)

TypeError: 'float' object cannot be interpreted as an integer

In [None]:
# get scores for each position
kmeans_cb_scores = kmeans_cb.predict_proba(cb_raw_traits)
kmeans_fb_scores = kmeans_fb.predict_proba(fb_raw_traits)
kmeans_dm_scores = kmeans_dm.predict_proba(dm_raw_traits)
kmeans_m_scores = kmeans_m.predict_proba(m_raw_traits)
kmeans_w_scores = kmeans_w.predict_proba(w_raw_traits)
kmeans_cf_scores = kmeans_cf.predict_proba(cf_raw_traits)

### Gaussian Mixture Model

In [None]:
# create models
gmm_cb = GaussianMixture(n_components=cb_raw_traits.shape[0]/20)
gmm_fb = GaussianMixture(n_components=fb_raw_traits.shape[0]/20)
gmm_dm = GaussianMixture(n_components=dm_raw_traits.shape[0]/20)
gmm_m = GaussianMixture(n_components=m_raw_traits.shape[0]/20)
gmm_w = GaussianMixture(n_components=w_raw_traits.shape[0]/20)
gmm_cf = GaussianMixture(n_components=cf_raw_traits.shape[0]/20)

# fit models
gmm_cb = gmm_cb.fit(cb_raw_traits)
gmm_fb = gmm_fb.fit(fb_raw_traits)
gmm_dm = gmm_dm.fit(dm_raw_traits)
gmm_m = gmm_m.fit(m_raw_traits)
gmm_w = gmm_w.fit(w_raw_traits)
gmm_cf = gmm_cf.fit(cf_raw_traits)


In [None]:
# get scores for each position
gmm_cb_scores = gmm_cb.predict_proba(cb_raw_traits)
gmm_fb_scores = gmm_fb.predict_proba(fb_raw_traits)
gmm_dm_scores = gmm_dm.predict_proba(dm_raw_traits)
gmm_m_scores = gmm_m.predict_proba(m_raw_traits)
gmm_w_scores = gmm_w.predict_proba(w_raw_traits)
gmm_cf_scores = gmm_cf.predict_proba(cf_raw_traits)