In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from src.make_features import *
from src.visualize import *
from config import *
from sklearn.model_selection import train_test_split
import plotly.express as px

In [2]:
# Load the individual frames (presumably one per mouse — confirm against load_data),
# then stack them row-wise into a single table.
# NOTE: reset_index() without drop=True keeps the previous index as an extra leading column.
mice_dfs = load_data()
all_data = pd.concat(mice_dfs, axis=0).reset_index(drop=False)
# Optional sanity check: all_data.describe()


In [5]:
# Draw one heatmap per loaded frame; entries that are None are skipped.
for df in mice_dfs:
    if df is None:
        continue
    plot_heatmap(df, area_names)


In [6]:
# Hold out 30% of the rows for testing (70% training); the fixed random_state keeps the
# split reproducible.
#
# Features are selected by name via `area_names` -- the same feature set the PCA cells
# use -- instead of `all_data.iloc[:, :-1]`. The positional slice also picked up the
# old-index column that `reset_index()` inserts when `all_data` is built, which leaked a
# non-feature column into X_train / X_test.
# NOTE(review): assumes `area_names` lists every intended model feature -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    all_data[area_names],
    all_data['win'],
    test_size=0.3,
    random_state=109,
)

In [8]:
# One histogram per column of the raw table, coloured by the `win` label, saved as
# a standalone HTML file. Iterating a DataFrame yields its column labels.
for col in all_data:
    fig = px.histogram(y=all_data[col], color=all_data['win'])
    fig.write_html(f"../graphs/histogram_area/histogram_{col}_winner_loser.html")
    # Optional static export:
    # fig.write_image(f"../graphs/histogram_area/histogram_{col}_winner_loser.png")


In [13]:
# Standardize every column except the last one (assumed to be the `win` label -- TODO
# confirm) and write one winner-vs-loser histogram per standardized column.
trans = StandardScaler()
data = pd.DataFrame(
    trans.fit_transform(all_data.iloc[:, :-1]),
    # fit_transform returns a bare array; without `columns=` the frame is labelled
    # 0..k-1 and the output files end up named by position instead of by column name
    # (unlike the raw-histogram cell, which names files by column).
    columns=all_data.columns[:-1],
    # Keep the same row labels as `all_data` so `data[col]` and `all_data.win` line up.
    index=all_data.index,
)
for col in data.columns:
    fig = px.histogram(y=data[col], color=all_data.win)
    fig.write_html(f"../graphs/histogram_area/standardize_histogram_{col}_winner_loser.html")
    # Optional static export (prefixed so it cannot overwrite the raw-histogram PNGs):
    # fig.write_image(f"../graphs/histogram_area/standardize_histogram_{col}_winner_loser.png")

In [8]:
######################################
############### 2D PCA ###############
######################################
from sklearn.decomposition import PCA

# Target vector (the `win` column).
y = all_data['win'].values

# Feature matrix (the `area_names` columns), standardized in one step before PCA.
x = StandardScaler().fit_transform(all_data[area_names].values)

# Project the standardized features onto two principal components.
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)

# Share of the total variance captured by the two components, as a percentage.
total_var = 100 * pca.explained_variance_ratio_.sum()

principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'],
)

# Attach the label column next to the component scores (column-wise concat aligns on index).
finalDf_2d = pd.concat([principalDf, all_data['win']], axis=1)

#######################################
############### plot 2D ###############
#######################################
fig = px.scatter(
    finalDf_2d,
    x="principal component 1",
    y="principal component 2",
    color="win",
    title=f'Total Explained Variance: {total_var:.2f}%',
)
fig.show()


In [9]:
######################################
############### 3D PCA ###############
######################################
from sklearn.decomposition import PCA

# Separating out the features (the `area_names` columns)
x = all_data[area_names].values

# Separating out the target
y = all_data['win'].values

# Standardizing the features before PCA
x = StandardScaler().fit_transform(x)

# Project the standardized features onto three principal components.
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(x)

# Share of the total variance captured by the three components, as a percentage.
total_var = pca.explained_variance_ratio_.sum() * 100

principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2', 'principal component 3'],
)

# Attach the label column next to the component scores (column-wise concat aligns on index).
finalDf_3d = pd.concat([principalDf, all_data.win], axis=1)

############### plot 3D ###############
fig = px.scatter_3d(
    finalDf_3d,
    x='principal component 1',
    y='principal component 2',
    z='principal component 3',
    color='win',
    title=f'Total Explained Variance: {total_var:.2f}%',
    # `labels` is keyed by the plotted column names. The previous keys ('0', '1', '2')
    # matched no column of finalDf_3d, so the "PC n" axis titles were never applied.
    labels={
        'principal component 1': 'PC 1',
        'principal component 2': 'PC 2',
        'principal component 3': 'PC 3',
    },
)
fig.show()



In [11]:
# Display the 3D PCA table (three component-score columns plus the `win` label) as the
# cell's rich output; pandas truncates the middle rows of long frames when rendering.
finalDf_3d

Unnamed: 0,principal component 1,principal component 2,principal component 3,win
0,1.643404,-1.299077,-0.136270,1
1,1.571032,-1.280237,-0.050190,1
2,1.520463,-1.215477,-0.017836,1
3,1.576943,-1.026466,-0.025267,1
4,1.460583,-1.035172,0.022370,1
...,...,...,...,...
9095,-1.792984,0.739883,0.117330,0
9096,-1.671393,0.478562,0.064863,0
9097,-1.722024,0.379026,0.032229,0
9098,-1.764023,0.360777,0.017307,0


In [12]:

# `pio` is not among the names explicitly imported in the notebook's import cell, so this
# line previously worked only if one of the star imports happened to export it. Import it
# explicitly so the cell survives Restart & Run All.
import plotly.io as pio

# Render subsequently shown plotly figures in the system web browser instead of inline.
pio.renderers.default = "browser"
#######################################
############### plot 3D ###############
#######################################
