# Imports

In [None]:
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from analysis.dim_reduction import Data
from analysis.widgets import DataWidgets as dw
from analysis.widgets import save_df_button
from scipy.spatial.distance import cosine
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib as mpl

# Render every figure in this notebook at 300 dpi.
mpl.rcParams.update({'figure.dpi': 300})

# Data

In [None]:
# Folder that holds the processed data.
path_to_data = 'demo_movies'

# Flag passed to the statistics step: whether to use transfer entropy.
transfer_entropy = False

# Session parameters, keyed by a unique session id. Fields of each entry:
#   path      - path to the session data
#   mouse     - mouse id
#   condition - condition label ('1', '2', ...)
#   fps       - frames per second of the recording
dates = {
    'mouseold1': dict(path='mouseold/1', mouse='1', condition='1', fps=20),
    'mouseold2': dict(path='mouseold/2', mouse='1', condition='1', fps=20),
    'mouseold3': dict(path='mouseold/3', mouse='1', condition='1', fps=20),
    'mouseX': dict(path='mouse3/right_after', mouse='1', condition='2', fps=15),
}

# NOTE(review): presumably maps a mouse id to the display order of its
# conditions - confirm against the widgets that consume it.
conditions_order = {'1': ['1', '2']}

In [None]:
%%time
# data loading
# Builds the project's `Data` object from the data folder and the session
# table configured earlier in the notebook; verbose=True is forwarded to the
# constructor.
data_class = Data(path_to_data, dates, verbose=True)

In [None]:
%%time
# calculating statistics
# The `transfer_entropy` flag from the configuration cell is passed through.
# NOTE(review): the return value is unused, so the results are presumably
# stored on `data_class` - confirm.
data_class.get_data(transfer_entropy)

In [None]:
# dimension reduction
# Returns a pair: `data`, which later cells copy into `result` and read the
# 'x', 'y' and 'mouse' columns from, and `pca`, whose `components_` attribute
# is read in the PCA section.
data, pca = data_class.data_reduction()

In [None]:
# visualization
# Hands the loaded data object and the configured condition order to the
# project's widget helper.
dw.show_result(data_class, conditions_order)

In [None]:
# tabular representation of results
# `result` is a copy of the reduced data; it is used later to compute the
# per-mouse centers. The bare name on the last line displays the table.
result = data.copy()
result

In [None]:
# NOTE(review): presumably saves the results under the `path_to_data` folder -
# confirm against DataWidgets.save.
dw.save(data_class, path_to_data)

# stats all


In [None]:
# Statistics view from the project's widget helper; it receives the data
# object and the configured condition order.
dw.show_stat(data_class, conditions_order)

In [None]:
# NOTE(review): presumably reports deviations of the statistics and may write
# output under `path_to_data` - confirm against DataWidgets.stats_deviation.
dw.stats_deviation(data_class, path_to_data)

# PCA

In [None]:
# PCA feature importance
# One row per entry of `pca.components_`, one column per column of
# `data_class.data`.
# NOTE(review): assumes `pca` follows scikit-learn's PCA, where each row of
# `components_` holds the feature weights of one principal axis - confirm.
feature_importance = pd.DataFrame(pca.components_, columns=data_class.data.columns)
feature_importance

In [None]:
# Each feature is one point: its weight in row 0 of `feature_importance`
# against its weight in row 1.
weights_0 = feature_importance.iloc[0]
weights_1 = feature_importance.iloc[1]
plt.scatter(weights_0, weights_1)

# `label=` on scatter names the whole collection (a single legend entry), so
# passing the list of column names never labels individual points. Annotate
# every point with its feature name instead.
for feature_name, w0, w1 in zip(feature_importance.columns, weights_0, weights_1):
    plt.annotate(str(feature_name), (w0, w1),
                 xytext=(3, 3), textcoords='offset points', fontsize=6)

plt.show()

In [None]:
# Absolute feature weights of row 0, sorted ascending (largest weights last).
feature_importance.loc[0].abs().sort_values()

In [None]:
# Absolute feature weights of row 1, sorted ascending (largest weights last).
feature_importance.loc[1].abs().sort_values()

In [None]:
# centers of mass for each category of records
# Mean of the 'x' and 'y' columns of `result`, one row per distinct value of
# the 'mouse' column.
centers = result.pivot_table(values=['x', 'y'], index='mouse', aggfunc='mean')
centers

In [None]:
# calculating cosine similarity
#
# For every row of `centers` and every feature column of `feature_importance`
# the score is
#     cosine_similarity(center, weights) * ||weights||
# i.e. the signed length of the feature's weight vector projected onto the
# direction of the center. For each center, the `top_n` features with the
# largest absolute score are kept, ranked 0 (strongest) upwards.
features = {'value': [],
            'label': [],
            'feature': [],
            'rating': []
            }
top_n = 8
for i, row in centers.iterrows():
    top = feature_importance.apply(
        lambda col: cosine_similarity([row], [col])[0, 0] * (col ** 2).sum() ** (1 / 2)
    )
    top = top[top.abs().sort_values(ascending=False).head(top_n).index]
    # There may be fewer than `top_n` features. Size the label/rating entries
    # from what was actually selected so all four lists stay the same length;
    # otherwise the DataFrame constructor below raises ValueError.
    n_selected = len(top)
    features['value'] += top.tolist()
    features['feature'] += top.index.tolist()
    features['label'] += [i] * n_selected
    features['rating'] += list(np.arange(n_selected))

# Long table -> one row per (label, feature), ordered by label and then rank.
features = pd.DataFrame(features)
features = features.pivot_table(values=['value', 'rating'], index=['label', 'feature'])
features = features.sort_values(by=['label', 'rating'])
features

In [None]:
# visualization of the most important features by cosine similarity:
# one bar chart per label, stacked vertically.
feature_df = features.reset_index()
labels = feature_df['label'].unique()

# squeeze=False always yields a 2-D array of axes, so a single label needs no
# special-casing; the grid has one column, so take that column.
fig, axs = plt.subplots(len(labels), 1, figsize=(12, 5 * len(labels)), squeeze=False)
axs = axs[:, 0]
fig.subplots_adjust(hspace=.3)

for ax, label in zip(axs, labels):
    rows_for_label = feature_df[feature_df['label'] == label]
    sns.barplot(data=rows_for_label, x='value', y='feature', ax=ax)

    ax.set_title(label, fontsize=18)
    ax.set_xlabel('value', fontsize=14)
    ax.set_ylabel('feature', fontsize=14)
    ax.tick_params(axis='both', labelsize=14)

plt.show()

In [None]:
# Offer the feature weights for saving: one row per feature, with the two
# weight columns renamed from 0/1 to 'x'/'y'.
save_df_button(
    feature_importance.T.rename(columns={0: 'x', 1: 'y'}),
    f'{path_to_data}/feature_weights.xlsx',
)