# Imports

In [None]:
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from analysis.dim_reduction import Data
from scipy.spatial.distance import cosine
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib as mpl
import ipywidgets as widgets
from IPython.display import display

# Set the default resolution of every matplotlib figure in this notebook to 300 dpi.
mpl.rcParams['figure.dpi'] = 300

# Data

In [None]:
# Path to the folder with the minian-processed data.
path_to_data = 'demo_movies'

# Sessions under consideration, keyed by session name. Every entry holds:
#   path      - path to the session files
#   mouse     - identifier of the animal
#   condition - normal / stress / 3h / 10d
#   fps       - frames per second
# The three groups below are merged in order, so the resulting key order is:
# "old" mouse sessions, 17-21 October sessions of mice 1 and 2, stress-test
# sessions of mice 1 and 2.
dates = {
    # "old" mouse: three 'normal' sessions recorded at 20 fps.
    **{
        f'mouse_old_{number}': {'path': f'mouse_old/{folder}',
                                'mouse': 'old',
                                'condition': 'normal',
                                'fps': 20}
        for number, folder in enumerate(
            ('25_october_2021', '11_november_2021', '15_november_2021'),
            start=1,
        )
    },
    # Mice 1 and 2: 'normal' sessions on 17-21 October at 15 fps.
    **{
        f'mouse{mouse_id}_{day}': {'path': f'5days/mouse{mouse_id}/{day} october exp',
                                   'mouse': mouse_id,
                                   'condition': 'normal',
                                   'fps': 15}
        for mouse_id in ('1', '2')
        for day in range(17, 22)
    },
    # Mice 1 and 2: stress-test sessions at 15 fps.
    **{
        f'mouse{mouse_id}_{tag}': {'path': f'stress_test/mouse{mouse_id}/{folder}',
                                   'mouse': mouse_id,
                                   'condition': condition,
                                   'fps': 15}
        for mouse_id in ('1', '2')
        for tag, folder, condition in (('ra', 'right_after', 'stress'),
                                       ('3h', '3_hours', '3h'),
                                       ('10d', '10_days', '10d'))
    },
}

# Conditions listed per mouse identifier; the list for the selected mouse is
# handed to data_class.show_result further down in the notebook.
conditions_order = {
    # Mice 1 and 2 share the same sequence; each gets its own list object.
    **{mouse_id: ['normal', 'stress', '3h', '10d'] for mouse_id in ('1', '2')},
    'old': ['normal'],
}

In [None]:
%%time
# Load the data: build the Data object from the data folder and the session table.
data_class = Data(path_to_data, dates, verbose=True)

In [None]:
%%time
# Collect the statistics.
# This operation takes a long time.
data_class.get_data()

In [None]:
# Drop strongly correlated features.
# (Disabled; `df` is not defined in the visible cells of this notebook.)
# df = Data.drop_strong_corr(df)

In [None]:
# Dimensionality reduction.
# `data` is used below as a DataFrame (column 'condition', .copy());
# `pca` is used below through its `components_` attribute.
data, pca = data_class.data_reduction()

In [None]:
# Visualisation of the results for a mouse picked from a dropdown.


def show_map(mouse_id):
    """Show the result for `mouse_id`, passing along that mouse's condition list."""
    data_class.show_result(mouse_id, conditions_order[mouse_id])


# The dropdown offers every key of `conditions_order` and starts on the first one.
mouse = widgets.Dropdown(
    options=conditions_order.keys(),
    value=next(iter(conditions_order)),
    description='mouse',
    disabled=False,
)

# Re-run `show_map` whenever the dropdown value changes.
mouse_map = widgets.interactive_output(show_map, {'mouse_id': mouse})

display(mouse, mouse_map)

# stats all


In [None]:
# Interactive view: data_class.show_stats_deviation is re-run for the condition
# picked in the dropdown. Choices are the conditions present in `data` plus
# 'all', which is the initial selection.
condition = widgets.Dropdown(
    options=[*data['condition'].unique().tolist(), 'all'],
    value='all',
    description='condition',
    disabled=False,
)

stats_deviation = widgets.interactive_output(
    data_class.show_stats_deviation,
    {'condition': condition},
)

display(condition, stats_deviation)

In [None]:
# Interactive view: data_class.show_stat is re-run for the statistic and the
# condition picked in the two dropdowns.
# NOTE(review): `condition` and `stats_deviation` are rebound here and replace
# the objects of the same name created for show_stats_deviation.

# Statistic selector; starts on the first entry returned by get_stat_list().
stat = widgets.Dropdown(
    options=data_class.get_stat_list(),
    value=data_class.get_stat_list()[0],
    description='stat',
    disabled=False,
)

# Condition selector: the conditions present in `data` plus 'all' (initial value).
condition = widgets.Dropdown(
    options=[*data['condition'].unique().tolist(), 'all'],
    value='all',
    description='condition',
    disabled=False,
)

stats_deviation = widgets.interactive_output(
    data_class.show_stat,
    {'stat': stat, 'condition': condition},
)

display(stat, condition, stats_deviation)

In [None]:
# Tabular view of the results.
# Work on a copy so that `result` is independent of `data`.
result = data.copy()
result

In [None]:
# Save the results to 'reduced_data.csv' (path relative to the working directory).
result.to_csv('reduced_data.csv')

# PCA

In [None]:
# Feature weights from the PCA.
# Rows follow pca.components_; columns are labelled with data_class.data.columns.
# NOTE(review): presumably one row per principal component and one column per
# feature — confirm against the object returned by data_reduction().
feature_importance = pd.DataFrame(pca.components_, columns=data_class.data.columns)
feature_importance

In [None]:
# Scatter of the feature weights: row 0 of `feature_importance` on the x axis,
# row 1 on the y axis, one point per feature.
fig, ax = plt.subplots()
ax.scatter(feature_importance.iloc[0], feature_importance.iloc[1])

# `label=` names the whole point collection with a single legend entry, so a
# list of feature names passed there never labels the individual points.
# Write every feature name next to its own point instead.
for feature_name, weight_0, weight_1 in zip(feature_importance.columns,
                                            feature_importance.iloc[0],
                                            feature_importance.iloc[1]):
    ax.annotate(feature_name, (weight_0, weight_1),
                xytext=(2, 2), textcoords='offset points', fontsize=6)

ax.set_xlabel('component 0')
ax.set_ylabel('component 1')
plt.show()

In [None]:
# Feature weights of row 0 of `feature_importance`, in ascending order.
feature_importance.loc[0].sort_values()

In [None]:
# Feature weights of row 1 of `feature_importance`, in ascending order.
feature_importance.loc[1].sort_values()

In [None]:
# Centres of mass: mean 'x' and 'y' of `result` for every value of 'mouse'.
centers = result.pivot_table(values=['x', 'y'], index='mouse', aggfunc='mean')
centers

In [None]:
# Cosine-similarity ranking of the features.
# For every row of `centers` (one per mouse) each feature is scored as
#   cosine_similarity(centre, weights) * ||weights||
# where `weights` is that feature's column of `feature_importance`. This is
# the signed length of the projection of the weight vector onto the direction
# of the centre. The `top_n` features with the largest absolute score are
# kept, and 'rating' is their rank (0 = strongest).
# NOTE(review): assumes `feature_importance` has as many rows as `centers` has
# columns ('x', 'y'); cosine_similarity raises otherwise.
top_n = 8

feature_records = []
for label, center in centers.iterrows():
    scores = feature_importance.apply(
        lambda weights: cosine_similarity([center], [weights])[0, 0]
        * (weights ** 2).sum() ** (1 / 2)
    )
    strongest = scores[scores.abs().sort_values(ascending=False).head(top_n).index]
    # One record per selected feature: when fewer than `top_n` features exist
    # every column still gets the same number of entries, whereas padding
    # 'label' and 'rating' to a fixed `top_n` made the DataFrame constructor fail.
    feature_records.extend(
        {'value': value, 'label': label, 'feature': feature, 'rating': rank}
        for rank, (feature, value) in enumerate(strongest.items())
    )

features = pd.DataFrame(feature_records, columns=['value', 'label', 'feature', 'rating'])
# Each (label, feature) pair occurs once, so the default mean aggregation only reshapes.
features = features.pivot_table(values=['value', 'rating'], index=['label', 'feature'])
features = features.sort_values(by=['label', 'rating'])
features

In [None]:
# Visualisation of the most important features by cosine similarity:
# one horizontal bar chart per label, stacked vertically.
feature_df = features.reset_index()

labels = feature_df['label'].unique()

# squeeze=False makes plt.subplots always return an array of Axes; by default
# a single label yields a bare Axes object that cannot be zipped with `labels`.
# ravel() gives the 1-D array the loop below iterates over.
fig, axs = plt.subplots(len(labels), 1, figsize=(12, 5*len(labels)), squeeze=False)
axs = axs.ravel()
fig.subplots_adjust(hspace=.3)


for ax, label in zip(axs, labels):

    sns.barplot(data = feature_df[feature_df['label'] == label], y='feature', x='value', ax=ax)
    ax.set_title(label, fontsize=18)

    ax.set_xlabel('value', fontsize=14)
    ax.set_ylabel('feature', fontsize=14)
    ax.tick_params(axis='both', labelsize=14)

plt.show()