In [1]:
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

from sklearn.decomposition import NMF

import utils.sulfur.constant as const
from utils.CaImageCSV import CaImageCSV
from utils.context_data_csv import ContextDataCSV
from utils.matrix_optimizer import MatrixOptimizer

from sklearn.preprocessing import MinMaxScaler

In [2]:
def split_engram_matrix(animal_name, context_name):
    csv = ContextDataCSV(animal_name, context_name)
    matrix = csv.data_frame
    matrix = MatrixOptimizer(matrix).divide_sd()
    
    engram_df = matrix.loc[:, csv.engram_cells()]
    non_engram_df = matrix.loc[:, csv.non_engram_cells()]
    
    return engram_df, non_engram_df

def droped_unique_cells(cells1, cells2):
    droped_cells = np.array([])
    droped_cells = np.append(droped_cells, cells1)
    droped_cells = np.append(droped_cells, cells2)
    droped_cells = np.unique(droped_cells)
    return droped_cells

def all_nan_cells(matrix):
    nan_cells = np.array([])
    for cell_name, items in matrix.iteritems():
        if items.isnull().all():
            nan_cells = np.append(nan_cells, cell_name)
            
    return nan_cells

def split_matrix(matrix, context_name):
    matrix_1 = matrix.iloc[0:400, :]
    matrix_2 = matrix.iloc[900:1300, :]
    matrix_3 = matrix.iloc[1800:2200, :]
    
    return matrix_1, matrix_2, matrix_3

def AICc(distance, K, N):
    return 2 * distance + 2 * K * (N / (N - K - 1)) # 対数尤度=-Dis(x,y) より 

def setted_NMF(df, components_size):
    model = NMF(n_components=components_size, init='random', random_state=0, beta_loss='kullback-leibler', solver='mu', max_iter=2000)
    model.fit(df)
    W = model.transform(df)
    H = model.components_
    
    return W, H, model

def calc_matching_score(base_pattern_vectors, comp_pattern_vectors, threshold, d):
    matching_score = 0
    for base_vector in base_pattern_vectors:
        comp_sum = 0
        for comp_vector in comp_pattern_vectors:
            dot_product = np.dot(base_vector, comp_vector)
            comp_sum += step_func(dot_product - threshold)

        matching_score += step_func(comp_sum - d)
        
    return matching_score / len(base_pattern_vectors)
    
def step_func(value):
    return 1.0 if value > 0.0 else 0.0

In [3]:
index = const.ANIMAL_NAMES
columns = ['A1_1', 'A1_2', 'A1_3', 'A4_1', 'A4_2', 'A4_3']
engram_matching_score = pd.DataFrame(index=index, columns=columns)

for animal_name in const.ANIMAL_NAMES:
    print('animal_name: %s' % animal_name)
    
    A1postES_engram, A1postES_non_engram = split_engram_matrix(animal_name, 'A1postES')
    A4postES_engram, A4postES_non_engram = split_engram_matrix(animal_name, 'A4postES')

    A1postES_nan_engram_cells = all_nan_cells(A1postES_engram)
    A1postES_nan_non_engram_cells = all_nan_cells(A1postES_non_engram)
    A4postES_nan_engram_cells = all_nan_cells(A4postES_engram)
    A4postES_nan_non_engram_cells = all_nan_cells(A4postES_non_engram)
    
    droped_engram_cells = droped_unique_cells(A1postES_nan_engram_cells, A4postES_nan_engram_cells)
    droped_non_engram_cells = droped_unique_cells(A1postES_nan_non_engram_cells, A4postES_nan_non_engram_cells)
    
    A1postES_engram.drop(columns=droped_engram_cells, inplace=True)
    A1postES_non_engram.drop(columns=droped_non_engram_cells, inplace=True)

    A4postES_engram.drop(columns=droped_engram_cells, inplace=True)
    A4postES_non_engram.drop(columns=droped_non_engram_cells, inplace=True)
    
    engram_A1postES_1, engram_A1postES_2, engram_A1postES_3 = split_matrix(A1postES_engram, 'A1postES')
    engram_A4postES_1, engram_A4postES_2, engram_A4postES_3 = split_matrix(A4postES_engram, 'A4postES')

    non_engram_A1postES_1, non_engram_A1postES_2, non_engram_A1postES_3 = split_matrix(A1postES_non_engram, 'A1postES')
    non_engram_A4postES_1, non_engram_A4postES_2, non_engram_A4postES_3 = split_matrix(A4postES_non_engram, 'A4postES')
    
    engram_df = {}
    engram_df['A1_1'] = engram_A1postES_1
    engram_df['A1_2'] = engram_A1postES_2
    engram_df['A1_3'] = engram_A1postES_3
    engram_df['A4_1'] = engram_A4postES_1
    engram_df['A4_2'] = engram_A4postES_2
    engram_df['A4_3'] = engram_A4postES_3
    
    engram_patterns = {}
    for context_name, df in engram_df.items():
        engram_aic_value = float('inf')
        engram_min_H = None
        upper_count = 0
        
        for n in range(1, len(df.columns) + 1):
            _, H, model = setted_NMF(df, n)
            err = model.reconstruction_err_
            aic_value = AICc(err, n, len(columns) * 400)
            if engram_aic_value > aic_value:
                upper_count = 0
                engram_aic_value = aic_value
                engram_min_H = H
                
            if engram_aic_value < aic_value:
                upper_count += 1
                
            if upper_count > 20:
                break
                
        scaler = MinMaxScaler()
        normalized_H = scaler.fit_transform(engram_min_H)
        engram_patterns[context_name] = normalized_H
    
    d_for_ms = 0.05
    engram_matching_score.loc[animal_name, 'A1_1'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A1_1'], 0.6, d_for_ms)
    engram_matching_score.loc[animal_name, 'A1_2'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A1_2'], 0.6, d_for_ms)
    engram_matching_score.loc[animal_name, 'A1_3'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A1_3'], 0.6, d_for_ms)
    engram_matching_score.loc[animal_name, 'A4_1'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A4_1'], 0.6, d_for_ms)
    engram_matching_score.loc[animal_name, 'A4_2'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A4_2'], 0.6, d_for_ms)
    engram_matching_score.loc[animal_name, 'A4_3'] = calc_matching_score(engram_patterns['A1_1'], engram_patterns['A4_3'], 0.6, d_for_ms)
    print(engram_matching_score)
engram_matching_score

animal_name: ID181106CreA
             A1_1 A1_2 A1_3 A4_1 A4_2 A4_3
ID181106CreA  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreB  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreC  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreG  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreH  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreI  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreK  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreL  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreN  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreQ  NaN  NaN  NaN  NaN  NaN  NaN
animal_name: ID181106CreB
             A1_1 A1_2 A1_3 A4_1 A4_2 A4_3
ID181106CreA  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreB  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreC  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreG  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreH  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreI  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreK  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreL  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreN  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreQ  NaN  NaN  NaN  NaN  NaN  NaN
an



             A1_1 A1_2 A1_3 A4_1 A4_2 A4_3
ID181106CreA  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreB  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreC  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreG  1.0  1.0  1.0  1.0  1.0  1.0
ID181106CreH  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreI  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreK  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreL  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreN  NaN  NaN  NaN  NaN  NaN  NaN
ID181106CreQ  NaN  NaN  NaN  NaN  NaN  NaN
animal_name: ID181106CreH
             A1_1 A1_2 A1_3      A4_1 A4_2 A4_3
ID181106CreA  1.0  1.0  1.0       1.0  1.0  1.0
ID181106CreB  1.0  1.0  1.0       1.0  1.0  1.0
ID181106CreC  1.0  1.0  1.0       1.0  1.0  1.0
ID181106CreG  1.0  1.0  1.0       1.0  1.0  1.0
ID181106CreH  1.0  1.0  1.0  0.970588  1.0  1.0
ID181106CreI  NaN  NaN  NaN       NaN  NaN  NaN
ID181106CreK  NaN  NaN  NaN       NaN  NaN  NaN
ID181106CreL  NaN  NaN  NaN       NaN  NaN  NaN
ID181106CreN  NaN  NaN  NaN       NaN  NaN  NaN
ID181106CreQ  NaN  Na

Unnamed: 0,A1_1,A1_2,A1_3,A4_1,A4_2,A4_3
ID181106CreA,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreB,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreC,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreG,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreH,1.0,1.0,1.0,0.970588,1.0,1.0
ID181106CreI,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreK,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreL,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreN,1.0,1.0,1.0,1.0,1.0,1.0
ID181106CreQ,1.0,0.941176,0.941176,0.941176,0.941176,0.941176


In [4]:
engram_matching_score.mean()

A1_1    1.000000
A1_2    0.994118
A1_3    0.994118
A4_1    0.991176
A4_2    0.994118
A4_3    0.994118
dtype: float64