### Have some extra sessions from Mike, haven't had object_features behavior files produced yet, see what the good pairs look like with these sessions included

In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import utils.behavioral_utils as behavioral_utils
import utils.information_utils as information_utils
import utils.visualization_utils as visualization_utils
import utils.glm_utils as glm_utils
from matplotlib import pyplot as plt
import utils.spike_utils as spike_utils
from constants.glm_constants import *
from constants.behavioral_constants import *

import seaborn as sns
import scipy.stats


In [4]:
num_bins = 2

def get_sess_beh(row, beh_path, sub):
    session = row.session_name
    behavior_path = beh_path.format(sess_name=session)
    beh = pd.read_csv(behavior_path)

    # # filter trials 
    beh = behavioral_utils.get_valid_trials(beh)
    feature_selections = behavioral_utils.get_selection_features(beh)
    beh = pd.merge(beh, feature_selections, on="TrialNumber", how="inner")
    beh["session"] = session
    return beh

In [5]:
sub = "BL"
# Monkey B
# all_units = pd.read_pickle("/data/patrick_res/firing_rates/BL/all_units.pickle")
# sessions = pd.DataFrame({"session_name": all_units.session.unique()})
sessions = pd.read_pickle("/data/patrick_res/sessions/BL/valid_sessions.pickle")
all_beh = pd.concat(sessions.apply(lambda x: get_sess_beh(x, BL_SESS_BEHAVIOR_PATH, "BL"), axis=1).values).reset_index()

In [8]:
block_thresh = 3
pairs = behavioral_utils.get_good_pairs_across_sessions(all_beh, block_thresh)
pairs.sort_values(by="num_sessions", ascending=False)[:20]

Unnamed: 0,pair,sessions,num_sessions,dim_type
61,"[ESCHER, RIPPLE]","[20190529, 20190617, 20190710, 20190816, 20190...",5,within dim
62,"[ESCHER, SWIRL]","[20190529, 20190627, 20190814, 20190823]",4,within dim
65,"[RIPPLE, SWIRL]","[20190524, 20190529, 20190823]",3,within dim
27,"[STAR, POLKADOT]","[20190605, 20190606, 20190607]",3,across dim
2,"[CIRCLE, TRIANGLE]","[20190603, 20190606, 20190812]",3,within dim
34,"[TRIANGLE, ESCHER]","[20190221, 20190710, 20190814]",3,across dim
1,"[CIRCLE, STAR]","[20190228, 20190606, 20190703]",3,within dim
60,"[ESCHER, POLKADOT]","[20190227, 20190531, 20190816]",3,within dim
25,"[STAR, YELLOW]","[20190524, 20190605, 20190703]",3,across dim
52,"[MAGENTA, ESCHER]","[20190130, 20190530, 20190823]",3,across dim


### Try to read one of the CSVs

In [26]:
df = pd.read_csv("/data/patrick_res/scratch/Blanche-BehaviorOnly/sub-BL_sess-190125_parsedbehavior.csv", header=None)
df = df.rename(columns={0: "TrialNumber", 2: "BlockNumber", 3: "CurrentRule"})[["TrialNumber", "BlockNumber", "CurrentRule"]]

In [27]:


df["CurrentRule"] = df["CurrentRule"].apply(lambda x: rule_map[x])

In [28]:
df.CurrentRule.unique()

array(['CYAN', 'ESCHER', 'SWIRL', 'CIRCLE', 'STAR', 'GREEN', 'RIPPLE',
       'POLKADOT', 'SQUARE', 'MAGENTA'], dtype=object)

### Aggregate all of em

In [50]:
base_dir = "/data/patrick_res/scratch/Blanche-BehaviorOnly/"
file_names = os.listdir(base_dir)


In [52]:
rule_map = [
    "", 
    "POLKADOT", "RIPPLE", "ESCHER", "SWIRL", 
    "CYAN", "MAGENTA", "YELLOW", "GREEN", 
    "CIRCLE", "TRIANGLE", "STAR", "SQUARE"
]
dfs = []
for file_name in file_names:
    if file_name.endswith(".csv"): 
        sess_name = "20" + file_name.split("_")[1].split("-")[1]
        df = pd.read_csv(os.path.join(base_dir, file_name), header=None)
        df = df.rename(columns={0: "TrialNumber", 2: "BlockNumber", 3: "CurrentRule"})[["TrialNumber", "BlockNumber", "CurrentRule"]]
        df["CurrentRule"] = df["CurrentRule"].apply(lambda x: rule_map[x])
        df["session"] = sess_name
        dfs.append(df)
new_beh = pd.concat(dfs)

In [55]:
block_thresh = 3
new_pairs = behavioral_utils.get_good_pairs_across_sessions(new_beh, block_thresh)
new_pairs = new_pairs.sort_values(by="num_sessions", ascending=False)[:20]

In [57]:
new_beh

Unnamed: 0,TrialNumber,BlockNumber,CurrentRule,session
0,19,1,STAR,20190920
1,20,1,STAR,20190920
2,21,1,STAR,20190920
3,22,1,STAR,20190920
4,23,1,STAR,20190920
...,...,...,...,...
1020,1030,24,TRIANGLE,20190524
1021,1031,24,TRIANGLE,20190524
1022,1032,24,TRIANGLE,20190524
1023,1033,24,TRIANGLE,20190524


In [61]:
old_beh = all_beh[["TrialNumber", "BlockNumber", "CurrentRule", "session"]]

In [62]:
agg_beh = pd.concat((old_beh, new_beh))

In [66]:
block_thresh = 3
agg_pairs = behavioral_utils.get_good_pairs_across_sessions(agg_beh, block_thresh)
agg_pairs = agg_pairs.sort_values(by="num_sessions", ascending=False)[:20]

In [71]:
good_pairs = agg_pairs[agg_pairs.num_sessions > 6]

In [96]:
len(good_pairs)

11

In [78]:
sessions = good_pairs.sessions.explode().unique()

In [109]:
len(sessions)

47

### Get sessions that have been spike sorted, sessions that have object_features..

In [83]:
all_units = pd.read_pickle("/data/patrick_res/firing_rates/BL/all_units.pickle")
sorted_sessions = all_units.session.unique()
of_sessions = pd.read_pickle("/data/patrick_res/sessions/BL/valid_sessions.pickle").session_name.unique()

### Get sessions where no object features have been generated yet

In [103]:
no_of_sessions = sessions[~np.isin(sessions, of_sessions)]
df = pd.DataFrame({"sessions": no_of_sessions})
df.to_csv("/data/patrick_res/scratch/no_of_sessions.csv")

### Get sessions where object features have been generated, but haven't been spike sorted yet

In [107]:
of_sessions = sessions[np.isin(sessions, of_sessions)]
of_no_spike_sess = of_sessions[~np.isin(of_sessions, sorted_sessions)]
df = pd.DataFrame({"sessions": of_no_spike_sess})
df.to_csv("/data/patrick_res/scratch/of_no_spike_sessions.csv")

In [108]:
good_pairs.to_csv("/data/patrick_res/scratch/good_pairs.csv")