### How many pairs of rules have at least N sessions in which both rules appear in at least 3 blocks?

In [76]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import utils.behavioral_utils as behavioral_utils
import utils.information_utils as information_utils
import utils.visualization_utils as visualization_utils
import utils.glm_utils as glm_utils
from matplotlib import pyplot as plt
import utils.spike_utils as spike_utils
from constants.glm_constants import *
from constants.behavioral_constants import *

import seaborn as sns

from scipy.stats import ortho_group
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
import seaborn as sns
import scipy.stats


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [90]:
# Number of confidence bins requested from calc_confidence in get_sess_beh.
num_bins = 2

def get_sess_beh(row, beh_path):
    """Load one session's behavior table and annotate it with confidence labels.

    Parameters
    ----------
    row
        A row of the sessions table; only ``row.session_name`` is read.
    beh_path : str
        Path template with a ``{sess_name}`` placeholder that is filled in
        with the session name to locate the behavior CSV.

    Returns
    -------
    pandas.DataFrame
        The behavior table after the ``behavioral_utils`` steps below, with
        two added columns: ``ConfidenceLabel`` (``"High <MaxFeat>"`` when
        ``ConfidenceBin == 1``, otherwise ``"Low"``) and ``session``.
    """
    session = row.session_name
    behavior_path = beh_path.format(sess_name=session)
    beh = pd.read_csv(behavior_path)

    # Filter trials, then attach the selected features to each remaining trial.
    beh = behavioral_utils.get_valid_trials(beh)
    feature_selections = behavioral_utils.get_selection_features(beh)
    beh = pd.merge(beh, feature_selections, on="TrialNumber", how="inner")

    beh = behavioral_utils.get_feature_values_per_session(session, beh)
    beh = behavioral_utils.calc_feature_probs(beh)
    beh = behavioral_utils.calc_feature_value_entropy(beh)
    # Use the shared num_bins setting instead of a second hard-coded copy of it.
    beh = behavioral_utils.calc_confidence(beh, num_bins=num_bins, quantize_bins=True)
    # NOTE(review): bin 1 is treated as "high", which presumably only holds
    # for two bins -- confirm before changing num_bins.
    beh["ConfidenceLabel"] = beh.apply(
        lambda trial: f"High {trial.MaxFeat}" if trial.ConfidenceBin == 1 else "Low",
        axis=1,
    )
    beh["session"] = session
    return beh

In [91]:
# Monkey S
# Read the session list, build each session's annotated behavior table, and
# stack them into one frame.
SESSIONS_PATH = "/data/patrick_res/sessions/valid_sessions_rpe.pickle"
beh_path = SESS_BEHAVIOR_PATH
sessions = pd.read_pickle(SESSIONS_PATH)
# reset_index() without drop keeps each table's previous row index as an
# "index" column in the stacked result.
all_beh = pd.concat(
    [get_sess_beh(sess_row, beh_path) for _, sess_row in sessions.iterrows()]
).reset_index()

ValueError: cannot convert float NaN to integer

In [79]:
# For every (session, CurrentRule) combination, count the distinct BlockNumber
# values. The count is unnamed, so after reset_index() it sits in a column
# labelled 0.
num_blocks = (
    all_beh
    .groupby(["session", "CurrentRule"])
    .apply(lambda grp: len(grp["BlockNumber"].unique()))
    .reset_index()
)

In [80]:
# A session counts for a rule only if the rule has at least this many blocks in it.
MIN_BLOCKS_PER_RULE = 3

# Sessions in which each rule reaches the block threshold. Looked up once per
# rule rather than once per pair. Column 0 of num_blocks holds the block count.
enough_blocks = num_blocks[num_blocks[0] >= MIN_BLOCKS_PER_RULE]
sessions_by_rule = {
    feat: enough_blocks[enough_blocks.CurrentRule == feat].session
    for feat in FEATURES
}

# Walk every unordered pair of distinct rules in FEATURES order (no hard-coded
# feature count) and record the sessions where both rules qualify.
pair_records = []
for i, feat1 in enumerate(FEATURES):
    for feat2 in FEATURES[i + 1:]:
        sess_1 = sessions_by_rule[feat1]
        sess_2 = sessions_by_rule[feat2]
        joints = sess_1[sess_1.isin(sess_2)].values
        pair_records.append({"pair": [feat1, feat2], "sessions": joints, "num_sessions": len(joints)})
pairs = pd.DataFrame(pair_records)

In [81]:
# Show the ten pairs that share the most sessions.
pairs.sort_values(by="num_sessions", ascending=False).head(10)

Unnamed: 0,pair,sessions,num_sessions
40,"[CYAN, YELLOW]","[20180705, 20180801, 20180802, 20180912, 20180...",9
53,"[MAGENTA, POLKADOT]","[20180705, 20180802, 20180803, 20180808, 20180...",8
39,"[CYAN, MAGENTA]","[20180705, 20180801, 20180802, 20180803, 20180...",8
45,"[GREEN, MAGENTA]","[20180802, 20180806, 20180808, 20180912, 20180...",8
46,"[GREEN, YELLOW]","[20180802, 20180912, 20180921, 20180924, 20180...",8
0,"[CIRCLE, SQUARE]","[20180709, 20180802, 20180803, 20180806, 20180...",8
6,"[CIRCLE, YELLOW]","[20180802, 20180918, 20180921, 20180924, 20181...",7
51,"[MAGENTA, YELLOW]","[20180705, 20180801, 20180802, 20180912, 20180...",7
4,"[CIRCLE, GREEN]","[20180802, 20180806, 20180808, 20180921, 20180...",7
54,"[MAGENTA, RIPPLE]","[20180705, 20180802, 20180803, 20180912, 20180...",7


In [82]:
# Keep only the pairs that share at least 7 sessions.
good_pairs = pairs.loc[pairs["num_sessions"] >= 7]

In [85]:
# Number of pairs that passed the session-count filter.
good_pairs.shape[0]

18

In [87]:
# Write the selected pairs to disk as a pickle.
# NOTE(review): the filename says "10sess", but good_pairs was filtered with
# num_sessions >= 7 -- confirm which threshold is intended.
good_pairs.to_pickle("/data/patrick_res/sessions/pairs_at_least_3blocks_10sess_fixed.pickle")

### For each session and each pair, what is the minimum number of trials that match:
- high-confidence trials where one of the pair's features is the preferred feature
- high-confidence trials where one of the pair's features is chosen but not preferred

In [74]:
def min_trials_per_session(row):
    """Per session, find the scarcest trial type for a pair of features.

    Reads the module-level ``all_beh`` table and ``FEATURE_TO_DIM`` mapping.

    Parameters
    ----------
    row
        A row of the pairs table: ``row.pair`` holds the two feature names and
        ``row.sessions`` the sessions to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per session with columns ``pair``, ``session``, ``min_pref``
        and ``min_not_pref``.

        ``min_pref`` is the smaller of the two counts of trials labelled
        ``"High <feat1>"`` and ``"High <feat2>"``. It is 0 when either label
        never occurs in the session.

        ``min_not_pref`` is computed over trials whose label is neither of
        those two nor ``"Low"``. It is the smaller of the counts of trials
        where feat1 / feat2 appears in its ``FEATURE_TO_DIM`` column.
    """
    feat1, feat2 = row.pair
    pref_labels = [f"High {feat1}", f"High {feat2}"]
    res = []
    for sess in row.sessions:
        beh = all_beh[all_beh.session == sess]

        # Count each preferred label explicitly. A groupby would drop a label
        # that never occurs (reporting the other label's count instead of 0),
        # and np.min would raise if neither label occurred.
        pref_counts = [
            beh.loc[beh.ConfidenceLabel == label, "TrialNumber"].count()
            for label in pref_labels
        ]
        min_pref = np.min(pref_counts)

        # Trials with a "High ..." label for some other feature, i.e. neither
        # pair feature is preferred; count those where each pair feature was
        # nevertheless chosen.
        not_pref_beh = beh[
            (~beh.ConfidenceLabel.isin(pref_labels)) &
            (beh.ConfidenceLabel != "Low")
        ]
        chose_counts = [
            (not_pref_beh[FEATURE_TO_DIM[feat]] == feat).sum()
            for feat in (feat1, feat2)
        ]
        min_not_pref = np.min(chose_counts)

        res.append({"pair": row.pair, "session": sess, "min_pref": min_pref, "min_not_pref": min_not_pref})
    return pd.DataFrame(res)
        
# Build the per-session table for every selected pair and stack them; each
# per-pair table keeps its own row index in the result.
min_trials = pd.concat(
    [min_trials_per_session(pair_row) for _, pair_row in good_pairs.iterrows()]
)



In [34]:
# Flag the rows where the preferred-trial minimum exceeds the not-preferred minimum.
min_trials["pref more than not pref"] = min_trials["min_pref"].gt(min_trials["min_not_pref"])

In [75]:
# Display the per-session minimum trial counts for each pair.
min_trials

Unnamed: 0,pair,session,min_pref,min_not_pref
0,"[CIRCLE, SQUARE]",20180709,57,60
1,"[CIRCLE, SQUARE]",20180802,75,119
2,"[CIRCLE, SQUARE]",20180803,41,110
3,"[CIRCLE, SQUARE]",20180806,74,76
4,"[CIRCLE, SQUARE]",20180808,43,89
...,...,...,...,...
2,"[YELLOW, RIPPLE]",20180912,87,66
3,"[YELLOW, RIPPLE]",20180921,52,51
4,"[YELLOW, RIPPLE]",20181005,49,91
5,"[YELLOW, RIPPLE]",20181009,53,64
