### Another shot at dimensionality reduction techniques
Want to try PCA again with the following setup: 
- Restrict to HC-only or OFC-only cells; both populations are small (19 cells in HC, 18 in OFC)
- Condition on one selected feature at a time
- Group trials into 3 groups: 
  - A: high feature val, high confidence
  - B: low feature val, high confidence
  - C: low feature val, low confidence
Also want to try: 
- 50ms time bins, smoothed with 50ms std Gaussian

### Load Data, Imports

In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import utils.pseudo_utils as pseudo_utils
import utils.pseudo_classifier_utils as pseudo_classifier_utils
import utils.behavioral_utils as behavioral_utils
from utils.session_data import SessionData
import utils.io_utils as io_utils
from utils.constants import *
import json

from spike_tools import (
    general as spike_general,
    analysis as spike_analysis,
)

import matplotlib.pyplot as plt
import matplotlib

In [2]:
# the output directory to store the data
OUTPUT_DIR = "/data/patrick_res/pseudo"
# path to a pickled dataframe of sessions to analyze
# (alternative sessions file, currently disabled:)
# SESSIONS_PATH = "/data/patrick_scratch/multi_sess/valid_sessions.pickle"
SESSIONS_PATH = "/data/patrick_res/sessions/valid_sessions_rpe.pickle"
# path template for each session's behavior CSV; fill in with .format(sess_name=...)
SESS_BEHAVIOR_PATH = "/data/rawdata/sub-SA/sess-{sess_name}/behavior/sub-SA_sess-{sess_name}_object_features.csv"
# path template for each session's spikes that have been pre-aligned to event time and binned;
# placeholders: sess_name, pre_interval, event, post_interval, interval_size
SESS_SPIKES_PATH = "/data/patrick_res/firing_rates/{sess_name}_firing_rates_{pre_interval}_{event}_{post_interval}_{interval_size}_bins_1_smooth.pickle"

# names of the feature dimensions
FEATURE_DIMS = ["Color", "Shape", "Pattern"]

### Per session, label trials
Each trial needs a confidence label (a median split on `Prob_FE`) as well as its feature values.

In [3]:
def get_labels_for_session(session, feat):
    """
    Build the labelled trial table for one session, conditioned on one feature.

    Reads the session's behavior CSV, keeps the valid trials, merges in the
    selection features on "TrialNumber", and keeps only the rows whose
    FEATURE_TO_DIM[feat] column equals `feat`. Feature values and RPE/probability
    columns are then attached via the behavioral_utils helpers.

    Three label columns are added to the result:
      - "Conf": "high" where Prob_FE is strictly above the median Prob_FE of the
        retained trials, otherwise "low" (ties and NaN therefore fall in "low").
      - "MaxFeatMatches": True where the MaxFeat column equals `feat`.
      - "Session": the session name, repeated on every row.

    Args:
        session: session name, substituted into SESS_BEHAVIOR_PATH.
        feat: feature name; must be a key of FEATURE_TO_DIM.

    Returns:
        A new DataFrame with one row per retained trial and the label columns above.
    """
    behavior_path = SESS_BEHAVIOR_PATH.format(sess_name=session)

    beh = pd.read_csv(behavior_path)
    valid_beh = behavioral_utils.get_valid_trials(beh)
    feature_selections = behavioral_utils.get_selection_features(valid_beh)
    valid_beh_merged = pd.merge(valid_beh, feature_selections, on="TrialNumber", how="inner")

    # restrict to rows whose value in feat's dimension column is feat itself
    feat_dim = FEATURE_TO_DIM[feat]
    valid_beh_merged = valid_beh_merged[valid_beh_merged[feat_dim] == feat]

    valid_beh_vals = behavioral_utils.get_feature_values_per_session(session, valid_beh_merged)
    valid_beh_vals_conf = behavioral_utils.get_rpes_per_session(session, valid_beh_vals)

    # Median split computed on the retained trials only. Done column-wise rather
    # than with a row-wise apply: faster, preserves column dtypes, and still
    # produces a "Conf" column when no trials are retained.
    prob_fe = valid_beh_vals_conf["Prob_FE"]
    med_conf = np.median(prob_fe.to_numpy())

    # assign() returns a new frame, so the helper's output is never mutated in place
    return valid_beh_vals_conf.assign(
        Conf=np.where(prob_fe > med_conf, "high", "low"),
        MaxFeatMatches=valid_beh_vals_conf["MaxFeat"] == feat,
        Session=session,
    )

In [4]:
# Feature to condition on for this pass.
feature = "CYAN"
# Session table; the `session_name` column identifies each session.
valid_sessions = pd.read_pickle(SESSIONS_PATH)
# Label each session's trials for `feature`, then stack them into one frame.
res = pd.concat([
    get_labels_for_session(sess_name, feature)
    for sess_name in valid_sessions["session_name"]
])

### Preprocess spike data: 
Want a spikes table that has columns: 
- trial
- timestep
- condition
- unit ID
- spikes

In [None]:
# Non-null counts per (Session, Conf, MaxFeatMatches) group; show only the first 50 groups.
res.groupby(by=["Session", "Conf", "MaxFeatMatches"]).count().head(50)

In [7]:
# Pull out the labelled trials of a single example session.
sess_res = res.loc[res["Session"] == "20180802"]

In [14]:
# Balance the example session's trials over the (MaxFeatMatches, Conf) conditions.
# NOTE(review): the meaning of the trailing 10 is defined by the helper — confirm there.
balanced = behavioral_utils.balance_trials_by_condition(
    sess_res,
    ["MaxFeatMatches", "Conf"],
    10,
)

In [16]:
# Check the balancing: non-null counts per (MaxFeatMatches, Conf) group.
balanced.groupby(by=["MaxFeatMatches", "Conf"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,TrialNumber,BlockNumber,TrialAfterRuleChange,TaskInterrupt,ConditionNumber,Response,ItemChosen,TrialType,CurrentRule,LastRule,...,MaxFeat,trial_y,fb,Prob_FE,Prob_FD,Prob_FRL,RPE_FE,RPE_FD,RPE_FRL,Session
MaxFeatMatches,Conf,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
False,high,94,94,94,0,94,94,94,94,94,82,...,94,94,94,94,94,94,94,94,94,94
False,low,94,94,94,1,94,94,94,94,94,86,...,94,94,94,94,94,94,94,94,94,94
True,high,94,94,94,0,94,94,94,94,94,86,...,94,94,94,94,94,94,94,94,94,94
True,low,94,94,94,0,94,94,94,94,94,93,...,94,94,94,94,94,94,94,94,94,94


In [18]:
# Per session, ask the helper for the minimum trial count over the condition
# groups, then display the smallest value found in any session.
conditions = ["MaxFeatMatches", "Conf"]
sess_mins = res.groupby("Session").apply(
    lambda sess_trials: behavioral_utils.get_min_num_trials_by_condition(sess_trials, conditions)
)
np.min(sess_mins)

5

### For each feature, the minimum number of trials in any condition of any session

In [19]:
# Repeat the minimum-trials check for every feature in FEATURES.
# NOTE: each iteration rebinds the notebook-level `feature` and `res`.
conditions = ["MaxFeatMatches", "Conf"]
for feature in FEATURES:
    res = pd.concat([
        get_labels_for_session(sess_name, feature)
        for sess_name in valid_sessions["session_name"]
    ])
    sess_mins = res.groupby("Session").apply(
        lambda sess_trials: behavioral_utils.get_min_num_trials_by_condition(sess_trials, conditions)
    )
    sess_min = np.min(sess_mins)
    print(f"{feature}: {sess_min}")

CIRCLE: 1
SQUARE: 6
STAR: 11
TRIANGLE: 6
CYAN: 5
GREEN: 1
MAGENTA: 7
YELLOW: 5
ESCHER: 1
POLKADOT: 9
RIPPLE: 10
SWIRL: 1


### For each feature, the number of sessions with at least N trials per condition (N = `min_num_trials`, set to 50 below)

In [20]:
# For every feature, count the sessions the helper accepts as having enough
# trials per condition.
# NOTE: each iteration rebinds the notebook-level `feature` and `res`.
conditions = ["MaxFeatMatches", "Conf"]
min_num_trials = 50
for feature in FEATURES:
    res = pd.concat([
        get_labels_for_session(sess_name, feature)
        for sess_name in valid_sessions["session_name"]
    ])
    sess_valid = res.groupby("Session").apply(
        lambda sess_trials: behavioral_utils.validate_enough_trials_by_condition(
            sess_trials, conditions, min_num_trials
        )
    )
    # boolean mask on itself: keep only the sessions flagged True
    valids = sess_valid.loc[sess_valid]
    print(f"{feature}: {len(valids)}")

CIRCLE: 6
SQUARE: 12
STAR: 3
TRIANGLE: 6
CYAN: 7
GREEN: 6
MAGENTA: 5
YELLOW: 14
ESCHER: 6
POLKADOT: 10
RIPPLE: 4
SWIRL: 5
