### Another shot at dimensionality reduction techniques
Want to try PCA again with the following features: 
- Try on either HC-only or OFC-only cells; both are small populations (19 in HC, 18 in OFC)
- Condition on one selected feature at a time
- Group trials into 3 groups: 
  - A: high feature val, high confidence
  - B: low feature val, high confidence
  - C: low feature val, low confidence

Also want to try:
- 50ms time bins, smoothed with 50ms std Gaussian

### Load Data, Imports

In [6]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import utils.pseudo_utils as pseudo_utils
import utils.pseudo_classifier_utils as pseudo_classifier_utils
import utils.behavioral_utils as behavioral_utils
from utils.session_data import SessionData
import utils.io_utils as io_utils
from utils.constants import *
import json

from spike_tools import (
    general as spike_general,
    analysis as spike_analysis,
)

import matplotlib.pyplot as plt
import matplotlib

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Directory where results of this analysis are stored.
OUTPUT_DIR = "/data/patrick_res/pseudo"

# Pickled dataframe listing the sessions to analyze.
# (Alternative used earlier: "/data/patrick_scratch/multi_sess/valid_sessions.pickle")
SESSIONS_PATH = "/data/patrick_res/sessions/valid_sessions_rpe.pickle"

# Per-session behavior CSV; `sess_name` is filled in with str.format.
SESS_BEHAVIOR_PATH = (
    "/data/rawdata/sub-SA/sess-{sess_name}/behavior/"
    "sub-SA_sess-{sess_name}_object_features.csv"
)

# Per-session spikes that were already aligned to an event time and binned;
# every `{...}` placeholder is filled in with str.format.
SESS_SPIKES_PATH = (
    "/data/patrick_res/firing_rates/"
    "{sess_name}_firing_rates_{pre_interval}_{event}_{post_interval}"
    "_{interval_size}_bins_1_smooth.pickle"
)

# Names of the feature dimensions.
FEATURE_DIMS = ["Color", "Shape", "Pattern"]

### Per session, label trials
Need confidence values, as well as feature values

In [8]:
def get_labels_for_session(session, feat):
    """
    Build the per-trial label table for one session, conditioned on one feature.

    Steps:
      1. Read the session's behavior CSV (SESS_BEHAVIOR_PATH) and keep the
         trials that behavioral_utils.get_valid_trials returns.
      2. Inner-join the selection features onto those trials by TrialNumber.
      3. Keep only rows whose column for `feat`'s dimension equals `feat`
         (presumably trials where the selected item carried that feature;
         confirm against behavioral_utils.get_selection_features).
      4. Add feature values and RPE groups through the behavioral_utils
         per-session helpers.
      5. Add two columns: `MaxFeatMatches` (True where `MaxFeat == feat`) and
         `Session` (the session name).

    Args:
        session: session name, substituted into SESS_BEHAVIOR_PATH.
        feat: a feature name that is a key of FEATURE_TO_DIM.

    Returns:
        The dataframe produced by get_rpe_groups_per_session, with the
        `MaxFeatMatches` and `Session` columns set on it.
    """
    beh_raw = pd.read_csv(SESS_BEHAVIOR_PATH.format(sess_name=session))
    trials = behavioral_utils.get_valid_trials(beh_raw)

    # attach the selection features to each valid trial
    selections = behavioral_utils.get_selection_features(trials)
    trials = trials.merge(selections, on="TrialNumber", how="inner")

    # restrict to rows whose value along feat's dimension is feat itself
    dim_of_feat = FEATURE_TO_DIM[feat]
    trials = trials.loc[trials[dim_of_feat] == feat]

    with_vals = behavioral_utils.get_feature_values_per_session(session, trials)
    labeled = behavioral_utils.get_rpe_groups_per_session(session, with_vals)

    labeled["MaxFeatMatches"] = labeled.MaxFeat.eq(feat)
    labeled["Session"] = session
    return labeled

In [9]:
# Load the dataframe of sessions to analyze. Later cells pass each row's
# `session_name` to get_labels_for_session.
# NOTE(review): unpickling can execute arbitrary code, so SESSIONS_PATH must
# only ever point at a trusted file.
valid_sessions = pd.read_pickle(SESSIONS_PATH)

### The number of sessions with at least N trials per condition for each feature

In [10]:
# For every feature, count the sessions in which each combination of the
# `conditions` columns has at least `min_num_trials` correct trials.
conditions = ["MaxFeatMatches", "RPEGroup"]
min_num_trials = 20
print(f"at least {min_num_trials} trials: ")
for feature in FEATURES:
    # One labelled frame per session, stacked into a single frame. Iterating
    # the session names directly avoids a row-wise DataFrame.apply that
    # returns DataFrames, which builds a throwaway row Series per session and
    # relies on pandas wrapping the results in an object Series.
    res = pd.concat(
        [
            get_labels_for_session(sess_name, feature)
            for sess_name in valid_sessions["session_name"]
        ]
    )
    # only correct trials are counted
    res = res[res.Response == "Correct"]
    # one entry per session; used as a boolean mask below, so the helper is
    # expected to return True/False per group
    sess_valid = res.groupby("Session").apply(
        lambda group: behavioral_utils.validate_enough_trials_by_condition(
            group, conditions, min_num_trials
        )
    )
    valids = sess_valid[sess_valid]
    print(f"{feature}: {len(valids)}")

at least 20 trials: 
CIRCLE: 12
SQUARE: 17
STAR: 11
TRIANGLE: 10
CYAN: 17
GREEN: 12
MAGENTA: 16
YELLOW: 19
ESCHER: 14
POLKADOT: 13
RIPPLE: 9
SWIRL: 15


### Number of OFC or HC sessions with at least N trials per condition

In [11]:
# Session names used as the HC (hippocampus) set when filtering below.
hip_sessions = [
    "20180920",
    "20180918",
    "20180912",
    "20181008",
    "20181002",
    "20181005",
    "201807250001",
    "20180926",
    "20181004",
    "20181009",
    "20181010",
    "20180921",
    "20180925",
    "20180910",
]

# Session names used as the OFC set when filtering below. Some names also
# appear in hip_sessions.
ofc_sessions = [
    "20180709",
    "20180801",
    "201807250001",
    "20180802",
    "20180808",
    "20180705",
    "20180921",
    "20180712",
    "20180910",
]

In [12]:
# Same per-feature session count as before, split into the sessions listed in
# hip_sessions and those listed in ofc_sessions.
conditions = ["MaxFeatMatches", "RPEGroup"]
min_num_trials = 20
for feature in FEATURES:
    # One labelled frame per session, stacked into a single frame. Iterating
    # the session names directly avoids a row-wise DataFrame.apply that
    # returns DataFrames, which builds a throwaway row Series per session and
    # relies on pandas wrapping the results in an object Series.
    res = pd.concat(
        [
            get_labels_for_session(sess_name, feature)
            for sess_name in valid_sessions["session_name"]
        ]
    )
    # only correct trials are counted
    res = res[res.Response == "Correct"]
    # one entry per session; used as a boolean mask below, so the helper is
    # expected to return True/False per group
    sess_valid = res.groupby("Session").apply(
        lambda group: behavioral_utils.validate_enough_trials_by_condition(
            group, conditions, min_num_trials
        )
    )
    valids = sess_valid[sess_valid]
    # the index of `valids` is the Session name, so membership in each region's
    # session list can be tested on the index
    hip_valids = valids[valids.index.isin(hip_sessions)]
    ofc_valids = valids[valids.index.isin(ofc_sessions)]
    print(f"{feature}: hip sessions {len(hip_valids)}, ofc sessions {len(ofc_valids)}")

CIRCLE: hip sessions 7, ofc sessions 3
SQUARE: hip sessions 6, ofc sessions 6
STAR: hip sessions 6, ofc sessions 3
TRIANGLE: hip sessions 3, ofc sessions 4
CYAN: hip sessions 9, ofc sessions 6
GREEN: hip sessions 4, ofc sessions 4
MAGENTA: hip sessions 8, ofc sessions 5
YELLOW: hip sessions 10, ofc sessions 7
ESCHER: hip sessions 9, ofc sessions 3
POLKADOT: hip sessions 5, ofc sessions 4
RIPPLE: hip sessions 6, ofc sessions 3
SWIRL: hip sessions 8, ofc sessions 6


In [13]:
# Show the sessions in hip_sessions that passed the trial-count check.
# NOTE(review): `valids` is whatever the final iteration of the per-feature
# loop left behind, so this only reflects the last entry of FEATURES and
# depends on that loop cell having been run immediately before.
valids[valids.index.isin(hip_sessions)]

Session
20180910    True
20180912    True
20180920    True
20180921    True
20180925    True
20181002    True
20181008    True
20181010    True
dtype: bool