In [1]:
#import
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from scipy.special import comb
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import make_scorer
from itertools import combinations 
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from sklearn.model_selection import train_test_split, cross_val_score
import random
from sklearn.model_selection import KFold

In [2]:
# Load dataset
file_names = [
    'C_1_binned.pkl',
    'C_2_binned.pkl',
    'H_1_binned.pkl',
    'H_2_binned.pkl',
]


def load_dataset(filei):
    """Read one of the pickled datasets listed in ``file_names``.

    Parameters
    ----------
    filei : int
        Position of the wanted file inside ``file_names``.

    Returns
    -------
    The object stored in the pickle, exactly as ``pd.read_pickle`` returns it.
    """
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # files that come from a trusted source.
    return pd.read_pickle(file_names[filei])
    
def defineXy(dataset):
    """Separate the label column from the predictor columns.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns "direction", "passive" and
        "directions_x_passive".

    Returns
    -------
    (y, X) : tuple
        ``y`` is the "directions_x_passive" column; ``X`` is a DataFrame with
        that column, "direction" and "passive" removed. Note the order:
        labels first, predictors second.
    """
    label_column = "directions_x_passive"
    labels = dataset[label_column]
    # Everything that is not a predictor: the two component columns plus the label.
    non_predictors = ["direction", "passive", label_column]
    predictors = pd.DataFrame(dataset.drop(columns=non_predictors))
    return labels, predictors

In [3]:
# Noise shuffling function
def shuffle_noise_correlation(df, response="directions_x_passive", random_state=None):
    """Permute every predictor column independently within each response class.

    For each distinct value of ``df[response]`` and for each predictor column,
    the column's values are randomly re-ordered among the rows that carry that
    response value. The set of values each column takes within a class is
    unchanged; only their pairing across columns within a row is broken up.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is not modified.
    response : str, default "directions_x_passive"
        Name of the label column. All other columns are treated as predictors.
    random_state : None, int or numpy.random.Generator, optional
        ``None`` (default) draws permutations from NumPy's global random
        state, so results follow ``np.random.seed``. Any other value is handed
        to ``np.random.default_rng`` to make the shuffle reproducible.

    Returns
    -------
    pandas.DataFrame
        Same index and predictor column names as ``df``, shuffled predictor
        values, and the ``response`` column appended last.
    """
    response_vals = df[response]
    predictors = df.drop(columns=response)
    # Take an explicit copy: to_numpy() may return a view of the frame's data
    # (and a read-only one when pandas copy-on-write is active).
    x = predictors.to_numpy(copy=True)
    labels = response_vals.to_numpy()
    # The numpy.random module and a Generator both expose ``permutation``.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    for categ in np.unique(labels):
        categ_idx = np.flatnonzero(labels == categ)
        for predictori in range(x.shape[1]):
            x[categ_idx, predictori] = x[rng.permutation(categ_idx), predictori]
    # Keep the caller's index and column names. With a default RangeIndex the
    # response column below would be aligned by label and could come back as
    # NaN or attached to the wrong rows.
    df_shuffle = pd.DataFrame(x, index=df.index, columns=predictors.columns)
    df_shuffle[response] = response_vals
    return df_shuffle

In [4]:
# Build model
kernel = ['linear', 'rbf']
def model(dataset,kerneli):
    """Tune an SVM on one half of ``dataset`` and predict the other half.

    The rows are split 50/50 with a fixed ``random_state``. On the training
    half, a pipeline of ``StandardScaler`` followed by ``SVC`` is tuned with a
    5-fold stratified grid search and then refit on the whole training half.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Passed to ``defineXy`` to obtain the labels and predictors.
    kerneli : int
        Index into the module-level ``kernel`` list (0 -> 'linear', 1 -> 'rbf').

    Returns
    -------
    Classification
        Predicted labels for the held-out half of the rows.
    params : dict
        Best hyper-parameters found by the grid search, keyed by the plain SVC
        argument name ('C', plus 'gamma' for the non-linear kernel).
    """
    y, X = defineXy(dataset)
    Xtrain, Xtest, ytrain, _ = train_test_split(X, y, test_size=0.5, random_state=0)

    kernel_name = kernel[kerneli]
    # make_pipeline names each step after its lower-cased class name, so the
    # SVC's parameters are addressed through the "svc__" prefix.
    step_prefix = 'svc__'
    # Set the parameters for tuning
    parameters = {step_prefix + 'C': np.logspace(-3, 3, 7)}
    if kernel_name != 'linear':
        parameters[step_prefix + 'gamma'] = np.logspace(-5, 1, 7)

    # The scaler sits inside the cross-validated estimator so that it is
    # re-fitted on the training part of every fold. Scaling the whole training
    # half before the grid search would let each validation fold influence the
    # scaling used to score it.
    pipeline = make_pipeline(
        StandardScaler(),
        SVC(decision_function_shape='ovo', kernel=kernel_name),
    )
    cv = StratifiedKFold(n_splits=5, random_state=20201208, shuffle=True)
    clf = GridSearchCV(pipeline, parameters, cv=cv, refit=True)
    clf.fit(Xtrain, ytrain)
    Classification = clf.predict(Xtest)
    # Strip the pipeline prefix so callers keep seeing {'C': ..., 'gamma': ...}.
    params = {name[len(step_prefix):]: value
              for name, value in clf.best_params_.items()}
    return Classification, params

def condition(filei,kerneli,shuffle):
    """Load one dataset and run ``model`` on it, optionally after shuffling.

    Parameters
    ----------
    filei : int
        Index of the dataset, forwarded to ``load_dataset``.
    kerneli : int
        Kernel index, forwarded to ``model``.
    shuffle : int
        0 runs ``model`` on the data as loaded; any other value first applies
        ``shuffle_noise_correlation``.

    Returns
    -------
    (Classification, params) exactly as returned by ``model``.
    """
    dataset = load_dataset(filei)
    if shuffle == 0:
        return model(dataset, kerneli)

    # Remove the two columns that must not be shuffled as predictors.
    predictors_and_label = dataset.drop(["direction", "passive"], axis=1)
    # NOTE(review): the shuffle runs on the full dataset, before model() splits
    # it into train and test halves, so shuffled values can cross that
    # boundary - confirm this is intended.
    dataset_shuffle = shuffle_noise_correlation(predictors_and_label)
    # model() -> defineXy() drops these two columns again, so zero-filled
    # placeholders are enough to keep that drop from failing.
    n_rows = predictors_and_label.shape[0]
    dataset_shuffle['direction'] = np.zeros(n_rows)
    dataset_shuffle['passive'] = np.zeros(n_rows)
    return model(dataset_shuffle, kerneli)

__Arguments of `condition(filei, kerneli, shuffle)`:__

filei: index into `file_names`, i.e. which monkey's dataset to load

kerneli: 0 is linear, 1 is rbf

shuffle: 0 is unshuffled, 1 is shuffled



In [5]:
# Fit the four kernel x shuffle combinations for dataset index 0.
# Arguments are condition(filei, kerneli, shuffle); each call returns the
# test-half predictions and the selected hyper-parameters.
# The shuffled runs draw from NumPy's global random state, which is not seeded
# in this notebook, so their results can differ from one run to the next.
# Monkey 1, Linear, Unshuffle
Classification_US_l, params_US_l = condition(0,0,0)
# Monkey 1, Non-Linear, Unshuffle
Classification_US_nl, params_US_nl = condition(0,1,0)
# Monkey 1, Linear, Shuffle
Classification_S_l, params_S_l = condition(0,0,1)
# Monkey 1, Non-Linear, Shuffle
Classification_S_nl,  params_S_nl = condition(0,1,1)

In [6]:
# Show the hyper-parameters selected by the grid search for each of the four fits
# (order: unshuffled linear, unshuffled rbf, shuffled linear, shuffled rbf).
print(params_US_l, params_US_nl, params_S_l, params_S_nl)

{'C': 0.1} {'C': 10.0, 'gamma': 0.001} {'C': 0.01} {'C': 10.0, 'gamma': 0.001}


__Monkey 1 Results__

In [7]:
# Rebuild the held-out half of dataset 0 so the predictions can be stored next
# to the features and true labels. The split arguments (test_size=0.5,
# random_state=0) are the same ones model() uses, so the rows are expected to
# line up with the returned predictions.
df = load_dataset(0)
y = df["directions_x_passive"]
X = df.drop(columns=["directions_x_passive"])
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5, random_state=0)
# Take an explicit copy before adding columns: depending on the pandas version,
# pd.DataFrame(Xtest) can share its underlying data with Xtest, so the new
# columns could silently show up in Xtest as well.
df = Xtest.copy()
df['Labels'] = ytest

# Monkey 1, Linear, Unshuffle
Classification_US_l, params_US_l = condition(0,0,0)
# Monkey 1, Non-Linear, Unshuffle
Classification_US_nl, params_US_nl = condition(0,1,0)
# Monkey 1, Linear, Shuffle
Classification_S_l, params_S_l = condition(0,0,1)
# Monkey 1, Non-Linear, Shuffle
Classification_S_nl, params_S_nl = condition(0,1,1)

# The predictions are plain arrays, so they are attached by row position.
df['Classification_US_l'] = Classification_US_l
df['Classification_US_nl'] = Classification_US_nl
df['Classification_S_l'] = Classification_S_l
df['Classification_S_nl'] = Classification_S_nl
df.to_csv('C_1_Classification.csv')

print(params_US_l, params_US_nl, params_S_l, params_S_nl)

{'C': 0.1} {'C': 10.0, 'gamma': 0.001} {'C': 0.01} {'C': 10.0, 'gamma': 0.001}


__Monkey 2 Results__

In [8]:
# Rebuild the held-out half of dataset 1 so the predictions can be stored next
# to the features and true labels. The split arguments match the ones model()
# uses, so the rows are expected to line up with the returned predictions.
df = load_dataset(1)
y = df["directions_x_passive"]
X = df.drop(columns=["directions_x_passive"])

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5, random_state=0)
# Take an explicit copy before adding columns: depending on the pandas version,
# pd.DataFrame(Xtest) can share its underlying data with Xtest, so the new
# columns could silently show up in Xtest as well.
df = Xtest.copy()
df['Labels'] = ytest

# condition(filei, kerneli, shuffle): linear / rbf kernel, unshuffled / shuffled.
Classification_US_l, params_US_l = condition(1,0,0)
Classification_US_nl, params_US_nl = condition(1,1,0)
Classification_S_l, params_S_l = condition(1,0,1)
Classification_S_nl, params_S_nl = condition(1,1,1)

# The predictions are plain arrays, so they are attached by row position.
df['Classification_US_l'] = Classification_US_l
df['Classification_US_nl'] = Classification_US_nl
df['Classification_S_l'] = Classification_S_l
df['Classification_S_nl'] = Classification_S_nl
df.to_csv('C_2_Classification.csv')

print(params_US_l, params_US_nl, params_S_l, params_S_nl)

{'C': 0.01} {'C': 10.0, 'gamma': 0.001} {'C': 0.01} {'C': 1.0, 'gamma': 0.01}


__Monkey 3 Results__

In [9]:
# Rebuild the held-out half of dataset 2 so the predictions can be stored next
# to the features and true labels. The split arguments match the ones model()
# uses, so the rows are expected to line up with the returned predictions.
df = load_dataset(2)
y = df["directions_x_passive"]
X = df.drop(columns=["directions_x_passive"])

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5, random_state=0)
# Take an explicit copy before adding columns: depending on the pandas version,
# pd.DataFrame(Xtest) can share its underlying data with Xtest, so the new
# columns could silently show up in Xtest as well.
df = Xtest.copy()
df['Labels'] = ytest

# condition(filei, kerneli, shuffle): linear / rbf kernel, unshuffled / shuffled.
Classification_US_l, params_US_l = condition(2,0,0)
Classification_US_nl, params_US_nl = condition(2,1,0)
Classification_S_l, params_S_l = condition(2,0,1)
Classification_S_nl, params_S_nl = condition(2,1,1)

# The predictions are plain arrays, so they are attached by row position.
df['Classification_US_l'] = Classification_US_l
df['Classification_US_nl'] = Classification_US_nl
df['Classification_S_l'] = Classification_S_l
df['Classification_S_nl'] = Classification_S_nl
df.to_csv('H_1_Classification.csv')

print(params_US_l, params_US_nl, params_S_l, params_S_nl)

{'C': 0.1} {'C': 1000.0, 'gamma': 1e-05} {'C': 0.1} {'C': 1000.0, 'gamma': 0.0001}


__Monkey 4 Results__

In [10]:
# Rebuild the held-out half of dataset 3 so the predictions can be stored next
# to the features and true labels. The split arguments match the ones model()
# uses, so the rows are expected to line up with the returned predictions.
df = load_dataset(3)
y = df["directions_x_passive"]
X = df.drop(columns=["directions_x_passive"])

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.5, random_state=0)
# Take an explicit copy before adding columns: depending on the pandas version,
# pd.DataFrame(Xtest) can share its underlying data with Xtest, so the new
# columns could silently show up in Xtest as well.
df = Xtest.copy()
df['Labels'] = ytest

# condition(filei, kerneli, shuffle): linear / rbf kernel, unshuffled / shuffled.
Classification_US_l, params_US_l = condition(3,0,0)
Classification_US_nl, params_US_nl = condition(3,1,0)
Classification_S_l, params_S_l = condition(3,0,1)
Classification_S_nl, params_S_nl = condition(3,1,1)

# The predictions are plain arrays, so they are attached by row position.
df['Classification_US_l'] = Classification_US_l
df['Classification_US_nl'] = Classification_US_nl
df['Classification_S_l'] = Classification_S_l
df['Classification_S_nl'] = Classification_S_nl
df.to_csv('H_2_Classification.csv')

print(params_US_l, params_US_nl, params_S_l, params_S_nl)

{'C': 0.01} {'C': 10.0, 'gamma': 0.001} {'C': 0.1} {'C': 10.0, 'gamma': 0.001}
