# Spectral feature modeling

1. Includes GLM for spectral features

2. @ bottom of notebook:
        - filter dataframe for non-responses and zero contrasts
        - group dataframe by brain area & contrast and get mean of each feature

In [15]:
import numpy as np
from statsmodels.formula.api import glm
import pandas as pd

import matplotlib.pyplot as plt

In [4]:
path = "/Users/sydneysmith/Downloads/NMA_features.csv" #change path to where you've downloaded this csv from github

In [22]:
regions = ["vis ctx", "thal", "hipp", "other ctx", "midbrain", "basal ganglia", "cortical subplate", "other"]
brain_groups = [["VISa", "VISam", "VISl", "VISp", "VISpm", "VISrl"], # visual cortex
                ["CL", "LD", "LGd", "LH", "LP", "MD", "MG", "PO", "POL", "PT", "RT", "SPF", "TH", "VAL", "VPL", "VPM"], # thalamus
                ["CA", "CA1", "CA2", "CA3", "DG", "SUB", "POST"], # hippocampal
                ["ACA", "AUD", "COA", "DP", "ILA", "MOp", "MOs", "OLF", "ORB", "ORBm", "PIR", "PL", "SSp", "SSs", "RSP"," TT"], # non-visual cortex
                ["APN", "IC", "MB", "MRN", "NB", "PAG", "RN", "SCs", "SCm", "SCig", "SCsg", "ZI"], # midbrain
                ["ACB", "CP", "GPe", "LS", "LSc", "LSr", "MS", "OT", "SNr", "SI"], # basal ganglia 
                ["BLA", "BMA", "EP", "EPd", "MEA"] # cortical subplate
                ]

In [9]:
df = pd.read_csv(path)

train = df.sample(frac = 0.3, random_state=2) #separated into training sample

In [42]:
features = list(df.columns[11:]) # Spectral features in dataframe

### GLM with data from all brain areas & select brain areas

In [21]:
formula = 'contrast_diff ~ exponent + offset + theta_cf + theta_pow + theta_band + beta_cf + beta_pow + beta_band + gamma_cf + gamma_pow + gamma_band'

lm = glm(formula, train).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,9203.0
Model:,GLM,Df Residuals:,9191.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.32782
Method:,IRLS,Log-Likelihood:,-7920.5
Date:,"Wed, 21 Jul 2021",Deviance:,3013.0
Time:,16:21:28,Pearson chi2:,3010.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0901,0.068,1.332,0.183,-0.043,0.223
exponent,-0.0986,0.026,-3.738,0.000,-0.150,-0.047
offset,0.0547,0.019,2.938,0.003,0.018,0.091
theta_cf,-0.0135,0.004,-3.069,0.002,-0.022,-0.005
theta_pow,-0.0203,0.019,-1.044,0.296,-0.058,0.018
theta_band,0.0004,8.96e-05,3.931,0.000,0.000,0.001
beta_cf,4.731e-05,0.001,0.037,0.970,-0.002,0.003
beta_pow,0.0246,0.026,0.938,0.348,-0.027,0.076
beta_band,-0.0002,0.000,-1.176,0.240,-0.000,0.000


In [29]:
# visual cortex --> gamma oscillatiory power

lm = glm(formula, train[train['brain_area'].isin(brain_groups[0])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,1054.0
Model:,GLM,Df Residuals:,1042.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.31907
Method:,IRLS,Log-Likelihood:,-887.51
Date:,"Wed, 21 Jul 2021",Deviance:,332.47
Time:,16:28:58,Pearson chi2:,332.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1962,0.219,0.894,0.371,-0.234,0.626
exponent,-0.1375,0.096,-1.431,0.152,-0.326,0.051
offset,0.0206,0.066,0.312,0.755,-0.109,0.150
theta_cf,-0.0329,0.013,-2.455,0.014,-0.059,-0.007
theta_pow,-0.1160,0.054,-2.157,0.031,-0.221,-0.011
theta_band,0.0017,0.001,2.069,0.039,9e-05,0.003
beta_cf,-0.0005,0.004,-0.116,0.907,-0.008,0.007
beta_pow,-0.1224,0.075,-1.633,0.103,-0.269,0.025
beta_band,0.0007,0.001,0.688,0.492,-0.001,0.003


In [32]:
# thalamus

lm = glm(formula, train[train['brain_area'].isin(brain_groups[1])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,1778.0
Model:,GLM,Df Residuals:,1766.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.33685
Method:,IRLS,Log-Likelihood:,-1549.5
Date:,"Wed, 21 Jul 2021",Deviance:,594.88
Time:,16:30:24,Pearson chi2:,595.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1001,0.160,0.627,0.531,-0.213,0.413
exponent,-0.0839,0.113,-0.744,0.457,-0.305,0.137
offset,0.0483,0.080,0.607,0.544,-0.108,0.204
theta_cf,-0.0198,0.011,-1.783,0.075,-0.042,0.002
theta_pow,0.0281,0.047,0.598,0.550,-0.064,0.120
theta_band,0.0007,0.000,2.394,0.017,0.000,0.001
beta_cf,0.0001,0.003,0.044,0.965,-0.006,0.006
beta_pow,-0.0398,0.067,-0.595,0.552,-0.171,0.091
beta_band,0.0004,0.001,0.669,0.503,-0.001,0.001


In [34]:
# hippocamal formation

lm = glm(formula, train[train['brain_area'].isin(brain_groups[2])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,1971.0
Model:,GLM,Df Residuals:,1959.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.32412
Method:,IRLS,Log-Likelihood:,-1680.4
Date:,"Wed, 21 Jul 2021",Deviance:,634.95
Time:,16:31:09,Pearson chi2:,635.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1350,0.143,0.947,0.344,-0.144,0.414
exponent,-0.1623,0.073,-2.238,0.025,-0.304,-0.020
offset,0.1245,0.051,2.418,0.016,0.024,0.225
theta_cf,-0.0160,0.010,-1.574,0.115,-0.036,0.004
theta_pow,0.0121,0.042,0.287,0.774,-0.070,0.094
theta_band,3.328e-05,0.000,0.308,0.758,-0.000,0.000
beta_cf,-0.0011,0.003,-0.399,0.690,-0.006,0.004
beta_pow,0.0074,0.059,0.125,0.901,-0.108,0.123
beta_band,-0.0001,0.000,-0.686,0.493,-0.000,0.000


In [57]:
# non-visual cortex (currently ORBm, can use brain_groups[3] for general non-visual cortex

lm = glm(formula, train[train['brain_area']=='ORBm']).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,36.0
Model:,GLM,Df Residuals:,24.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.27226
Method:,IRLS,Log-Likelihood:,-20.365
Date:,"Wed, 21 Jul 2021",Deviance:,6.5342
Time:,17:17:20,Pearson chi2:,6.53
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.5273,1.481,-0.356,0.722,-3.431,2.376
exponent,2.9652,1.607,1.845,0.065,-0.185,6.115
offset,-2.3971,1.302,-1.841,0.066,-4.949,0.154
theta_cf,0.0361,0.100,0.361,0.718,-0.160,0.232
theta_pow,-0.2607,0.623,-0.419,0.675,-1.481,0.960
theta_band,0.0053,0.010,0.515,0.607,-0.015,0.025
beta_cf,0.0332,0.022,1.537,0.124,-0.009,0.075
beta_pow,-1.0109,0.688,-1.470,0.142,-2.359,0.337
beta_band,-0.0038,0.015,-0.255,0.799,-0.033,0.026


In [36]:
# midbrain --> THETA BAND

lm = glm(formula, train[train['brain_area'].isin(brain_groups[4])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,1456.0
Model:,GLM,Df Residuals:,1444.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.3379
Method:,IRLS,Log-Likelihood:,-1270.1
Date:,"Wed, 21 Jul 2021",Deviance:,487.93
Time:,16:32:58,Pearson chi2:,488.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.2349,0.179,1.315,0.188,-0.115,0.585
exponent,-0.1511,0.111,-1.363,0.173,-0.368,0.066
offset,0.0714,0.078,0.915,0.360,-0.082,0.224
theta_cf,0.0028,0.012,0.239,0.811,-0.021,0.026
theta_pow,-0.1131,0.053,-2.124,0.034,-0.217,-0.009
theta_band,0.0013,0.000,3.648,0.000,0.001,0.002
beta_cf,-0.0011,0.003,-0.351,0.725,-0.007,0.005
beta_pow,0.1073,0.069,1.549,0.121,-0.028,0.243
beta_band,-0.0010,0.001,-1.895,0.058,-0.002,3.39e-05


In [37]:
# basal ganglia --> theta_band

lm = glm(formula, train[train['brain_area'].isin(brain_groups[5])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,710.0
Model:,GLM,Df Residuals:,698.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.31862
Method:,IRLS,Log-Likelihood:,-595.36
Date:,"Wed, 21 Jul 2021",Deviance:,222.39
Time:,16:35:05,Pearson chi2:,222.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1144,0.253,-0.453,0.651,-0.610,0.381
exponent,0.0431,0.141,0.306,0.759,-0.232,0.319
offset,-0.0797,0.097,-0.819,0.413,-0.270,0.111
theta_cf,0.0066,0.015,0.431,0.666,-0.023,0.036
theta_pow,0.0369,0.070,0.528,0.597,-0.100,0.174
theta_band,0.0018,0.001,2.849,0.004,0.001,0.003
beta_cf,0.0055,0.005,1.193,0.233,-0.004,0.014
beta_pow,-0.0397,0.098,-0.406,0.685,-0.231,0.152
beta_band,-0.0007,0.001,-1.018,0.309,-0.002,0.001


In [38]:
# basal ganglia 

lm = glm(formula, train[train['brain_area'].isin(brain_groups[6])]).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,134.0
Model:,GLM,Df Residuals:,122.0
Model Family:,Gaussian,Df Model:,11.0
Link Function:,identity,Scale:,0.32246
Method:,IRLS,Log-Likelihood:,-108.02
Date:,"Wed, 21 Jul 2021",Deviance:,39.34
Time:,16:35:40,Pearson chi2:,39.3
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.0655,0.564,-1.888,0.059,-2.171,0.040
exponent,-1.0507,0.480,-2.188,0.029,-1.992,-0.109
offset,0.8668,0.329,2.636,0.008,0.222,1.511
theta_cf,-0.0221,0.039,-0.562,0.574,-0.099,0.055
theta_pow,0.1891,0.158,1.198,0.231,-0.120,0.499
theta_band,-5.216e-05,0.001,-0.038,0.970,-0.003,0.003
beta_cf,0.0116,0.010,1.166,0.244,-0.008,0.031
beta_pow,0.6172,0.262,2.352,0.019,0.103,1.132
beta_band,-0.0019,0.002,-0.887,0.375,-0.006,0.002


In [17]:
formula = 'contrast_diff ~ exponent + offset + theta_cf + theta_band'
lm = glm(formula, train).fit()
lm.summary()

0,1,2,3
Dep. Variable:,contrast_diff,No. Observations:,16168.0
Model:,GLM,Df Residuals:,16163.0
Model Family:,Gaussian,Df Model:,4.0
Link Function:,identity,Scale:,0.32758
Method:,IRLS,Log-Likelihood:,-13917.0
Date:,"Wed, 21 Jul 2021",Deviance:,5294.7
Time:,16:15:34,Pearson chi2:,5290.0
No. Iterations:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0803,0.021,3.828,0.000,0.039,0.121
exponent,-0.0604,0.015,-4.042,0.000,-0.090,-0.031
offset,0.0361,0.011,3.247,0.001,0.014,0.058
theta_cf,-0.0094,0.003,-2.849,0.004,-0.016,-0.003
theta_band,0.0002,5.16e-05,3.512,0.000,8.01e-05,0.000


## Filtering and sorting dataframe

In [77]:
df_filtered = df[(df['response']!=0) | # filter out non-response
                 (df['contrast_right'] !=0) | # filter out where contrast right is 0
                  (df['contrast_left']) !=0] # filter out where contrast left is 0

In [78]:
df_filtered

Unnamed: 0.1,Unnamed: 0,recording,mouse_name,shank,brain_area,trial,contrast_left,contrast_right,contrast_diff,response,...,offset,theta_cf,theta_pow,theta_band,beta_cf,beta_pow,beta_band,gamma_cf,gamma_pow,gamma_band
0,0,0.0,Cori,0.0,ACA,0.0,1.0,0.00,1.00,1.0,...,2.664121,,,9.831546,,,10.620027,36.095439,2.014865,18.387417
1,1,0.0,Cori,0.0,ACA,1.0,0.0,0.50,-0.50,-1.0,...,2.144954,,,11.997439,19.000000,0.938180,15.890682,45.826713,1.538463,10.291032
2,2,0.0,Cori,0.0,ACA,2.0,1.0,0.50,0.50,1.0,...,1.446829,3.226824,1.225611,6.192864,21.057067,1.104569,9.401584,38.460663,0.924613,6.324518
4,4,0.0,Cori,0.0,ACA,4.0,0.5,1.00,-0.50,1.0,...,1.212975,,,10.501702,26.125270,1.304335,17.662793,45.406778,1.445720,10.705526
5,5,0.0,Cori,0.0,ACA,5.0,0.0,0.00,0.00,1.0,...,1.430677,,,9.777060,26.699767,1.203111,8.272372,44.010933,1.428439,4.644057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90556,90556,38.0,Theiler,9.0,PL,338.0,1.0,0.25,0.75,0.0,...,2.153136,7.188424,0.904128,23.539290,,,23.824528,47.881411,1.412883,30.299521
90557,90557,38.0,Theiler,9.0,PL,339.0,1.0,0.25,0.75,-1.0,...,1.057102,4.264742,1.366179,3.063958,20.245277,1.550889,22.908508,43.442056,1.114572,14.479311
90558,90558,38.0,Theiler,9.0,PL,340.0,1.0,0.25,0.75,0.0,...,0.577381,5.999716,1.801409,15.579604,,,30.962327,37.766152,0.713209,35.356546
90559,90559,38.0,Theiler,9.0,PL,341.0,1.0,0.25,0.75,0.0,...,2.013084,,,14.942919,24.611828,1.344539,64.787653,38.210336,1.433152,20.251010


In [79]:
levels = df_filtered.groupby(['brain_area', 'contrast_diff'])[features].mean()

In [80]:
levels

Unnamed: 0_level_0,Unnamed: 1_level_0,exponent,offset,theta_cf,theta_pow,theta_band,beta_cf,beta_pow,beta_band,gamma_cf,gamma_pow,gamma_band
brain_area,contrast_diff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ACA,-1.00,1.216330,1.056786,5.407010,1.289229,16.989041,22.016144,1.060558,20.522425,41.448126,1.237905,14.454372
ACA,-0.75,1.220387,1.116396,5.744760,1.283396,25.822874,21.906677,1.166425,27.829805,41.192293,1.232289,15.843587
ACA,-0.50,1.178345,1.051979,5.493439,1.321729,24.122764,21.705525,1.138546,27.413036,40.890386,1.243761,15.377588
ACA,-0.25,1.073311,0.961111,5.464462,1.328346,28.152664,21.639636,1.139015,31.930410,40.674300,1.228600,18.332627
ACA,0.00,1.088913,0.860372,5.453677,1.269841,21.215460,21.502774,1.126781,22.917578,40.566432,1.216266,13.920563
...,...,...,...,...,...,...,...,...,...,...,...,...
ZI,0.00,0.860253,0.752829,6.180982,1.304943,42.335438,20.754728,1.112447,42.846231,41.030641,1.177505,22.674343
ZI,0.25,1.017528,1.013117,6.250893,1.280374,75.557336,20.159004,1.223126,68.096565,40.805702,1.189747,23.734947
ZI,0.50,1.003999,1.093829,6.153682,1.155866,76.076520,19.794035,1.193146,78.302478,40.232720,1.166816,28.608411
ZI,0.75,0.980268,0.987551,6.223347,1.243536,74.795719,21.471125,1.137117,65.917287,40.165283,1.231228,27.459149
