In [7]:
import math
import os
from copy import deepcopy

import numpy as np
import pandas as pd


#############################
# Import and combine all data
#############################


# Directory that holds the input spreadsheets; the figure cells also save
# their PNG files here.  Override it per machine with the HUBBS_DATA_PATH
# environment variable (e.g. "/home/maggie/HUBBS-Lab/data/") instead of
# editing this cell.  os.path.join(..., "") guarantees the trailing separator
# that the plain string concatenations below rely on.
DATA_PATH = os.path.join(
    os.environ.get("HUBBS_DATA_PATH", "/Users/mvonebers/HUBBS-Lab/data/"), ""
)

# Raw inputs: wearable (E4) features, survey score changes, audio features
# and participant demographics.
e4_data = pd.read_excel(DATA_PATH + "E4_TEST.xlsx")
change_data = pd.read_excel(DATA_PATH + "normalized_change.xlsx")
audio_data = pd.read_excel(DATA_PATH + "audio_TEST.xlsx")
demo_data = pd.read_csv(DATA_PATH + "Demographics Information.csv")


# Break apart the ID column into "person" and "trial"
def clean_id(data):
    """Split the combined ``id`` string column into numeric id and trial columns.

    Each ``id`` value is read positionally: character 5 is the trial number
    and everything from character 7 onward is the person number.
    (Presumably the ids look like ``"trial3_12"`` -- TODO confirm against the
    spreadsheets.)

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``id`` column of strings.  It is left unmodified, so the
        call can safely be repeated on the same frame.

    Returns
    -------
    pandas.DataFrame
        New frame whose first two columns are ``id`` (person number, int) and
        ``trial`` (int), followed by the remaining original columns.
    """
    raw_ids = data["id"].astype(str)
    person = raw_ids.str[7:].astype(int)
    trial = raw_ids.str[5].astype(int)

    # drop() returns a new frame, so the inserts below never touch the input.
    cleaned = data.drop(columns=["id"])
    cleaned.insert(0, "id", person)
    cleaned.insert(1, "trial", trial)
    return cleaned

    
# Turn the raw "id" strings of both sensor tables into numeric id/trial columns.
e4_data = clean_id(e4_data)
audio_data = clean_id(audio_data)

# Attach the survey changes to the E4 rows by id, then right-join that table
# onto the audio features so every row of the E4 + survey table is kept.
all_data = audio_data.merge(e4_data.merge(change_data, on="id"), how="right")


# Reorder survey data in order of most samples to least
columns = all_data.columns.to_list()
new_columns = deepcopy(columns)

# (destination position, source position) within the current column order;
# together the five moves permute positions 35, 37, 38, 40 and 41.
for dst, src in ((35, 40), (37, 41), (38, 37), (40, 35), (41, 38)):
    new_columns[dst] = columns[src]

all_data = all_data[new_columns]


# Disabled: this would drop demographic rows whose id is not in slope_ids (slope_ids is only defined in a later cell).
#demo_ids = demo_data['id'].to_list()
#
#for id_ in demo_ids:
#    if id_ not in slope_ids:
#        demo_data = demo_data[demo_data.id != id_]
        
all_data = all_data.merge(demo_data, how="right")

# The demographic data lists IDs that aren't present in the other data, so remove them
ids_without_recordings = [16, 27, 38, 43, 46, 49, 53, 58, 65, 66]
all_data = all_data[~all_data["id"].isin(ids_without_recordings)]

# NOTE(review): get_slopes below hard-codes a 46-column layout and y_columns is
# taken from change_data; confirm both still match once these columns are gone.
survey_columns_to_drop = [
    'CAI Trait Small group Score',
    'CAI Trait Dyadic Score',
    'CAI Trait Public Speaking Score',
    'STAI State Score',
    'Brief fear of Negative Evaluation',
]
all_data = all_data.drop(columns=survey_columns_to_drop)
all_data

Unnamed: 0,id,trial,pcm_RMSenergy_sma_amean,pcm_fftMag_mfcc_sma[1]_amean,pcm_fftMag_mfcc_sma[2]_amean,pcm_fftMag_mfcc_sma[3]_amean,pcm_fftMag_mfcc_sma[4]_amean,pcm_fftMag_mfcc_sma[5]_amean,pcm_fftMag_mfcc_sma[6]_amean,pcm_fftMag_mfcc_sma[7]_amean,...,CAI Trait Full Score,STAI Trait Score,Age,Gender,Lang,college,presentation,ethnicity,presentation_3_months,highest_education
0,4,1.0,0.007879,0.956435,-13.176012,-4.782369,-5.508350,-6.742012,-13.622468,-4.198480,...,0.101695,-0.068182,3.0,1.0,2.0,1.0,2.0,1.0,0.0,3.0
1,4,2.0,0.006843,-0.399977,-12.222916,-8.252062,-2.672821,-3.437455,-14.641911,-2.807243,...,0.101695,-0.068182,3.0,1.0,2.0,1.0,2.0,1.0,0.0,3.0
2,4,3.0,0.003532,0.615522,-9.837350,-2.540836,-2.281998,-3.958653,-14.158829,1.465646,...,0.101695,-0.068182,3.0,1.0,2.0,1.0,2.0,1.0,0.0,3.0
3,4,4.0,0.004056,0.912469,-8.391797,-7.296996,-3.236593,-1.216842,-14.971951,-1.150219,...,0.101695,-0.068182,3.0,1.0,2.0,1.0,2.0,1.0,0.0,3.0
4,4,5.0,0.000788,-1.409059,-1.801911,-5.151633,-2.803752,-6.043437,-5.157262,-2.180702,...,0.101695,-0.068182,3.0,1.0,2.0,1.0,2.0,1.0,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,73,4.0,0.016747,-5.346863,-4.401036,-1.772596,-6.538305,5.228556,-17.409770,-3.816441,...,0.233333,0.033333,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0
148,73,5.0,0.019861,-6.544580,-0.529959,-4.315085,-7.491222,9.523307,-22.557555,-2.752085,...,0.233333,0.033333,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0
149,73,6.0,0.023864,-6.224142,-1.605599,-4.598936,-8.447270,9.679458,-22.852172,-0.745983,...,0.233333,0.033333,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0
150,73,7.0,0.019280,-6.070041,-1.986066,-4.606451,-5.151283,6.730979,-23.007388,-1.617074,...,0.233333,0.033333,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0


In [6]:
###############################################################
# Get slopes from linear regression of the 8 trials for each ID
###############################################################

from sklearn.linear_model import LinearRegression

def get_slopes(data, start, end):
    """Fit a per-person linear trend over trials ``start``..``end`` for each feature.

    The layout of ``data`` is hard-coded: 46 columns with ``trial`` at
    position 1, and 19 people stored as consecutive blocks of 8 rows (one row
    per trial).  Columns 2-33 are regressed against the trial number; columns
    34-45 are copied from the first row of each person's block.

    Parameters
    ----------
    data : pandas.DataFrame
        Combined per-trial table in the layout described above.
    start, end : int
        First and last trial (1-based, inclusive) used for the regression.

    Returns
    -------
    pandas.DataFrame
        19 rows x 45 columns (``data``'s columns without ``trial``).  Feature
        columns hold the fitted slope.  Zeros are replaced by NaN everywhere
        except in the last three columns (demographic codes, per the original
        comment, where 0 is a legitimate value).  A slope is 0 -- and
        therefore NaN in the result -- when the fit fails, e.g. because every
        sample in the window is NaN.
    """
    n_people = 19
    trials_per_person = 8
    n_data_cols = 46
    feature_cols = range(2, 34)   # regressed against the trial number
    static_cols = range(34, 46)   # copied from each person's first row

    slopes = pd.DataFrame(np.zeros((n_people, n_data_cols)), columns=data.columns)
    slopes = slopes.drop(["trial"], axis=1)

    # Positional row at which each person's block of trials begins.
    first_rows = [person * trials_per_person for person in range(n_people)]
    trial_numbers = np.arange(start, end + 1, dtype=float)

    for row, first in enumerate(first_rows):
        slopes.iloc[row, 0] = data.iloc[first, 0]

    for col in feature_cols:
        values = data[data.columns[col]].to_numpy()
        for row, first in enumerate(first_rows):
            window = values[first + start - 1 : first + end].astype(float)
            # The window can be shorter than requested at the end of the data.
            trials = trial_numbers[: len(window)]
            valid = ~np.isnan(window)

            # "trial" was dropped from slopes, hence the col - 1 offset.
            try:
                reg = LinearRegression().fit(
                    trials[valid].reshape(-1, 1), window[valid]
                )
                # coef_ is a 1-element array here; store the scalar itself.
                slopes.iloc[row, col - 1] = reg.coef_[0]
            except Exception:
                # Best effort: an unfittable window (e.g. no valid samples)
                # becomes 0, which is turned into NaN below.
                slopes.iloc[row, col - 1] = 0

    for col in static_cols:
        for row, first in enumerate(first_rows):
            slopes.iloc[row, col - 1] = data.iloc[first, col]

    # Zeros mean "no value" and become NaN -- except in the last three columns
    # (data columns 43-45), where 0 is a real value and must survive.
    zero_mask = slopes.eq(0)
    zero_mask.iloc[:, 42:45] = False
    return slopes.mask(zero_mask)

all_slopes = get_slopes(all_data, 1, 8)
slope_ids = all_slopes['id'].to_list()

# List every slope column with its position; the combo builders below select
# their predictor columns by these positions.
for i, col in enumerate(all_slopes.columns.to_list()):
    print(i, col)

0 id
1 pcm_RMSenergy_sma_amean
2 pcm_fftMag_mfcc_sma[1]_amean
3 pcm_fftMag_mfcc_sma[2]_amean
4 pcm_fftMag_mfcc_sma[3]_amean
5 pcm_fftMag_mfcc_sma[4]_amean
6 pcm_fftMag_mfcc_sma[5]_amean
7 pcm_fftMag_mfcc_sma[6]_amean
8 pcm_fftMag_mfcc_sma[7]_amean
9 pcm_fftMag_mfcc_sma[8]_amean
10 pcm_fftMag_mfcc_sma[9]_amean
11 pcm_fftMag_mfcc_sma[10]_amean
12 pcm_fftMag_mfcc_sma[11]_amean
13 pcm_fftMag_mfcc_sma[12]_amean
14 pcm_zcr_sma_amean
15 voiceProb_sma_amean
16 F0_sma_amean
17 #pause
18 pause_frequency
19 pause_interval
20 mean
21 percent
22 jitterLocal_sma_amean
23 jitterDDP_sma_amean
24 shimmerLocal_sma_amean
25 EDA_PPT
26 HR_PPT
27 TEMP_PPT
28 BVP_PPT
29 ACC_PPT
30 IBI_PPT
31 EDA_FREQ_PPT
32 EDA_AMP_PPT
33 Brief fear of Negative Evaluation
34 CAI State Score
35 CAI Trait Full Score
36 STAI Trait Score
37 Age
38 Gender
39 Lang
40 college
41 presentation
42 ethnicity
43 presentation_3_months
44 highest_education


In [3]:
#from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_predict
from scipy.stats import pearsonr

# Prediction targets: every column of change_data after the first one.
y_columns = list(change_data.columns[1:])

def get_combo_predictions(X0, slope_data):
    """Cross-validate a linear model of every target column on the features ``X0``.

    For each name in the module-level ``y_columns`` list, the rows where any
    feature or the target is NaN are dropped, out-of-fold predictions are
    produced with a shuffled K-fold ``LinearRegression`` (at most 10 folds,
    ``random_state=42``), and the Pearson correlation between those
    predictions and the targets of the *same* rows is recorded.

    Parameters
    ----------
    X0 : array-like, 2-D
        One row of predictor values per row of ``slope_data``, in the same
        order.
    slope_data : pandas.DataFrame
        Frame that contains every column named in ``y_columns``.

    Returns
    -------
    (list, list)
        Correlation coefficients and p-values, one entry per target column in
        ``y_columns`` order.  Both entries are NaN for a target with fewer
        than two usable rows, since neither K-fold nor a correlation is
        defined there.
    """
    features = np.asarray(X0, dtype=float)
    rows_complete = ~np.isnan(features).any(axis=1)

    corrs = []
    ps = []
    for y_col in y_columns:
        target = slope_data[y_col].to_numpy(dtype=float, copy=True)

        # Apply the NaN filter to every row, including the first one.
        keep = rows_complete & ~np.isnan(target)
        X = features[keep]
        y = target[keep]

        if len(y) < 2:
            corrs.append(np.nan)
            ps.append(np.nan)
            continue

        folds = min(10, len(y))
        cv = KFold(folds, shuffle=True, random_state=42)
        predicted = cross_val_predict(LinearRegression(), X, y, cv=cv)

        # Compare against the filtered targets: predicted[j] belongs to y[j],
        # not to row j of the unfiltered column.
        correlation, pval = pearsonr(predicted, y)
        corrs.append(correlation)
        ps.append(pval)

    return corrs, ps
      
def generate_combos(slope_data):
    """Run get_combo_predictions for a fixed list of predictor groups.

    Predictor columns are picked by position in ``slope_data.columns``.

    Returns
    -------
    (list, list, list)
        ``group_c`` and ``group_p`` hold one list of correlations / p-values
        per predictor group (as returned by get_combo_predictions), and
        ``group_titles`` holds the matching group names, all in the same order.
    """
    cols = slope_data.columns

    # Positional column groups.
    # NOTE(review): going by the column listing printed earlier in the
    # notebook, [22:24] covers the two jitter columns but not shimmerLocal,
    # [23:30] runs from jitterDDP to ACC_PPT, and [37:44] stops before
    # highest_education -- confirm the slices match the intended groups.
    mfcc = cols[2:14].to_list()
    pauses = cols[17:20].to_list()  # #pause, pause_frequency, pause_interval
    jitter = cols[22:24].to_list()
    bio = cols[23:30].to_list()
    demos = cols[37:44].to_list()

    combos = [
        ('all demos', demos),
        ("bios + demos", bio + demos),
        ('mfcc1-12 + demos', mfcc + demos),
        ('mfcc1-12', mfcc),
        ('pauses', pauses),
        ('mfccs + pauses', mfcc + pauses),
        ('jitter, shimmer', jitter),
        ('jitter, shimmer, pauses', jitter + pauses),
        ('mfccs + jitter, shimmer', mfcc + jitter),
        ('mfccs + jitter shimmer + pauses', mfcc + jitter + pauses),
        ("all bio", bio),
        ('bio + mfcc', mfcc + bio),
        ('all of em', mfcc + bio + jitter + pauses + demos),
    ]

    group_c = []
    group_p = []
    group_titles = []
    for title, predictor_cols in combos:
        group_titles.append(title)
        features = slope_data[predictor_cols].to_numpy(copy=True)
        c, p = get_combo_predictions(features, slope_data)
        group_c.append(c)
        group_p.append(p)

    return group_c, group_p, group_titles

#group_c, group_p, group_titles = generate_combos(all_slopes)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Heat map of the correlation matrix, annotated with the rounded values.
# NOTE(review): group_c, group_p and group_titles are not assigned anywhere in
# the visible notebook (the generate_combos call that would create them is
# commented out), so this cell raises NameError on a fresh kernel.
# NOTE(review): generate_graphs defines its own short_y_col in a different
# order; confirm which order matches the columns of the matrices.
short_y_col = ["BFNE (19)", "CAI Dy (16)", "CAI F(18)", "CAI PS(17)", "CAI Sm(15)", "STAI T(17)", "CAI St(19)", "STAI St(18)"]
correlations = np.matrix(group_c)
pvalues = np.matrix(group_p)

correlations = np.round(correlations, decimals=2)
pvalues = np.round(pvalues, decimals=2)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20,85))

p = ax.matshow(correlations)
fig.colorbar(p, ax=ax, fraction=0.05, pad=0.04)
# The leading '' presumably absorbs an extra automatic tick before the first
# column -- TODO confirm; tick positions are never set explicitly for x.
ax.set_xticklabels(labels=[''] + short_y_col)
ax.set_yticklabels(labels=group_titles)
plt.yticks(np.arange(0, len(group_titles), 1.0))
for tick in ax.get_xticklabels():
    tick.set_rotation(30)  
    
# Write each rounded value in the centre of its cell (matshow centres cells on integers).
for i in range(len(short_y_col)):
    for j in range(len(group_titles)):
        text = ax.text(i, j, correlations[j, i], ha="center", va="center", color="w")
    
plt.title('LinReg Correlations')
fig.tight_layout()
# NOTE(review): the file name is spelled 'correlatons.png'.
plt.savefig(DATA_PATH + 'correlatons.png')
plt.show()

In [None]:
# Heat map of the p-value matrix, annotated with the rounded values.
# Relies on pvalues, short_y_col and group_titles left behind by the previous
# cell, so it cannot run on its own.
fig, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(20,85))
p = ax2.matshow(pvalues)
fig.colorbar(p, ax=ax2, fraction=0.05, pad=0.04)
# The leading '' presumably absorbs an extra automatic tick before the first
# column -- TODO confirm; tick positions are never set explicitly for x.
ax2.set_xticklabels(labels=[''] + short_y_col)  
ax2.set_yticklabels(labels=group_titles)
plt.yticks(np.arange(0, len(group_titles), 1.0))
for tick in ax2.get_xticklabels():
    tick.set_rotation(30)
    
# Write each rounded p-value in the centre of its cell.
for i in range(len(short_y_col)):
    for j in range(len(group_titles)):
        text = ax2.text(i, j, pvalues[j, i], ha="center", va="center", color="w")

plt.title('LinReg Pvalues')
fig.tight_layout()
plt.savefig(DATA_PATH + 'pvalues.png')
plt.show()

In [4]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

def generate_graphs(c, p, titles, specifier):
    """Save annotated heat maps of the correlations and the p-values as PNGs.

    Two files are written into ``DATA_PATH``:
    ``correlations_<specifier>.png`` (colour range -0.5..0.8) and
    ``pvalues_<specifier>.png`` (colour range 0..1).  Every cell shows its
    value rounded to two decimals.

    Parameters
    ----------
    c, p : sequence of sequences of float
        Correlations and p-values; one inner sequence (row) per predictor
        group, one entry (column) per target.
    titles : sequence of str
        Row labels, one per predictor group.
    specifier : str
        Suffix used in both file names.
    """
    # NOTE(review): these labels are positional -- confirm their order matches
    # the order of the target columns that produced ``c`` and ``p``.
    short_y_col = ["BFNE (19)", "CAI St(19)", "CAI F(18)" , "STAI St(18)", "CAI PS(17)", "STAI T(17)", "CAI Dy (16)", "CAI Sm(15)"]

    def _fit_labels(labels, count):
        # Truncate or pad with '' so there is exactly one label per tick.
        labels = list(labels)[:count]
        return labels + [""] * (count - len(labels))

    correlations = np.round(np.atleast_2d(np.asarray(c, dtype=float)), decimals=2)
    pvalues = np.round(np.atleast_2d(np.asarray(p, dtype=float)), decimals=2)

    panels = [
        (correlations, "correlations_" + specifier + ".png", -0.5, 0.8),
        (pvalues, "pvalues_" + specifier + ".png", 0.0, 1.0),
    ]

    for values, filename, vmin, vmax in panels:
        n_rows, n_cols = values.shape
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20,85))

        mesh = ax.pcolor(values, vmin=vmin, vmax=vmax)
        fig.colorbar(mesh, ax=ax, fraction=0.05, pad=0.04)

        # pcolor cell (j, i) spans [i, i+1] x [j, j+1]; put one tick at each
        # cell centre so every label lines up with its row / column.
        ax.set_xticks(np.arange(n_cols) + 0.5)
        ax.set_xticklabels(_fit_labels(short_y_col, n_cols))
        ax.set_yticks(np.arange(n_rows) + 0.5)
        ax.set_yticklabels(_fit_labels(titles, n_rows))
        for tick in ax.get_xticklabels():
            tick.set_rotation(30)

        for i in range(n_cols):
            for j in range(n_rows):
                ax.text(i + 0.5, j + 0.5, values[j, i], ha="center", va="center", color="w")

        fig.tight_layout()
        fig.savefig(DATA_PATH + filename)
        # Close this figure explicitly so repeated calls do not pile up figures.
        plt.close(fig)
        

"""#slopes_3 = get_slopes(all_data, 1, 3)
#c3, p3, titles3 = generate_combos(slopes_3)
#generate_graphs(c3, p3, titles3, '3')

#slopes_3 = get_slopes(all_data, 5)
#c3, p3, titles3 = generate_combos(slopes_3)
#generate_graphs(c3, p3, titles3, '5')

slopes_all = get_slopes(all_data, 1, 8)
c, p, titles = generate_combos(slopes_all)
generate_graphs(c, p, titles, 'all')

first_4 = get_slopes(all_data, 1, 4)
c4, p4, titles4 = generate_combos(first_4)
generate_graphs(c4, p4, titles4, 'first4')

middle_4 = get_slopes(all_data, 3, 6)
c4, p4, titles4 = generate_combos(middle_4)
generate_graphs(c4, p4, titles4, 'middle4')

last_4 = get_slopes(all_data, 5, 8)
c4, p4, titles4 = generate_combos(last_4)
generate_graphs(c4, p4, titles4, 'last4')

for i in range(1, 5):
    title = str(2*i - 1) + '-' + str(2*i)
    pair = get_slopes(all_data, 2*i - 1, 2*i)
    c2, p2, titles2 = generate_combos(pair)
    generate_graphs(c2, p2, titles2, title)"""

"#slopes_3 = get_slopes(all_data, 1, 3)\n#c3, p3, titles3 = generate_combos(slopes_3)\n#generate_graphs(c3, p3, titles3, '3')\n\n#slopes_3 = get_slopes(all_data, 5)\n#c3, p3, titles3 = generate_combos(slopes_3)\n#generate_graphs(c3, p3, titles3, '5')\n\nslopes_all = get_slopes(all_data, 1, 8)\nc, p, titles = generate_combos(slopes_all)\ngenerate_graphs(c, p, titles, 'all')\n\nfirst_4 = get_slopes(all_data, 1, 4)\nc4, p4, titles4 = generate_combos(first_4)\ngenerate_graphs(c4, p4, titles4, 'first4')\n\nmiddle_4 = get_slopes(all_data, 3, 6)\nc4, p4, titles4 = generate_combos(middle_4)\ngenerate_graphs(c4, p4, titles4, 'middle4')\n\nlast_4 = get_slopes(all_data, 5, 8)\nc4, p4, titles4 = generate_combos(last_4)\ngenerate_graphs(c4, p4, titles4, 'last4')\n\nfor i in range(1, 5):\n    title = str(2*i - 1) + '-' + str(2*i)\n    pair = get_slopes(all_data, 2*i - 1, 2*i)\n    c2, p2, titles2 = generate_combos(pair)\n    generate_graphs(c2, p2, titles2, title)"

In [14]:
def analyze_by_demo(slopes):
    """Run the combo analysis separately for every demographic subgroup.

    Rows containing any NaN are first dropped from ``slopes`` **in place**
    (the caller's frame is modified).  The remaining rows are then split by
    each coded level of Age, Gender, college, ethnicity,
    presentation_3_months and highest_education.  Subgroups with fewer than
    three rows are skipped; for every other subgroup generate_combos and
    generate_graphs are called, using the subgroup name (e.g. ``"age1"``) as
    the file-name specifier.

    Parameters
    ----------
    slopes : pandas.DataFrame
        Slope table that contains the six demographic columns named above.
    """
    # NOTE(review): this also shrinks the frame the caller passed in; later
    # cells that reuse it see only the NaN-free rows -- confirm that is intended.
    slopes.dropna(inplace=True)

    # (column, title prefix, coded levels)
    demo_levels = [
        ("Age", "age", (1.0, 2.0, 3.0)),
        ("Gender", "gender", (1.0, 2.0)),
        ("college", "college", (1.0, 2.0, 3.0, 4.0)),
        ("ethnicity", "ethnicity", (1.0, 2.0, 3.0, 4.0)),
        ("presentation_3_months", "presentation", (0.0, 1.0, 2.0)),
        # edu3 now selects level 3.0; it previously repeated the level-2.0 filter.
        ("highest_education", "edu", (1.0, 2.0, 3.0)),
    ]

    # Build every subgroup up front so a missing column fails before any
    # analysis or file output happens.
    subgroups = [
        (prefix + str(int(level)), slopes[slopes[column] == level])
        for column, prefix, levels in demo_levels
        for level in levels
    ]

    for title, subset in subgroups:
        if subset.shape[0] < 3:
            continue
        print(title, "had", subset.shape[0], "samples")
        c, p, titles = generate_combos(subset)
        print("Finished", title)
        generate_graphs(c, p, titles, title)
        
# Note: analyze_by_demo drops every row containing a NaN from all_slopes in
# place, so cells run after this one see the reduced frame.
analyze_by_demo(all_slopes)

Finished age1
Finished gender1
Finished gender2
Finished college1
Finished college3
Finished ethnicity1
Finished ethnicity2
Finished presentation1
Finished edu1


In [35]:
def generate_combos_with_demo(slope_data, demo_col):
    """Run get_combo_predictions for many predictor groups plus one extra column.

    Every predictor group is selected by position in ``slope_data.columns``
    and ``demo_col`` is appended to each of them.

    Parameters
    ----------
    slope_data : pandas.DataFrame
        Slope table; must contain ``demo_col`` and every target column used by
        get_combo_predictions.
    demo_col : str
        Name of the column added as an extra predictor to every group.

    Returns
    -------
    (list, list, list)
        ``group_c`` and ``group_p`` hold one list of correlations / p-values
        per predictor group, and ``group_titles`` the matching group names
        (the titles do not mention ``demo_col``), all in the same order.
    """
    cols = slope_data.columns

    # Positional column groups.
    # NOTE(review): going by the column listing printed earlier in the
    # notebook, [22:24] covers the two jitter columns but not shimmerLocal and
    # [23:30] runs from jitterDDP to ACC_PPT -- confirm these slices.
    mfcc = cols[2:14].to_list()
    pauses = cols[17:20].to_list()  # #pause, pause_frequency, pause_interval
    jitter = cols[22:24].to_list()
    bio = cols[23:30].to_list()

    # Single columns at positions 14..24 and every unordered pair of them.
    extras = [cols[i] for i in range(14, 25)]
    extra_pairs = [
        (extras[a], extras[b])
        for a in range(len(extras))
        for b in range(a + 1, len(extras))
    ]
    # The "bio" loops only cover positions 14..23.
    bio_extras = extras[:-1]

    combos = [
        ('mfcc1-12', mfcc),
        ('pauses', pauses),
        ('mfccs + pauses', mfcc + pauses),
        ('jitter, shimmer', jitter),
        ('mfccs+ jitter, shimmer', mfcc + jitter),
        ("all bio", bio),
        ('bio + mfcc', mfcc + bio),
    ]
    # All mfcc's combined with one other attribute
    combos += [("mfccs + " + str(col), mfcc + [col]) for col in extras]
    # Pairs of attributes on their own
    combos += [(str(a) + '+' + str(b), [a, b]) for a, b in extra_pairs]
    # All mfcc's combined with a pair.  The predictors now really include the
    # mfccs and demo_col; previously only the bare pair was fitted here.
    combos += [
        ("mfccs + " + str(a) + '+' + str(b), mfcc + [a, b]) for a, b in extra_pairs
    ]
    combos += [('bio +' + str(col), bio + [col]) for col in bio_extras]
    combos += [('mfcc + bio +' + str(col), mfcc + bio + [col]) for col in bio_extras]

    group_c = []
    group_p = []
    group_titles = []
    for title, predictor_cols in combos:
        group_titles.append(title)
        features = slope_data[predictor_cols + [demo_col]].to_numpy(copy=True)
        c, p = get_combo_predictions(features, slope_data)
        group_c.append(c)
        group_p.append(p)

    return group_c, group_p, group_titles

In [39]:
# Repeat the combo analysis once per column of demo_data, adding that column
# as an extra predictor and using its name as the file-name specifier.
for demo_title in demo_data.columns:
    c, p, titles = generate_combos_with_demo(all_slopes, demo_title)
    generate_graphs(c, p, titles, demo_title)

In [7]:
# One model on all slope columns at positions 1..32, without demographic predictors.
columns = list(all_slopes.columns[1:33])
X0 = all_slopes[columns].to_numpy(copy=True)
c, p = get_combo_predictions(X0, all_slopes)
generate_graphs([c], [p], ["all"], "all-no-demo")