In [None]:
import pandas as pd
import os
import numpy as np

# Preparing Train/Test data

## Facial feature subsets

In [None]:
# Including only AU intensity (_r)
def select_features_AU(file_df):
    """Return only the Action Unit intensity columns of a frame-level table.

    Parameters
    ----------
    file_df : pandas.DataFrame
        Per-frame feature table whose AU intensity columns are named like
        ``" AU04_r"`` (surrounding whitespace is tolerated).

    Returns
    -------
    pandas.DataFrame
        The columns whose name matches ``AU<digits>_r``, in original order.
    """
    # A bare '_r' pattern also selects unrelated columns that merely contain
    # that substring (e.g. ' p_rx'); require the full 'AU<digits>_r' name.
    return file_df.filter(regex=r'AU\d+_r\s*$')

In [None]:
# Including only specific AUs intensity (_r) based on the value change analysis between mania levels
def select_features_specific_AU(file_df):
    """Return the five hand-picked AU intensity columns of a frame table.

    Parameters
    ----------
    file_df : pandas.DataFrame
        Table containing (at least) the columns listed below; note the
        leading space in each column name.

    Returns
    -------
    pandas.DataFrame
        The selected columns, in the listed order.
    """
    selected_columns = [" AU04_r", " AU06_r", " AU07_r", " AU10_r", " AU14_r"]
    return file_df.loc[:, selected_columns]

## Functionals

In [None]:
# FH functional
def fh_functional(file_location, labels=None):
    """Build a per-file histogram of AU intensities ("FH" functional).

    Every file in ``file_location`` is read as CSV, reduced to its AU
    intensity columns, and each AU column is summarised by the fraction of
    frames (rounded to 2 decimals) falling into five unit-wide intensity bins.

    Parameters
    ----------
    file_location : str or path-like
        Directory containing the frame-level CSV files.
    labels : pandas.DataFrame, optional
        Table with ``Total_YMRS`` and ``ManiaLevel`` columns. Row ``i`` is
        attached to the ``i``-th file (files taken in name order). Defaults
        to the notebook-level ``train_labels_df``.

    Returns
    -------
    list of [str, pandas.DataFrame]
        ``[file name, histogram frame]`` pairs. Each frame has one row per AU
        column, the bin columns ``'0-1' .. '4-5'``, and constant
        ``Total_YMRS`` / ``ManiaLevel`` columns.

    Raises
    ------
    IndexError
        If there are more files than rows in ``labels``.
    """
    bin_labels = ['0-1', '1-2', '2-3', '3-4', '4-5']
    if labels is None:
        labels = train_labels_df

    # os.scandir yields entries in arbitrary order; sort by name so that the
    # positional pairing with label rows is reproducible across machines.
    # NOTE(review): assumes label rows are listed in file-name order - confirm.
    entries = sorted(
        (entry for entry in os.scandir(file_location) if entry.is_file()),
        key=lambda entry: entry.name,
    )

    dataframe_list = []
    for idx, entry in enumerate(entries):
        au_df = select_features_AU(pd.read_csv(entry.path))
        total_rows = len(au_df.index)  # number of frames in this sequence

        histogram_rows = []
        for _, scores in au_df.items():
            # Frames per half-open bin [low, low + 1).
            counts = [int(((scores >= low) & (scores < low + 1)).sum()) for low in range(5)]
            # The top bin is closed so an intensity of exactly 5 is not lost.
            counts[-1] += int((scores == 5).sum())
            # Convert counts to fractions; an empty file yields all zeros
            # instead of a division by zero.
            histogram_rows.append({
                name: (round(count / total_rows, 2) if total_rows else 0.0)
                for name, count in zip(bin_labels, counts)
            })

        # Build the frame once instead of appending row by row
        # (DataFrame.append no longer exists in pandas >= 2.0).
        new_df = pd.DataFrame(histogram_rows, columns=bin_labels)

        label_row = labels.iloc[idx]
        new_df['Total_YMRS'] = label_row['Total_YMRS']
        new_df['ManiaLevel'] = label_row['ManiaLevel']

        dataframe_list.append([entry.name, new_df])

    return dataframe_list

## Prepare Y data

In [None]:
# Label dataframe
labels_df = pd.read_csv('./Data/Label/labels_metadata.csv', delimiter=',')

# Columns at positions 1, 2, 3 and 6 are not used by this notebook.
kept_labels_df = labels_df.drop(columns=labels_df.columns[[1, 2, 3, 6]])

# The first 60 rows become the dev split, all remaining rows the train split.
# reset_index() keeps the original row number in an extra 'index' column.
train_labels_df = kept_labels_df.iloc[60:].reset_index()
dev_labels_df = kept_labels_df.iloc[:60].reset_index()

## Prepare X data

In [None]:
# Build the AU histogram frames for every file of each split directory.
# NOTE(review): fh_functional is called without split-specific labels here, so the
# Total_YMRS / ManiaLevel columns attached to au_test and au_dev are taken from
# train_labels_df rows rather than from those splits' own labels.
au_train = fh_functional("Data/Train OpenFace LLD")
au_test = fh_functional("Data/Test OpenFace LLD")
au_dev = fh_functional("Data/Dev OpenFace LLD")

In [None]:
# Feature vector creation
def feature_vector(dataframe_list):
    """Flatten every histogram frame into one feature row.

    Parameters
    ----------
    dataframe_list : list of [str, pandas.DataFrame]
        ``[file name, frame]`` pairs; the last two columns of each frame
        (the label columns) are dropped before flattening.

    Returns
    -------
    pandas.DataFrame
        One row per input frame. Columns are a MultiIndex of
        ``(bin name, source row)`` ordered by source row first. Each row keeps
        index label 0; an empty input gives an empty DataFrame.
    """
    feature_rows = [
        item[1].iloc[:, :-2]          # drop the two trailing label columns
        .unstack()                    # -> Series indexed by (column, row)
        .to_frame()
        .sort_index(level=1)          # group the values of each source row together
        .transpose()                  # -> a single wide row
        for item in dataframe_list
    ]

    # pd.concat raises on an empty list, so keep the "empty in, empty out" contract.
    if not feature_rows:
        return pd.DataFrame()

    # Concatenate once; DataFrame.append was removed in pandas 2.0 and
    # appending inside the loop copied the growing frame on every iteration.
    return pd.concat(feature_rows)

In [None]:
train_df = feature_vector(au_train)
test_df = feature_vector(au_test)
dev_df = feature_vector(au_dev)

In [None]:
# Training pair: train-split features with their Total_YMRS targets.
X_train, Y_train = train_df, train_labels_df["Total_YMRS"]
# Evaluation pair: dev-split features with the dev Total_YMRS targets.
X_test, Y_test = dev_df, dev_labels_df["Total_YMRS"]

## AU intensity distribution among mania levels

In [None]:
# Create groups based on ManiaLevel
def mania_grouping(au_list):
    """Split ``[file name, frame]`` pairs into three lists by mania level.

    The level of a pair is the mean of its frame's ``ManiaLevel`` column;
    pairs whose mean is not exactly 1, 2 or 3 are left out.

    Returns
    -------
    tuple of three lists
        Lists for levels 1, 2 and 3; every element is a single-entry dict
        ``{file name: frame}``.
    """
    groups = ([], [], [])

    for entry in au_list:
        name, frame = entry[0], entry[1]
        level = frame["ManiaLevel"].mean()

        # Put the pair into the first (and only) bucket whose level matches.
        for target_level, bucket in zip((1, 2, 3), groups):
            if level == target_level:
                bucket.append({name: frame})
                break

    return groups

In [None]:
# Overlay the histogram matrices of one group: sum them element-wise, then divide by the group size.
def mania_mean(df):
    """Average the histogram frames of one mania-level group.

    Parameters
    ----------
    df : list of dict
        Group as produced by ``mania_grouping``: each element maps a file
        name to a frame whose last two columns are label columns.

    Returns
    -------
    pandas.DataFrame
        Element-wise sum of all frames (label columns removed) divided by the
        number of elements in ``df``; starts from a 17-row zero frame with
        the five bin columns.
    """
    bin_columns = ['0-1', '1-2', '2-3', '3-4', '4-5']
    running_total = pd.DataFrame(data=0, index=range(0, 17), columns=bin_columns)

    for group_entry in df:
        for frame in group_entry.values():
            histogram = frame.iloc[:, :-2]  # strip the two label columns
            running_total = running_total + histogram

    return running_total / len(df)


In [None]:
# Group the training recordings by mania level, then average each group.
mania1_df, mania2_df, mania3_df = mania_grouping(au_train)
mania1_df_mean = mania_mean(mania1_df)
mania2_df_mean = mania_mean(mania2_df)
mania3_df_mean = mania_mean(mania3_df)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# One row of bar charts per AU (17 rows), one column per mania level (3 columns).
fig, axes = plt.subplots(17, 3, figsize=(35,55))

# X-axis caption and the column titles.
fig.text(0.5, 0.1, "Range of AU intensity", ha="center", size=20)
for x_pos, column_title in ((0.21, "ManiaLevel 1"), (0.48, "ManiaLevel 2"), (0.76, "ManiaLevel 3")):
    fig.text(x_pos, 0.89, column_title, size=20)

# Y-axis caption and the per-row AU names (hand-placed figure coordinates).
fig.text(0.08, 0.5, 'Mean percentage count of frames per AU', va='center', rotation='vertical', size=20)
au_row_labels = [
    (0.87, 'AU_1'), (0.82, 'AU_2'), (0.775, 'AU_4'), (0.735, 'AU_5'),
    (0.689, 'AU_6'), (0.642, 'AU_7'), (0.599, 'AU_9'), (0.552, 'AU_10'),
    (0.506, 'AU_12'), (0.462, 'AU_14'), (0.416, 'AU_15'), (0.371, 'AU_17'),
    (0.326, 'AU_20'), (0.283, 'AU_23'), (0.238, 'AU_25'), (0.193, 'AU_26'),
    (0.147, 'AU_45'),
]
for y_pos, au_name in au_row_labels:
    fig.text(0.1, y_pos, au_name, rotation='vertical', size=20)

# Draw row i of each level's mean frame into grid cell (i, level column).
for col, level_mean in enumerate((mania1_df_mean, mania2_df_mean, mania3_df_mean)):
    for row in range(17):
        axes[row, col].bar(level_mean.columns, level_mean.iloc[row])

## Selecting specific AU subset

In [None]:
# Element-wise change of the mean histograms between each pair of mania levels
diff_df_1_2 = mania1_df_mean.sub(mania2_df_mean)
diff_df_2_3 = mania2_df_mean.sub(mania3_df_mean)
diff_df_1_3 = mania1_df_mean.sub(mania3_df_mean)

In [None]:
# Highlight every cell whose absolute change between levels 1 and 2 exceeds 0.03.
diff_df_1_2.style.applymap(lambda x: 'background-color : green' if abs(x) > 0.03 else '')

In [None]:
# Highlight every cell whose absolute change between levels 2 and 3 exceeds 0.03.
# (The former chained test `x > x > 0.03` could never be true, so positive
# changes were silently left unhighlighted.)
diff_df_2_3.style.applymap(lambda x: 'background-color : green' if abs(x) > 0.03 else '')

In [None]:
# Highlight every cell whose absolute change between levels 1 and 3 exceeds 0.03.
# (The former chained test `x > x > 0.03` could never be true, so positive
# changes were silently left unhighlighted.)
diff_df_1_3.style.applymap(lambda x: 'background-color : green' if abs(x) > 0.03 else '')

# Cross-validation

In [None]:
#cross-validation scores
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.cross_decomposition import PLSRegression
import numpy as np
import matplotlib.pyplot as plt
from numpy import mean, absolute, sqrt

# scikit-learn reports error metrics negated ("neg_*") so that higher is
# always better; R^2 is reported as-is.
cv_scoring = [
    'neg_mean_squared_error',
    'neg_root_mean_squared_error',
    'neg_mean_absolute_error',
    'r2',
]

scores_mse = []
scores_mae = []
scores_r2 = []
scores_rmse = []
comp = []
iterations = np.arange(1, 10)

for comp_no in iterations:
    model = PLSRegression(n_components=comp_no)
    # One cross-validation run yields all four metrics on identical folds,
    # instead of refitting the model separately for every metric.
    cv_result = cross_validate(model, X_train, Y_train, scoring=cv_scoring, cv=KFold())

    # Flip the sign of the negated error metrics back to positive errors.
    scores_mse.append(-cv_result['test_neg_mean_squared_error'].mean())
    scores_rmse.append(-cv_result['test_neg_root_mean_squared_error'].mean())
    scores_mae.append(-cv_result['test_neg_mean_absolute_error'].mean())
    # R^2 may legitimately be negative (model worse than predicting the mean);
    # taking its absolute value would make such a model look good.
    scores_r2.append(cv_result['test_r2'].mean())

    comp.append(comp_no)

In [None]:
# Plot against the evaluated component counts so the x-axis matches its label
# (plotting the list alone would number the points 0..8 instead of 1..9).
plt.plot(comp, scores_mae)
plt.xlabel('Number of PLS Components')
plt.ylabel('MAE')

In [None]:
# Plot against the evaluated component counts so the x-axis matches its label
# (plotting the list alone would number the points 0..8 instead of 1..9).
plt.plot(comp, scores_mse)
plt.xlabel('Number of PLS Components')
plt.ylabel('MSE')

In [None]:
# Plot against the evaluated component counts so the x-axis matches its label
# (plotting the list alone would number the points 0..8 instead of 1..9).
plt.plot(comp, scores_r2)
plt.xlabel('Number of PLS Components')
plt.ylabel('r2')

In [None]:
# Plot against the evaluated component counts so the x-axis matches its label
# (plotting the list alone would number the points 0..8 instead of 1..9).
plt.plot(comp, scores_rmse)
plt.xlabel('Number of PLS Components')
plt.ylabel('RMSE')

# The model

In [None]:
from sklearn.cross_decomposition import PLSRegression

In [None]:
# Fit a PLS regression with a single latent component on the training pair.
# NOTE(review): n_components=1 is presumably chosen from the cross-validation curves - confirm.
regression = PLSRegression(n_components=1)
regression.fit(X_train, Y_train)

In [None]:
# Model score on the held-out (X_test, Y_test) pair, followed by the score on the training pair.
regression.score(X_test, Y_test), regression.score(X_train, Y_train)

# Prediction

In [None]:
# Predicted YMRS totals for the test-set feature rows.
y_pred = regression.predict(test_df)

# Map every predicted score to a mania level:
#   score <= 7 -> 1,   7 < score < 20 -> 2,   otherwise -> 3.
# The previous `> 7 or < 20` test was always true, so level 3 was never assigned.
predicted_scores = np.ravel(y_pred)  # predict may return an (n, 1) column
y_pred_class = np.select(
    [predicted_scores <= 7, predicted_scores < 20],  # first matching condition wins
    [1, 2],
    default=3,
)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, recall_score

# Y_test holds the dev-split targets, so the predictions being scored must come
# from the dev features (X_test). Comparing predictions for a different split
# against truncated dev labels pairs unrelated recordings.
dev_pred = np.ravel(regression.predict(X_test))
# Same score -> level mapping as the prediction cell: <= 7 -> 1, < 20 -> 2, else 3.
dev_pred_class = np.select([dev_pred <= 7, dev_pred < 20], [1, 2], default=3)

mse = mean_squared_error(Y_test, dev_pred)
mae = mean_absolute_error(Y_test, dev_pred)
# UAR (macro-averaged recall) compares class labels, so the reference must be
# the ManiaLevel column rather than the raw Total_YMRS values.
uar = recall_score(dev_labels_df["ManiaLevel"], dev_pred_class, average='macro')

In [None]:
# Summary of the evaluation metrics, shown as the cell output.
'MSE: ' + str(mse), 'RMSE: ' + str(np.sqrt(mse)), 'MAE: ' + str(mae), 'R-squared: ' + str(regression.score(X_test, Y_test)), 'UAR: ' + str(uar)