In [None]:
import pandas as pd
import os

# Preparing Train/Test data

## Facial feature subsets

In [None]:
# Including only AU intensity (_r)
def select_features_AU(file_df):
    """Keep only the Action Unit intensity columns.

    Parameters
    ----------
    file_df : pandas.DataFrame
        Frame whose AU intensity columns are named ``AU<digits>_r``
        (a leading space in the label, e.g. ``" AU04_r"``, is fine).

    Returns
    -------
    pandas.DataFrame
        The columns of ``file_df`` whose label ends in ``AU<digits>_r``.
    """
    # `filter(regex=...)` performs a substring search, so a bare '_r' would also
    # select any other column that merely contains "_r" (for example a label
    # such as "p_rx").  Anchoring to the AU naming scheme keeps intensities only.
    return file_df.filter(regex=r'AU\d+_r$')

In [None]:
# Including only specific AUs intensity based on the value change analysis between mania levels
def select_features_specific_AU(file_df):
    """Return the five hand-picked AU intensity columns of ``file_df``.

    The labels are looked up exactly as written (including the leading
    space), so a frame lacking any of them raises ``KeyError``.
    """
    chosen_columns = [" AU04_r", " AU06_r", " AU07_r", " AU10_r", " AU14_r"]
    return file_df.loc[:, chosen_columns]

In [None]:
# Including all features (every column except the first five)
def select_features_all(file_df):
    """Return ``file_df`` without its first five columns (by position)."""
    leading_columns_to_skip = 5
    return file_df.iloc[:, leading_columns_to_skip:]

In [None]:
#Excluding all landmarks
def select_features_lmk(file_df):
    """Remove every column whose label contains 'lmk', then drop the first five
    of the columns that remain.

    The landmark columns are removed first, so the positional cut applies to
    the already-reduced frame.
    """
    landmark_columns = file_df.filter(regex='lmk').columns
    without_landmarks = file_df.drop(columns=landmark_columns)
    return without_landmarks.iloc[:, 5:]

## Functionals

In [None]:
#FM functional
def fm_functional(input_df):
    """Collapse a frame to one row holding the mean of every column.

    Returns a single-row DataFrame (index ``0``) with the same column labels
    as ``input_df``.
    """
    column_means = input_df.mean(axis=0)
    # Series -> one-column frame -> transpose, giving one row per input frame.
    return column_means.to_frame().T

In [None]:
#FR functional
def fr_functional(input_df):
    """Collapse a frame to one row holding the range (max - min) of every column.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame of numeric columns.

    Returns
    -------
    pandas.DataFrame
        Single-row frame (index ``0``) with the same column labels as
        ``input_df`` and numeric dtypes.
    """
    # Vectorised form: avoids shadowing the builtins `max`/`min` and avoids the
    # object-dtype columns produced by filling an empty frame cell by cell.
    value_range = input_df.max(axis=0) - input_df.min(axis=0)
    return value_range.to_frame().T

In [None]:
from sklearn.preprocessing import MinMaxScaler

def normalize_data(input_df):
    """Min-max scale every column of ``input_df`` with a freshly fitted scaler.

    The scaler is fitted on ``input_df`` itself.  The result is a new
    DataFrame with the original column labels and a default integer index.
    """
    scaled_values = MinMaxScaler().fit_transform(input_df)
    return pd.DataFrame(scaled_values, columns=input_df.columns)

## Prepare X data

In [None]:
#Create feature vector for every data set AND merge it together
def prepare_data(file_location):
    """Build one feature vector per file in a folder and stack them.

    Each file is read as CSV, reduced with ``select_features_all``,
    min-max normalised with ``normalize_data`` and summarised to a single
    row with ``fm_functional``.

    Parameters
    ----------
    file_location : str or path-like
        Folder containing the per-recording CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per file, in ascending file-name order; an empty frame when
        the folder contains no files.
    """
    # os.scandir yields entries in arbitrary, filesystem-dependent order, so
    # sort the paths to make the row order deterministic.  Sub-directories
    # (e.g. notebook checkpoint folders) are skipped.
    # NOTE(review): the label rows are assumed to follow the same file-name
    # order -- confirm against the label metadata file.
    with os.scandir(file_location) as entries:
        csv_paths = sorted(entry.path for entry in entries if entry.is_file())

    feature_rows = []
    for csv_path in csv_paths:
        temp_df = pd.read_csv(csv_path, delimiter=',')
        temp_df = select_features_all(temp_df)
        temp_df_norm = normalize_data(temp_df)
        feature_rows.append(fm_functional(temp_df_norm))

    if not feature_rows:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.
    return pd.concat(feature_rows, ignore_index=True)

In [None]:
# Feature matrix for the training partition: one row per file in the folder.
X_train = prepare_data('Data/Train OpenFace LLD')

In [None]:
# Feature matrix built from the Dev folder; the notebook uses it as its "test" set.
X_test = prepare_data('Data/Dev OpenFace LLD')

In [None]:
# Preview the first ten rows of the Dev feature matrix.
X_test.head(10)

## Prepare Y data

In [None]:
# Label dataframe: load the comma-separated label metadata file.
labels_df = pd.read_csv('./Data/Label/labels_metadata.csv', delimiter=',')

In [None]:
# Training labels: discard the columns at positions 1, 2, 3 and 6 (unnecessary
# data variables), skip the first 60 rows (the rows held out for testing) and
# renumber what is left; the previous row labels end up in an 'index' column.
train_labels_df = (
    labels_df
    .drop(columns=labels_df.columns[[1, 2, 3, 6]])
    .iloc[60:]
    .reset_index()
)
train_labels_df

In [None]:
# Dev labels: same column removal as for the training labels, but keep only
# the first 60 rows; the previous row labels end up in an 'index' column.
dev_labels_df = (
    labels_df
    .drop(columns=labels_df.columns[[1, 2, 3, 6]])
    .iloc[:60]
    .reset_index()
)
dev_labels_df.head(5)

In [None]:
# Regression targets: the Total_YMRS column of each label frame.
y_train = train_labels_df["Total_YMRS"]
y_test = dev_labels_df["Total_YMRS"]
# Display the training targets as the cell output.
y_train

## Cross-validation

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.cross_decomposition import PLSRegression
import numpy as np
from numpy import absolute

# Cross-validated error of a PLS regressor for 1..9 components.
# Each list receives one entry per component count, in the order of `iterations`.
scores_mse = []
scores_mae = []
scores_r2 = []
scores_rmse = []
comp = []
iterations = np.arange(1, 10)

# One splitter for all metrics; KFold() without shuffling always produces the
# same folds, so every metric is evaluated on identical splits.
cv = KFold()

for comp_no in iterations:
    model = PLSRegression(n_components=comp_no)
    mse = cross_val_score(model, X_train, y_train, scoring='neg_mean_squared_error', cv=cv).mean()
    mae = cross_val_score(model, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv).mean()
    r2 = cross_val_score(model, X_train, y_train, scoring='r2', cv=cv).mean()
    rmse = cross_val_score(model, X_train, y_train, scoring='neg_root_mean_squared_error', cv=cv).mean()

    # The error scorers are negated by scikit-learn ("neg_*"), so taking the
    # absolute value recovers the plain error.
    scores_mse.append(absolute(mse))
    scores_mae.append(absolute(mae))
    scores_rmse.append(absolute(rmse))
    # R^2 is not a negated score: a negative value means "worse than predicting
    # the mean", and abs() would disguise that as a good fit.
    scores_r2.append(r2)

    # Record the component count only (the list must not be appended to itself).
    comp.append(comp_no)

In [None]:
import matplotlib.pyplot as plt

# Plot against the real component counts; plt.plot(scores) alone would number
# the x-axis 0..8 although the models were fitted with 1..9 components.
plt.plot(iterations, scores_mse)
plt.xlabel('Number of PLS Components')
plt.ylabel('MSE')

In [None]:
# Mean absolute error per component count (this cell previously repeated the
# MSE plot, leaving the computed MAE scores unplotted).
plt.plot(iterations, scores_mae)
plt.xlabel('Number of PLS Components')
plt.ylabel('MAE')

In [None]:
# R^2 per component count, with the x-axis showing the actual 1..9 counts
# rather than list positions 0..8.
plt.plot(iterations, scores_r2)
plt.xlabel('Number of PLS Components')
plt.ylabel('r2')

In [None]:
# RMSE per component count, with the x-axis showing the actual 1..9 counts
# rather than list positions 0..8.
plt.plot(iterations, scores_rmse)
plt.xlabel('Number of PLS Components')
plt.ylabel('RMSE')

# The model

In [None]:
from sklearn.cross_decomposition import PLSRegression

In [None]:
# Final regressor: a single PLS component, with the estimator's own scaling
# option switched off (scale=False).
regression = PLSRegression(n_components=1, scale=False)

In [None]:
# Fit the PLS model on the training features and their Total_YMRS targets.
regression.fit(X_train, y_train)

In [None]:
# R^2 score on the held-out Dev features and on the training features.
# Two separate statements: joining the prints with a comma builds a tuple,
# which the notebook would echo as "(None, None)".
print("R^2 (dev):", regression.score(X_test, y_test))
print("R^2 (train):", regression.score(X_train, y_train))

# Prediction

In [None]:
# Feature matrix built from the Test folder: one row per file.
X_pred = prepare_data('Data/Test OpenFace LLD')

In [None]:
# Predicted Total_YMRS values for the Test-folder feature matrix.
y_pred = regression.predict(X_pred)

In [None]:
# Bin the predicted scores into three classes:
#   score <= 7        -> 1
#   7 < score < 20    -> 2
#   score >= 20       -> 3
# The middle bin needs both bounds at once; an "or" between them is true for
# every score above 7, which would make class 3 unreachable.
# The result keeps the shape of `y_pred` and holds float class ids.
y_pred_scores = np.asarray(y_pred)
y_pred_class = np.where(
    y_pred_scores <= 7,
    1.0,
    np.where(y_pred_scores < 20, 2.0, 3.0),
)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, recall_score

# Evaluate on the Dev partition, the only held-out data with labels in this
# notebook.  Predictions made for the Test folder must not be scored against
# (truncated) Dev labels: they belong to different recordings.
# NOTE(review): assumes the rows of X_test and y_test describe the same
# recordings in the same order -- confirm against the label metadata file.
y_dev_true = y_test.to_numpy()
y_dev_pred = np.asarray(regression.predict(X_test)).ravel()

mse = mean_squared_error(y_dev_true, y_dev_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_dev_true, y_dev_pred)


def _ymrs_to_level(scores):
    """Bin scores into classes: <= 7 -> 1, 7 < s < 20 -> 2, >= 20 -> 3."""
    scores = np.asarray(scores)
    return np.where(scores <= 7, 1, np.where(scores < 20, 2, 3))


# Unweighted average recall needs class labels on both sides, so the true
# scores are binned with the same thresholds as the predictions.
uar = recall_score(_ymrs_to_level(y_dev_true), _ymrs_to_level(y_dev_pred), average='macro')
print(mse, rmse, mae, uar)