# 1. Importation of packages and metrics functions

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

In [2]:
def correlation_score(y_true, y_pred):
    """Scores the predictions according to the competition rules. 
    
    It is assumed that the predictions are not constant.
    
    Returns the average of each sample's Pearson correlation coefficient"""
    if type(y_true) == pd.DataFrame: y_true = y_true.values
    if type(y_pred) == pd.DataFrame: y_pred = y_pred.values
    corrsum = 0
    for i in range(len(y_true)):
        corrsum += np.corrcoef(y_true[i], y_pred[i])[1, 0]
    return corrsum / len(y_true)

# 2. PCA per patient model

## 2.1. Importation of the results' files

In [3]:
test_preds_PCA_1 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/1/test_preds_PCA_1.npy")
test_va_PCA_1 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/1/test_va_PCA_1.npy")
train_preds_PCA_1 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/1/train_preds_PCA_1.npy")
train_va_PCA_1 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/1/train_va_PCA_1.npy")
test_preds_PCA_2 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/2/test_preds_PCA_2.npy")
test_va_PCA_2 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/2/test_va_PCA_2.npy")
train_preds_PCA_2 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/2/train_preds_PCA_2.npy")
train_va_PCA_2 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/2/train_va_PCA_2.npy")
test_preds_PCA_3 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/3/test_preds_PCA_3.npy")
test_va_PCA_3 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/3/test_va_PCA_3.npy")
train_preds_PCA_3 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/3/train_preds_PCA_3.npy")
train_va_PCA_3 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/3/train_va_PCA_3.npy")
test_preds_PCA_4 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/4/test_preds_PCA_4.npy")
test_va_PCA_4 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/4/test_va_PCA_4.npy")
train_preds_PCA_4 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/4/train_preds_PCA_4.npy")
train_va_PCA_4 = np.load("/kaggle/input/openproblemdataquentinchappat/PCA per patient/4/train_va_PCA_4.npy")

## 2.2. Concatenation of the results in one array

In [4]:
test_preds_PCA = np.concatenate((test_preds_PCA_1, test_preds_PCA_2, test_preds_PCA_3, test_preds_PCA_4))
test_va_PCA = np.concatenate((test_va_PCA_1, test_va_PCA_2, test_va_PCA_3, test_va_PCA_4))
train_preds_PCA = np.concatenate((train_preds_PCA_1, train_preds_PCA_2, train_preds_PCA_3, train_preds_PCA_4))
train_va_PCA = np.concatenate((train_va_PCA_1, train_va_PCA_2, train_va_PCA_3, train_va_PCA_4))

## 2.3. Computation of the results

In [5]:
mse_train_PCA = mean_squared_error(train_va_PCA, train_preds_PCA)
corrscore_train_PCA = correlation_score(train_va_PCA, train_preds_PCA)

mse_test_PCA = mean_squared_error(test_va_PCA, test_preds_PCA)
corrscore_test_PCA = correlation_score(test_va_PCA, test_preds_PCA)

In [6]:
print("The MSE train for PCA per patient model is:", mse_train_PCA)
print("The correlation score train for PCA per patient model is:", corrscore_train_PCA)
print("The MSE test for PCA per patient model is:", mse_test_PCA)
print("The correlation score test for PCA per patient model is:", corrscore_test_PCA)

The MSE train for PCA per patient model is: 2.175620334147274
The correlation score train for PCA per patient model is: 0.4630287759184809
The MSE test for PCA per patient model is: 2.8098649888187164
The correlation score test for PCA per patient model is: 0.3615834370787006


# 3. Pearson correlation per patient model

## 3.1. Importation of the results' files

In [7]:
test_preds_pearson_1 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/1/test_preds_pearson_1.npy")
test_va_pearson_1 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/1/test_va_pearson_1.npy")
train_preds_pearson_1 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/1/train_preds_pearson_1.npy")
train_va_pearson_1 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/1/train_va_pearson_1.npy")
test_preds_pearson_2 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/2/test_preds_pearson_2.npy")
test_va_pearson_2 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/2/test_va_pearson_2.npy")
train_preds_pearson_2 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/2/train_preds_pearson_2.npy")
train_va_pearson_2 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/2/train_va_pearson_2.npy")
test_preds_pearson_3 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/3/test_preds_pearson_3.npy")
test_va_pearson_3 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/3/test_va_pearson_3.npy")
train_preds_pearson_3 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/3/train_preds_pearson_3.npy")
train_va_pearson_3 = np.load("/kaggle/input/openproblemdataquentinchappat/Pearson per patient/3/train_va_pearson_3.npy")

## 3.2. Concatenation of the results in one array

In [8]:
test_preds_pearson = np.concatenate((test_preds_pearson_1, test_preds_pearson_2, test_preds_pearson_3))
test_va_pearson = np.concatenate((test_va_pearson_1, test_va_pearson_2, test_va_pearson_3))
train_preds_pearson = np.concatenate((train_preds_pearson_1, train_preds_pearson_2, train_preds_pearson_3))
train_va_pearson = np.concatenate((train_va_pearson_1, train_va_pearson_2, train_va_pearson_3))

## 3.3. Computation of the results

In [9]:
mse_train_pearson = mean_squared_error(train_va_pearson, train_preds_pearson)
corrscore_train_pearson = correlation_score(train_va_pearson, train_preds_pearson)

mse_test_pearson = mean_squared_error(test_va_PCA, test_preds_pearson)
corrscore_test_pearson = correlation_score(test_va_PCA, test_preds_pearson)

In [10]:
print("The MSE train for Pearson per patient model is:", mse_train_pearson)
print("The correlation score train for Pearson per patient model is:", corrscore_train_pearson)
print("The MSE test for Pearson per patient model is:", mse_test_pearson)
print("The correlation score test for Pearson per patient model is:", corrscore_test_pearson)

The MSE train for Pearson per patient model is: 2.488560558734378
The correlation score train for Pearson per patient model is: 0.4677711132506335
The MSE test for Pearson per patient model is: 2.7947769169767334
The correlation score test for Pearson per patient model is: 0.41055300542599255
