In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.linear_model import LassoCV
from sklearn.linear_model import MultiTaskLassoCV
from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import AlphaSelection
from yellowbrick.regressor.alphas import alphas
from heapq import nsmallest
import sys
import seaborn as sns
import matplotlib.pyplot as plt



### Merge datasets based on cancer cell line name;
### concatenate the two dataframes and match based on cancer cell lines; 
### save them into two new dataframes- MET and GCP; 
### split GCP and MET into train and test set respectively.


In [2]:
md = pd.read_excel(io='CCLE metabolomics dataset.xlsx',sheet_name="All")
mt = md.drop(['Tissue', 'Medium','Culture'], axis=1)
hm = pd.read_csv('GCP_proteomics_remapped.csv')
merge_tb = mt.merge(hm,how='inner',left_on='CCL', right_on='Cell Line')
MET = merge_tb.iloc[:,1:226]
GCP = merge_tb.iloc[:,227:269]

metab = MET.columns
metab_name = pd.DataFrame(metab)
his = GCP.columns
his_name = pd.DataFrame(his)

GCP = np.nan_to_num(GCP, nan=0)
MET = np.nan_to_num(MET, nan=0)

Xtrain, Xtest, Ytrain, Ytest =train_test_split(GCP, MET, test_size=0.3, random_state=0)
# print(Xtest.shape)
# print(Ytest.shape)
# print(Xtrain.shape)
# print(Ytrain.shape)

x = Xtrain[:,1]
print(x.shape)
y = Ytrain[:,50]
print(y.shape)

(604,)
(604,)


### Tuning the alpha

In [3]:
# alphas = [10**-6, 10**-5,10**-4,10**-3,10**-2,10**-1,1,2,3,4,5,6,7,8,9,10]
# min_mses_G2M = []
# best_alpha_G2M = []
# for j in range(Ytrain.shape[1]):
#     min_mses = sys.maxsize
#     alpha_value = 100
#     for i,a in enumerate(alphas):
#         x = linear_model.Lasso(alpha=a, max_iter=10000, tol=0.0001).fit(Xtrain, Ytrain[:, j])
#         ypred = x.predict(Xtest)
#         mse = mean_squared_error(ypred,Ytest[:, j])
#         if mse < min_mses:
#             min_mses = mse
#             alpha_value = a
#     min_mses_G2M.append(min_mses)
#     best_alpha_G2M.append(alpha_value)
# print(best_alpha_G2M)

# min_mses_M2G = []
# best_alpha_M2G = []
# for k in range(Xtrain.shape[1]):
#     min_mses = sys.maxsize
#     alpha_value = 100
#     for i,a in enumerate(alphas):
#         x = linear_model.Lasso(alpha=a,max_iter=10000, tol=0.0001).fit(Ytrain, Xtrain[:, k])
#         xpred = x.predict(Ytest)
#         mse = mean_squared_error(xpred,Xtest[:, k])
#         if mse < min_mses:
#             min_mses = mse
#             alpha_value = a
#     min_mses_M2G.append(min_mses)
#     best_alpha_M2G.append(alpha_value)
# print(best_alpha_M2G)

In [8]:
alphas = [10**-6, 10**-5,10**-4,10**-3,10**-2,10**-1,1,2,3,4,5,6,7,8,9,10]
min_mses_G2M = []
best_alpha_G2M = np.zeros([42, 225])
for k in range(Xtrain.shape[1]):
    for j in range(Ytrain.shape[1]):
        min_mses = sys.maxsize
        alpha_value = 100
        for i,a in enumerate(alphas):
            x = linear_model.Lasso(alpha=a, max_iter=10000, tol=0.0001).fit(Xtrain[:,k].reshape(-1,1), Ytrain[:, j])
            ypred = x.predict(Xtest[:,k].reshape(-1,1))
            mse = mean_squared_error(Ytest[:, j], ypred)
            if mse < min_mses:
                min_mses = mse
                alpha_value = a
            else:
                alpha_value = alpha_value
        min_mses_G2M.append(min_mses)
        best_alpha_G2M[k,j] = alpha_value
print(best_alpha_G2M.shape)

min_mses_M2G = []
best_alpha_M2G = np.zeros([225, 42]) 
for j in range(Ytrain.shape[1]):
    for k in range(Xtrain.shape[1]):
        min_mses = sys.maxsize
        alpha_value = 100
        for i,a in enumerate(alphas):
            x = linear_model.Lasso(alpha=a,max_iter=10000, tol=0.0001).fit(Ytrain[:,j].reshape(-1,1), Xtrain[:, k])
            xpred = x.predict(Ytest[:,j].reshape(-1,1))
            mse = mean_squared_error(Xtest[:, k], xpred)
            if mse < min_mses:
                min_mses = mse
                alpha_value = a
            else:
                alpha_value = alpha_value
        min_mses_M2G.append(min_mses)
        best_alpha_M2G[j,k] = alpha_value
print(best_alpha_M2G)

[[1.e-02 1.e-02 1.e-06 ... 1.e-02 1.e-02 1.e-02]
 [1.e-02 1.e-02 1.e-06 ... 1.e-06 1.e-03 1.e-02]
 [1.e-06 1.e-01 1.e-06 ... 1.e-01 1.e-06 1.e-01]
 ...
 [1.e-03 1.e-03 1.e-03 ... 1.e-02 1.e-01 1.e-01]
 [1.e-02 1.e-03 1.e-03 ... 1.e-02 1.e-01 1.e-01]
 [1.e-02 1.e-02 1.e-06 ... 1.e-06 1.e-01 1.e-01]]


### Fit train sets in Lasso with best alpha to generate models

In [19]:
GCP2MET_models = pd.DataFrame()
for i in range(Xtrain.shape[1]):
    for j in range(Ytrain.shape[1]):
        mdl_G2M =linear_model.Lasso(alpha=best_alpha_G2M[i,j],max_iter=10000, tol=0.0001).fit(Xtrain[:,i].reshape(-1,1), Ytrain[:, j])
        GCP2MET_models.at[i,j]  = mdl_G2M
print(GCP2MET_models.shape)

MET2GCP_models = pd.DataFrame() 
for i in range(Ytrain.shape[1]):
    for j in range(Xtrain.shape[1]):
        mdl_M2G = linear_model.Lasso(alpha=best_alpha_M2G[i,j],max_iter=10000, tol=0.0001).fit(Ytrain[:,i].reshape(-1,1), Xtrain[:,j])
        MET2GCP_models.at[i,j]= mdl_M2G
print(MET2GCP_models)


(42, 225)
                                     0                                   1   \
0     Lasso(alpha=0.01, max_iter=10000)   Lasso(alpha=0.01, max_iter=10000)   
1     Lasso(alpha=0.01, max_iter=10000)   Lasso(alpha=0.01, max_iter=10000)   
2    Lasso(alpha=1e-06, max_iter=10000)    Lasso(alpha=0.1, max_iter=10000)   
3    Lasso(alpha=1e-06, max_iter=10000)  Lasso(alpha=1e-06, max_iter=10000)   
4     Lasso(alpha=0.01, max_iter=10000)   Lasso(alpha=0.01, max_iter=10000)   
..                                  ...                                 ...   
220  Lasso(alpha=0.001, max_iter=10000)  Lasso(alpha=1e-06, max_iter=10000)   
221  Lasso(alpha=1e-06, max_iter=10000)  Lasso(alpha=0.001, max_iter=10000)   
222  Lasso(alpha=0.001, max_iter=10000)  Lasso(alpha=0.001, max_iter=10000)   
223   Lasso(alpha=0.01, max_iter=10000)  Lasso(alpha=0.001, max_iter=10000)   
224   Lasso(alpha=0.01, max_iter=10000)   Lasso(alpha=0.01, max_iter=10000)   

                                     2   

### To evaluate models and get correlation coefficient, pvalue, and mse value.

In [None]:
for col in df:
    if col == 'views':
        continue
    for i, row_value in df[col].iteritems():
        df[col][i] = row_value * df['views'][i]

In [27]:
def evaluate_models(models, Xtest, Ytest):
    """
    evaluate_models returns results from the model predictions, including the pearson
    correlation coefficient, p-Values, and MSE.

    :param models:         A list of scikit-learn model objects.
    :param Xtest:          A numpy array or pandas dataframe containing validation set input data.
    :param Ytest:          A numpy array or pandas dataframe containing validation set output data.
    :return pred_resul:    A dictionary containing the final MSE, pValue, or rValue.
    
    """
    
    predictions = []
    rValue = list()
    pValue = list()
    MSE = list()
    for col in models:
        for i,row_value in models[col].iteritems():
#             mdl = models[col][i]
            Ypred = row_value.predict(Xtest[:,i].reshape(-1,1))  
    
            r, pvalue = pearsonr(Ypred, Ytest[:, col])
            rValue.append(r)
            pValue.append(pvalue)
        
            mse = mean_squared_error(Ytest[:, i], Ypred)
            MSE.append(mse)
    
    df_MSE = pd.DataFrame(MSE)
    df_pValue = pd.DataFrame(pValue)
    df_rValue = pd.DataFrame(rValue)
    
    return df_MSE, df_pValue, df_rValue

evaluate_models(GCP2MET_models,Xtest, Ytest)

Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=1000

Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0

Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alph

Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1

Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alp

Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=

Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Las

Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=1000

Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10

Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.0001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lass

Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.01, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.1, max_iter=10000)
Lasso(alpha=0.001, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1e-06, max_iter=10000)
Lasso(alpha=1

(Empty DataFrame
 Columns: []
 Index: [],
 Empty DataFrame
 Columns: []
 Index: [],
 Empty DataFrame
 Columns: []
 Index: [])

## GCP_to_MET
### perform 3-fold cross validation to find model that generate the smallest mse value.

In [21]:
mse1,p1,pearson1 = evaluate_models(GCP2MET_models, Xtest, Ytest)
mse1.columns = ["GCP2MET_models"]
# print(mse1)

AttributeError: 'Series' object has no attribute 'predict'

In [None]:
kf = KFold(n_splits=3)

KFold(n_splits=3, random_state=None, shuffle=True)
mse_result = pd.DataFrame()
pvalue_result = pd.DataFrame()
rvalue_result = pd.DataFrame()
for train_index, test_index in kf.split(Xtrain, Ytrain):
    X_train, X_test = Xtrain[train_index], Xtrain[test_index]
    y_train, y_test = Ytrain[train_index], Ytrain[test_index]
    v_GCP2MET_models = []
    final_metrics = []
    for i in range(y_train.shape[1]):
        mdl_G2M = linear_model.Lasso(alpha=best_alpha_G2M[i],max_iter=10000, tol=0.0001).fit(X_train, y_train[:, i])
        v_GCP2MET_models.append(mdl_G2M)
        df_MSE, df_pValue, df_rValue = evaluate_models(v_GCP2MET_models, X_test, y_test)
    mse_result = pd.concat([mse_result,df_MSE],axis = 1)
    pvalue_result = pd.concat([pvalue_result,df_pValue],axis = 1)
    rvalue_result = pd.concat([rvalue_result,df_rValue],axis = 1)
mse_result.columns = ["mse1","mse2","mse3"]
mse_result = mse_result.T
print(mse_result)   

In [None]:
minMSE = mse_result.idxmin()
# print(minMSE)

mse_min = pd.DataFrame()
MSE_min = list()
for index in mse_result:
    v = mse_result[index][minMSE[index]]
    MSE_min.append(v)
#     print(v)
mse_min = pd.DataFrame(MSE_min)
mse_min.columns = ["v_GCP2MET_models"]
# print(mse_min)
raw = mse1.join(mse_min)
print(raw)

### Generate new list of model

In [None]:
new_GCP2MET = list()
for index, row in raw.iterrows():
    if row['GCP2MET_models'] < row['v_GCP2MET_models']:
        new_GCP2MET.append(GCP2MET_models[index])
    else:
        new_GCP2MET.append(v_GCP2MET_models[index])
    
print(new_GCP2MET)

### Evaluate if the correlation coefficient generated with the list of best model is significant or not

In [None]:
mse_G2M,p_G2M,pearson_G2M = evaluate_models(new_GCP2MET, Xtest, Ytest)
metab_name.columns = ["METAB_NAME"]
p_G2M.columns = ["GCP2MET_p"]
pearson_G2M.columns = ["GCP2MET_r"]
raw_G2M = pd.concat([pearson_G2M,p_G2M],axis = 1)
pearson_G2M.columns = ["GCP2MET_r_0.05"]
heat_G2M = pd.concat([pearson_G2M, raw_G2M],axis = 1)
print(heat_G2M)
for index, row in heat_G2M.iterrows():
    if row['GCP2MET_p'] >= 0.05:
        row['GCP2MET_r_0.05'] = 'NaN'
new_G2M = pd.concat([metab_name, heat_G2M],axis = 1)
top10_G2M = new_G2M.nsmallest(10, ['GCP2MET_p'])
print (top10_G2M)


### MET TO GCP (repeat the same process above)

In [None]:
mse2,p2,pearson2 = evaluate_models(MET2GCP_models, Ytest, Xtest)
mse2.columns = ["MET2GCP_models"]

kf = KFold(n_splits=3)
KFold(n_splits=3, random_state=None, shuffle=True)
mse2_result = pd.DataFrame()

for train_index, test_index in kf.split(Xtrain, Ytrain):
    X_train, X_test = Xtrain[train_index], Xtrain[test_index]
    y_train, y_test = Ytrain[train_index], Ytrain[test_index]
    v_MET2GCP_models = []
    metrics = []
    for i in range(X_train.shape[1]):
        mdl_M2G = linear_model.Lasso(alpha=best_alpha_M2G[i],max_iter=10000, tol=0.0001).fit(y_train, X_train[:, i])
        v_MET2GCP_models.append(mdl_M2G)
        df_MSE, df_pValue, df_rValue = evaluate_models(v_MET2GCP_models, y_test, X_test)
    mse2_result = pd.concat([mse2_result,df_MSE],axis = 1)
#     pvalue_result = pd.concat([pvalue_result,df_pValue],axis = 1)
#     rvalue_result = pd.concat([rvalue_result,df_rValue],axis = 1)
mse2_result.columns = ["mse1","mse2","mse3"]
mse2_result = mse2_result.T
# print(mse2_result) 

In [None]:
minMSE2 = mse2_result.idxmin()
# print(minMSE)

mse2_min = pd.DataFrame()
MSE2_min = list()
for index in mse2_result:
    v = mse2_result[index][minMSE2[index]]
    MSE2_min.append(v)
#     print(v)
mse2_min = pd.DataFrame(MSE2_min)
mse2_min.columns = ["v_MET2GCP_models"]
# print(mse2_min)
raw2 = mse2.join(mse2_min)

new_MET2GCP = list()
for index, row in raw2.iterrows():
    if row['MET2GCP_models'] < row['v_MET2GCP_models']:
        
        new_MET2GCP.append(MET2GCP_models[index])
    else:
        new_MET2GCP.append(v_MET2GCP_models[index])
              
    
print(new_MET2GCP)

In [None]:
mse_M2G,p_M2G,pearson_M2G = evaluate_models(new_MET2GCP, Ytest, Xtest)
his_name.columns = ["HIS_NAME"]
p_M2G.columns = ["MET2GCP_p"]
pearson_M2G.columns = ["MET2GCP_r"]
raw_M2G = pd.concat([pearson_M2G, p_M2G],axis = 1)
pearson_M2G.columns = ["MET2GCP_r_0.05"]
heat_M2G = pd.concat([pearson_M2G, raw_M2G],axis = 1)
for index, row in heat_M2G.iterrows():
    if row['MET2GCP_p'] >= 0.05:
        row['MET2GCP_r_0.05'] = 'NaN'
print(heat_M2G)
new_M2G = pd.concat([his_name, heat_M2G],axis = 1)
print(new_M2G)
top10_M2G = new_M2G.nsmallest(10, ['MET2GCP_p'])
print (top10_M2G)


In [None]:
with sns.axes_style("darkgrid"):
    f, G2M_heatmap = plt.subplots(figsize=(5, 100))
    G2M_heatmap = sns.heatmap(heat_G2M, annot=True, vmin = -1, vmax= 1)
with sns.axes_style("darkgrid"):
    f, M2G_heatmap = plt.subplots(figsize=(5, 15))
    M2G_heatmap = sns.heatmap(heat_M2G, annot=True, vmin = -1, vmax= 1)
G2M_heatmap.figure.savefig("G2M_heatmap.png")
M2G_heatmap.figure.savefig("M2G_heatmap.png")

In [None]:
corr_G2M = np.zeros([42, 225])
for i in range(GCP.shape[1]):
    for j in range(MET.shape[1]):
        r_1, pvalue1 = pearsonr(MET[:,j], GCP[:, i])
        r1 = round(r_1, 2)
        corr_G2M[i][j] = r1
print(corr_G2M)

corr_M2G = np.zeros([225, 42])
for i in range(MET.shape[1]):
    for j in range(GCP.shape[1]):
        r_2, pvalue2 = pearsonr(MET[:,i], GCP[:, j])
        r2 = round(r_2, 2)
        corr_M2G[i][j] = r2
with sns.axes_style("darkgrid"):
    f, G2M_heatmap = plt.subplots(figsize=(50, 50))
    corr_G2M_heatmap = sns.heatmap(corr_G2M, annot=True, vmin = -1, vmax= 1)
with sns.axes_style("darkgrid"):
    f, M2G_heatmap = plt.subplots(figsize=(50, 50))
    corr_M2G_heatmap = sns.heatmap(corr_M2G, annot=True, vmin = -1, vmax= 1)
corr_G2M_heatmap.figure.savefig("corr_G2M_heatmap.png")
corr_M2G_heatmap.figure.savefig("corr_M2G_heatmap.png")
np.savetxt("G2M_corr_LASSO.csv", corr_G2M, delimiter=",")
np.savetxt("M2G_corr_LASSO.csv", corr_M2G, delimiter=",")
