# GTEx model building with NMF

## Libraries

In [None]:
import csv
import gc
from pathlib import Path
import numpy as np
import pandas as pd
from pyprojroot.here import here
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score as ari
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score
import cupy as cp
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from sklearn.decomposition import FastICA

## Input

In [None]:
# Load the filtered GTEx matrix from CSV, using the first CSV column as the row index.
# The decomposition cells below transpose this frame before fitting, so its columns
# are treated as the observations (the GTEX-* sample IDs) and its rows as the features.
gtex_data = pd.read_csv(here('output/gtex/df_gtex_fbm_filt.csv'), index_col=0)

In [None]:
# Preview the first rows of the loaded matrix as the cell output.
gtex_data.head()

In [None]:
# Number of components requested from every decomposition in this notebook
# (PCA, ICA and NMF all receive this same value).
# NOTE(review): the rationale for 412 is not recorded here — confirm its origin.
n_components = 412

# PCA

In [6]:
# PCA is fitted on the transposed matrix, so every column of gtex_data
# is one observation and every row of gtex_data is one feature.
pca = PCA(
    n_components=n_components,
    svd_solver="auto",
    random_state=0,
)

W = pca.fit_transform(gtex_data.T)  # projected observations: (observations x components)
H = pca.components_  # principal axes: (components x features)

# Component labels PC1..PCk, where k is the number of columns actually returned.
pc_names = ["PC" + str(k) for k in range(1, W.shape[1] + 1)]

gtex_pca_scores = pd.DataFrame(data=W, columns=pc_names, index=gtex_data.columns)
gtex_pca_loadings = pd.DataFrame(data=H, columns=gtex_data.index, index=pc_names)

# "B" matrix: components as rows, observations as columns.
gtex_pca_B = gtex_pca_scores.transpose()
gtex_pca_B.index.name = "PC"

In [7]:
# Preview the first principal components (rows) across samples (columns).
gtex_pca_B.head()

Unnamed: 0_level_0,GTEX-1117F-0226-SM-5GZZ7,GTEX-1117F-0426-SM-5EGHI,GTEX-1117F-0526-SM-5EGHJ,GTEX-1117F-0626-SM-5N9CS,GTEX-1117F-0726-SM-5GIEN,GTEX-1117F-1326-SM-5EGHH,GTEX-1117F-2426-SM-5EGGH,GTEX-1117F-2526-SM-5GZY6,GTEX-1117F-2826-SM-5GZXL,GTEX-1117F-2926-SM-5GZYI,...,GTEX-ZZPU-1126-SM-5N9CW,GTEX-ZZPU-1226-SM-5N9CK,GTEX-ZZPU-1326-SM-5GZWS,GTEX-ZZPU-1426-SM-5GZZ6,GTEX-ZZPU-1826-SM-5E43L,GTEX-ZZPU-2126-SM-5EGIU,GTEX-ZZPU-2226-SM-5EGIV,GTEX-ZZPU-2426-SM-5E44I,GTEX-ZZPU-2626-SM-5E45Y,GTEX-ZZPU-2726-SM-5NQ8O
PC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PC1,98.066131,-80.226636,72.737224,105.431979,-42.6035,24.434636,119.04358,72.98696,58.044673,55.032375,...,-29.446284,14.89508,69.250271,-4.70522,26.783967,73.281715,-7.275129,48.876506,-91.038999,48.72488
PC2,-22.368006,-36.675448,-22.237349,3.937239,-35.746435,-59.140056,23.276249,-33.219026,-32.308614,-44.535799,...,-18.444306,-4.356395,-1.665304,-29.443116,0.957318,22.944016,-62.769103,-7.763878,-35.591944,-36.325488
PC3,7.35928,70.45901,41.336152,41.718353,56.652206,14.089737,18.836757,-87.862139,4.977632,-60.012872,...,69.291016,8.623129,-20.374531,-4.924642,60.051812,14.65512,-90.162553,75.344418,84.749351,40.011727
PC4,-5.554155,55.524286,-14.203896,-34.143531,-1.929957,-36.905641,-28.490491,69.880966,-6.469797,79.996264,...,11.135298,-12.172509,-20.192667,7.88011,2.598821,-6.331966,95.068564,-3.057642,56.712612,-22.39506
PC5,10.588558,-52.179941,2.600956,19.753202,18.62253,30.034636,-1.852577,0.461041,18.291633,26.687994,...,6.371653,-22.412051,4.254971,14.125361,1.298625,-26.85627,-5.603087,5.260878,-59.923204,16.323468


In [8]:
# Persist the PCA component-by-sample score matrix as a pickle file.
gtex_pca_B.to_pickle(here('output/gtex/gtex_pca_B.pkl'))

# ICA

In [None]:
# FastICA is fitted on the transposed matrix, so every column of gtex_data
# is one observation and every row of gtex_data is one feature.
ica = FastICA(
    n_components=n_components,
    random_state=0,
    max_iter=1000,
)

W = ica.fit_transform(gtex_data.T)  # estimated sources: (observations x components)
H = ica.mixing_.T  # transposed mixing matrix: (components x features)

# Component labels IC1..ICk, where k is the number of columns actually returned.
ic_names = ["IC" + str(k) for k in range(1, W.shape[1] + 1)]

gtex_ica_scores = pd.DataFrame(data=W, columns=ic_names, index=gtex_data.columns)
gtex_ica_loadings = pd.DataFrame(data=H, columns=gtex_data.index, index=ic_names)

# "B" matrix: components as rows, observations as columns.
gtex_ica_B = gtex_ica_scores.transpose()
gtex_ica_B.index.name = "IC"

In [None]:
# Preview the first independent components (rows) across samples (columns).
gtex_ica_B.head()

In [None]:
# Persist the ICA component-by-sample score matrix as a pickle file.
gtex_ica_B.to_pickle(here('output/gtex/gtex_ica_B.pkl'))

# NMF  

In [None]:
# NMF is fitted on the transposed matrix, so every column of gtex_data
# is one observation and every row of gtex_data is one feature.
nmf = NMF(
    n_components=n_components,
    init="nndsvd",
    random_state=0,
    max_iter=500,
)

W = nmf.fit_transform(gtex_data.T)  # (observations x components)
H = nmf.components_  # (components x features)

# Component labels LV1..LVk, where k is the number of columns actually returned.
comp_names = [f"LV{i+1}" for i in range(W.shape[1])]

# W has one row per column of gtex_data (the frame was transposed before
# fitting), so the scores must be indexed by gtex_data.columns. Indexing by
# gtex_data.index raises a shape mismatch, or mislabels rows if the matrix
# happens to be square. This matches the PCA and ICA cells.
gtex_nmf_scores = pd.DataFrame(W, index=gtex_data.columns, columns=comp_names)
gtex_nmf_loadings = pd.DataFrame(H, index=comp_names, columns=gtex_data.index)

# "B" matrix: latent variables as rows, observations as columns.
gtex_nmf_B = gtex_nmf_scores.T
gtex_nmf_B.index.name = "LV"

In [None]:
# Persist the NMF latent-variable-by-sample score matrix as a pickle file.
gtex_nmf_B.to_pickle(here('output/gtex/gtex_nmf_B.pkl'))