## 1. Import libraries.

In [60]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.sparse.csgraph import connected_components
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.io import output_notebook
output_notebook()

## 2. Load matrices.

In [61]:
# Read the 14 matrices 05_matrix/1.csv ... 05_matrix/14.csv. The first CSV
# column is used as the index and the first row as the header; each table is
# then transposed, so dfs[k] holds file k+1 with the CSV's columns as rows.
dfs = []
for file_no in range(1, 15):
    matrix = pd.read_csv("05_matrix/{}.csv".format(file_no), index_col=0, header=0)
    dfs.append(matrix.T)

## 3. Add meta info.

In [62]:
# Identifier lists for the two groups. Each file is read without a header row
# and only its first column is kept, as a plain Python list.
control_table = pd.read_csv("../src/COS_control.txt", header=None)
negative = control_table.T.values.tolist()[0]
sample_table = pd.read_csv("../src/COS_sample.txt", header=None)
positive = sample_table.T.values.tolist()[0]

# Add a binary label column "y" to every matrix in place: 0 when the row label
# is listed in `negative`, 1 otherwise. `positive` is not consulted here, so a
# row missing from both lists is labelled 1.
for df in dfs:
    df["y"] = [int(sample not in negative) for sample in df.index]

## 4. PCA.

In [63]:
def draw_PCA(df):
    """Show a two-component PCA scatter of the rows of ``df``, marked by label.

    PCA is fitted on every column of ``df`` except the last one (the excluded
    column is assumed to be the ``"y"`` label column). Rows whose ``"y"`` value
    is 0 are drawn as black circles and rows whose value is 1 as dark-orange
    triangles. The hover tooltip shows a point's index within its data source
    and its row label taken from ``df.index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns followed by a final ``"y"`` column.

    Returns
    -------
    None
        The Bokeh figure is rendered with ``show``; nothing is returned.
    """
    projected = PCA(n_components=2).fit_transform(df.iloc[:, :-1])

    # One row per sample: PC1 -> "x", PC2 -> "y", label -> "meta".
    df_res = pd.DataFrame(projected, index=df.index, columns=["x", "y"])
    df_res["meta"] = df["y"].tolist()

    def _group_source(label):
        # Projection rows whose label equals `label`, wrapped for Bokeh.
        group = df_res[df_res["meta"] == label]
        return ColumnDataSource(
            data={
                "x": group["x"],
                "y": group["y"],
                "desc": group.index.tolist(),
            }
        )

    source1 = _group_source(0)
    source2 = _group_source(1)

    hover = HoverTool(tooltips=[("index", "$index"), ("desc", "@desc")])

    # Axis labels and tick styling are left at the Bokeh defaults.
    p = figure(tools=[hover, "save"], plot_width=550, plot_height=500)
    p.circle(
        "x", "y",
        fill_color="black", line_color="black", fill_alpha=1, size=14,
        source=source1,
    )
    p.triangle(
        "x", "y",
        fill_color="darkorange", line_color="darkorange", fill_alpha=1, size=16,
        source=source2,
    )

    show(p)

In [41]:
# q = 1: PCA scatter for the matrix loaded from 05_matrix/1.csv
draw_PCA(dfs[0])

In [42]:
# q = 2: PCA scatter for the matrix loaded from 05_matrix/2.csv
draw_PCA(dfs[1])

In [43]:
# q = 3: PCA scatter for the matrix loaded from 05_matrix/3.csv
draw_PCA(dfs[2])

In [44]:
# q = 4: PCA scatter for the matrix loaded from 05_matrix/4.csv
draw_PCA(dfs[3])

In [45]:
# q = 5: PCA scatter for the matrix loaded from 05_matrix/5.csv
draw_PCA(dfs[4])

In [46]:
# q = 6: PCA scatter for the matrix loaded from 05_matrix/6.csv
draw_PCA(dfs[5])

In [47]:
# q = 7: PCA scatter for the matrix loaded from 05_matrix/7.csv
draw_PCA(dfs[6])

In [48]:
# q = 8: PCA scatter for the matrix loaded from 05_matrix/8.csv
draw_PCA(dfs[7])

In [49]:
# q = 9: PCA scatter for the matrix loaded from 05_matrix/9.csv
draw_PCA(dfs[8])

In [50]:
# q = 10: PCA scatter for the matrix loaded from 05_matrix/10.csv
draw_PCA(dfs[9])

In [51]:
# q = 11: PCA scatter for the matrix loaded from 05_matrix/11.csv
draw_PCA(dfs[10])

In [64]:
# q = 12: PCA scatter for the matrix loaded from 05_matrix/12.csv
draw_PCA(dfs[11])

In [19]:
# q = 13: PCA scatter for the matrix loaded from 05_matrix/13.csv
draw_PCA(dfs[12])

In [20]:
# q = 14: PCA scatter for the matrix loaded from 05_matrix/14.csv
draw_PCA(dfs[13])

## 5. Interpretability

### q = 14

In [19]:
# Fit a PCA that keeps all components on the q = 14 matrix; the last column
# (the "y" label added earlier) is excluded from the features.
pca = PCA()
result = pca.fit_transform(dfs[13].iloc[:,:-1])

# One row per principal component, holding its explained-variance ratio.
# PCA keeps min(n_samples, n_features) components, so the row labels are sized
# from the ratios actually returned rather than from the number of feature
# columns; the two differ when there are fewer samples than features, which
# would otherwise make the DataFrame constructor raise on a length mismatch.
n_components = len(pca.explained_variance_ratio_)
df_contribution = pd.DataFrame(
    pca.explained_variance_ratio_,
    index=["PC{}".format(k + 1) for k in range(n_components)],
)
df_contribution

Unnamed: 0,0
PC1,0.9168589
PC2,0.05549807
PC3,0.02764303
PC4,2.051083e-09
PC5,1.176796e-34
PC6,3.140805e-35
PC7,2.2536069999999998e-36
PC8,1.671857e-49
PC9,1.322379e-59
PC10,8.369198e-69


In [20]:
# Per-component explained-variance ratios, in PC order.
contribution = df_contribution.iloc[:,0].tolist()

# Running total of the ratios, with a leading 0 so the curve starts at
# (0 components, 0 variance). np.cumsum is a single pass instead of re-summing
# a growing slice for every component.
cumulative_contribution_rate = [0] + np.cumsum(contribution).tolist()
y = cumulative_contribution_rate

# Derive the x positions (number of components included) from the data so the
# two series always have the same length, whatever the component count is.
x = list(range(len(cumulative_contribution_rate)))

p = figure(title="Contribution rate", x_axis_label='PC', y_axis_label='Cumulative Contribution Rate')
p.line(x, y, line_width=2)
show(p)

In [21]:
# Coefficients of every feature column of dfs[13] along the first two principal
# axes of the fitted `pca`: first axis -> "x", second axis -> "y", one row per
# feature.
df_res = pd.DataFrame(
    {"x": pca.components_[0], "y": pca.components_[1]},
    index=dfs[13].iloc[:,:-1].columns.tolist(),
)

# Bokeh data source; "desc" carries the feature name for the hover tooltip.
source = ColumnDataSource(
    data={
        "x": df_res["x"],
        "y": df_res["y"],
        "desc": df_res.index.tolist(),
    }
)

hover = HoverTool(tooltips=[("index", "$index"), ("desc", "@desc")])

# One blue circle per feature; tick label styling is left at Bokeh defaults.
p = figure(tools=[hover, "save"], plot_width=550, plot_height=500)
p.circle(
    "x", "y",
    fill_color="blue", line_color="blue", fill_alpha=1, size=14,
    source=source,
)

show(p)