In [None]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA 
from scipy import stats
import os

from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Read the shCtrl / shOPA1 quantification result CSV into a DataFrame and preview it.
raw_df = pd.read_csv(
    filepath_or_buffer="Z:/DeepLearningData/research_010_NIH3T3/shCtrl_003/data/compare_OPA1/quantification_result_shCtrl_003_shOPA1_003.csv",
)
raw_df

In [None]:
# Control (shCtrl) rows, restricted to the nine morphology features used below.
# The large mitochondrion is excluded (translated from the original note).
ctrl_df = raw_df[raw_df["Gene"] == "shCtrl"]
ctrl_pca_df = ctrl_df[["mito_S_px", "mito_V_px", "cristae_S_px", "cristae_V_px", "tubular_S/cristae_S",  "tubular_V/cristae_V", "re_extract_CJ_number_no_1px_2px/IMM_50m", "mito_max_L/mito_min_L", "form_factor"]]

# The excluded row is identified by its exact mito_V_px value.
# NOTE(review): presumably this is the single oversized outlier -- confirm against the raw table.
# .copy() detaches the result from raw_df, so adding a column to ctrl_df_ later
# does not raise SettingWithCopyWarning.
ctrl_df_ = ctrl_pca_df[ctrl_pca_df["mito_V_px"] != 2883123.0].copy()
ctrl_df_

In [None]:
# Knock-down (shOPA1) rows, restricted to the same nine features as the control set.
kd_df = raw_df[raw_df["Gene"] == "shOPA1"]
kd_df_ = kd_df[["mito_S_px", "mito_V_px", "cristae_S_px", "cristae_V_px", "tubular_S/cristae_S",  "tubular_V/cristae_V", "re_extract_CJ_number_no_1px_2px/IMM_50m", "mito_max_L/mito_min_L", "form_factor"]]

# Drop rows whose tubular_V/cristae_V cell holds the spreadsheet error string '#DIV/0!'.
# NOTE(review): if that string occurs in the CSV, the column is presumably read as text,
# and the later scikit-learn steps rely on implicit conversion to float -- confirm.
# .copy() detaches the result from raw_df, so adding a column to kd_df_ later
# does not raise SettingWithCopyWarning.
kd_df_ = kd_df_[kd_df_["tubular_V/cristae_V"] != '#DIV/0!'].copy()
kd_df_

# PCA

In [None]:
# Learn the per-feature min/max from the control set only; the same fitted
# scaler is reused for the knock-down set further down.
scaler = MinMaxScaler().fit(ctrl_df_)
scaled_ctrl_df = scaler.transform(ctrl_df_)
scaled_ctrl_df

In [None]:
# K-means (k=3) on the min-max scaled control features.
# n_init is pinned to 10 (the scikit-learn default before version 1.4) so the
# cluster labels do not depend on which scikit-learn release is installed and
# no FutureWarning about the changing default is emitted.
km = KMeans(
    n_clusters=3,
    n_init=10,
    random_state=0
)
# One cluster label per control row.
ctrl_km = km.fit_predict(scaled_ctrl_df)

In [None]:
# Fit the principal axes on the scaled control data, then project the same
# control data onto them.
pca = PCA(random_state=0).fit(scaled_ctrl_df)
feature_ctrl = pca.transform(scaled_ctrl_df)

In [None]:
# Scale the knock-down features with the scaler that was fitted on the control set.
scaled_kd_df = scaler.transform(X=kd_df_)
scaled_kd_df

In [None]:
# Project the scaled knock-down data onto the principal axes fitted on the control set.
feature_kd = pca.transform(X=scaled_kd_df)

In [None]:
# 3D plot of each feature's weight on the first three principal axes.
# Plot axes are ordered (PC1, PC3, PC2), i.e. PC3 is drawn on y and PC2 on z.
ax = plt.figure(figsize=(10, 10)).add_subplot(111, projection='3d')

weights_pc1 = pca.components_[0]
weights_pc2 = pca.components_[1]
weights_pc3 = pca.components_[2]

# Label every point with the feature (column) it belongs to.
for feature_name, (wx, wy, wz) in zip(ctrl_df_.columns, zip(weights_pc1, weights_pc3, weights_pc2)):
    ax.text(wx, wy, wz, feature_name)

ax.scatter(weights_pc1, weights_pc3, weights_pc2, alpha=0.8)
ax.grid()
ax.view_init(elev=35, azim=-25)

ax.set(xlabel="PC1", ylabel="PC3", zlabel="PC2")

# The x limits are given in descending order, which flips the PC1 axis.
ax.set_xlim(0, -1.0)
ax.set_ylim(-1.0, 0)
ax.set_zlim(0, 1.0)

# Mark the origin with a separate point.
ax.scatter(0, 0, 0)

plt.savefig("Z:/paper_data/2021_cristae/for_revision_#2/fig6C/PCA_orientation.svg", format="svg")

In [None]:
# 3D scatter of the PCA scores; plot axes are ordered (PC1, PC3, PC2).
ax = plt.figure(figsize=(10, 10)).add_subplot(111, projection='3d')

# Knock-down points are drawn first, control points second, so each group
# gets the next colour of the default colour cycle in that order.
for scores in (feature_kd, feature_ctrl):
    ax.scatter(scores[:, 0], scores[:, 2], scores[:, 1], alpha=0.5, s=100)

ax.set(xlabel="PC1", ylabel="PC3", zlabel="PC2")

ax.view_init(elev=35, azim=-25)

# The x limits are given in descending order, which flips the PC1 axis.
ax.set_xlim(1.0, -1.0)
ax.set_ylim(-1.0, 1.0)
ax.set_zlim(-1.0, 1.0)

plt.savefig("Z:/paper_data/2021_cristae/for_revision_#2/fig6C/PCA_3d.svg", format="svg")

In [None]:
# 2D plot of each feature's weight on PC1 (x) and PC2 (y).
plt.figure(figsize=(6, 6))
loading_axes = plt.gca()

weights_pc1, weights_pc2 = pca.components_[0], pca.components_[1]

# Label every point with the feature (column) it belongs to.
for feature_name, (wx, wy) in zip(ctrl_df_.columns, zip(weights_pc1, weights_pc2)):
    loading_axes.text(wx, wy, feature_name)

loading_axes.scatter(weights_pc1, weights_pc2, alpha=0.8)
loading_axes.grid()
loading_axes.set_xlabel("PC1")
loading_axes.set_ylabel("PC2")

# Same fixed tick positions on both axes.
tick_positions = [-1.0,  -0.5, -0.0, 0.5, 1.0]
loading_axes.set_xticks(tick_positions)
loading_axes.set_yticks(tick_positions)

plt.savefig("Z:/paper_data/2021_cristae/for_revision_#2/fig6C/PCA_orientation_12.svg", format="svg")
plt.show()

In [None]:
# 2D scatter of the PCA scores on PC1 (x) and PC2 (y).
plt.figure(figsize=(10, 10))

# Knock-down points are drawn first, control points second, so each group
# gets the next colour of the default colour cycle in that order.
plt.scatter(feature_kd[:, 0] , feature_kd[:, 1], alpha=0.5, s=100)
plt.scatter(feature_ctrl[:, 0] , feature_ctrl[:, 1], alpha=0.5, s=100)

plt.xlabel("PC1")
plt.ylabel("PC2")

# Same fixed tick positions on both axes.
plt.xticks([-1.0,  -0.5, -0.0, 0.5, 1.0])
plt.yticks([-1.0,  -0.5, -0.0, 0.5, 1.0])

# The former `ax.view_init(...)` call was removed: `ax` still pointed at the 3D
# axes created in an earlier cell, so the call never affected this 2D figure and
# only made the cell depend on leftover kernel state.
plt.savefig("Z:/paper_data/2021_cristae/for_revision_#2/fig6C/PCA_12.svg", format="svg")

# Ward's hierarchical clustering

In [None]:
# Stack control and knock-down rows into one table, tagging each row with its group.
# .assign() returns new frames instead of writing a "Gene" column into ctrl_df_ /
# kd_df_: this avoids SettingWithCopyWarning and keeps those two frames purely
# numeric-feature tables, so the scaler / PCA cells above can be re-run after this cell.
all_df_ = pd.concat([
    ctrl_df_.assign(Gene="shCtrl"),
    kd_df_.assign(Gene="shOPA1"),
])
all_df_

In [None]:
# Preview only the nine feature columns of the combined table (the "Gene" label is left out).
all_df_.loc[:, ["mito_S_px", "mito_V_px", "cristae_S_px", "cristae_V_px", "tubular_S/cristae_S",  "tubular_V/cristae_V", "re_extract_CJ_number_no_1px_2px/IMM_50m", "mito_max_L/mito_min_L", "form_factor"]]


In [None]:
# Standardize the nine feature columns of the combined (control + knock-down) table.
ward_feature_columns = ["mito_S_px", "mito_V_px", "cristae_S_px", "cristae_V_px", "tubular_S/cristae_S",  "tubular_V/cristae_V", "re_extract_CJ_number_no_1px_2px/IMM_50m", "mito_max_L/mito_min_L", "form_factor"]

# Fit on all rows, then transform the same rows.
scaler_ = StandardScaler().fit(all_df_[ward_feature_columns])
scaled_df_ = scaler_.transform(all_df_[ward_feature_columns])


In [None]:
# Wrap the standardized array in a DataFrame that keeps the row labels of all_df_.
# seaborn.clustermap matches a pandas Series passed as row_colors to the data by
# index. Without index=all_df_.index this frame would get a fresh 0..n-1 index that
# no longer corresponds to the labels carried by all_df_['Gene'], so the colour bar
# would be assigned to the wrong rows (or left blank where a label is missing).
scaled_df_indexed = pd.DataFrame(
    scaled_df_,
    index=all_df_.index,
    columns=["mito_S_px", "mito_V_px", "cristae_S_px", "cristae_V_px", "tubular_S/cristae_S",  "tubular_V/cristae_V", "re_extract_CJ_number_no_1px_2px/IMM_50m", "mito_max_L/mito_min_L", "form_factor"],
)
scaled_df_indexed

In [None]:
# Map each group label to a colour, pairing labels in order of first appearance
# in all_df_['Gene'] with the two hex colours below.
gene_labels = all_df_['Gene'].unique()
lut = {gene: colour for gene, colour in zip(gene_labels, ["#ff7f0e", "#1f77b4"])}
lut

In [None]:
# One colour per row of all_df_, looked up from the row's group label.
row_colors = all_df_.loc[:, 'Gene'].map(lut)
row_colors

In [None]:
# Clustered heatmap of the standardized features: Ward linkage on Euclidean
# distance, with a colour bar per row taken from row_colors.
clustermap_options = dict(
    method='ward',
    metric='euclidean',
    figsize=(15, 15),
    cmap='magma',
    row_colors=row_colors,
    # Fixed colour-scale limits for the heatmap.
    vmin=-1.5,
    vmax=5,
)
sns_plot = sns.clustermap(scaled_df_indexed, **clustermap_options)

plt.savefig("Z:/paper_data/2021_cristae/for_revision_#2/fig6E/heatmap_without_angle.svg", format="svg")