In [None]:
%matplotlib inline
#%matplotlib notebook
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from scipy import stats
import copy

In [None]:
# Load the quantification table; the 'Gene' column is later compared against
# 'shCtrl' / 'shOPA1'. The bare `df` on the last line renders the whole frame.
quantification_csv = './quantification_result_shCtrl_shOPA1.csv'
df = pd.read_csv(quantification_csv)
df

In [None]:
# Tubular fraction = tubular / (lamellar + tubular), computed once for the
# *_S_px columns and once for the *_V_px columns.
# Rows where both counts are 0 come out as NaN (0/0).
total_S_px = df['lamellar_S_px'] + df['tubular_S_px']
total_V_px = df['lamellar_V_px'] + df['tubular_V_px']
df['tubular_S_ratio'] = df['tubular_S_px'] / total_S_px
df['tubular_V_ratio'] = df['tubular_V_px'] / total_V_px

In [None]:
# Read the two angle tables (control and OPA1, per the file names) and stack
# them row-wise. The original row labels are kept, so the index of `df_c`
# may contain duplicates.
df_a, df2_a = (
    pd.read_csv(angle_csv)
    for angle_csv in ('./for_paper/angles_ctrl.csv', './for_paper/angles_opa1.csv')
)
df_c = pd.concat([df_a, df2_a])
df_c

In [None]:
# Cast mito_V_px to integer; it is used as the join key in the merge below.
df_c = df_c.astype({'mito_V_px': 'int'})
df_c

In [None]:
# Attach the angle columns to the quantification table, matching rows on
# mito_V_px. This is an inner join (pandas default): rows without a matching
# mito_V_px on either side are dropped, and duplicated key values multiply rows.
# NOTE: the cell rebinds `df`, so running it twice merges the angles in twice.
df = pd.merge(df, df_c, on='mito_V_px')
df

In [None]:
# Quick look at mito_V_px against row position, to spot outliers.
plt.scatter(np.arange(len(df)), df['mito_V_px'])

In [None]:
# Show the row(s) holding the largest mito_V_px value.
df.loc[df['mito_V_px'].eq(df['mito_V_px'].max())]

In [None]:
# Columns carried into the PCA / clustering steps ('Gene' is the group label).
selected_columns = ['Gene', 'tubular_S_ratio', 'tubular_V_ratio',
                    'cristae_S_px', 'cristae_V_px', 'mito_S_px', 'mito_V_px']
df2 = (
    df.loc[df['cristae_S_px'] != 0, selected_columns]  # exclude mitochondria without cristae
      # Exclude one mitochondrion with extremely large volume.
      # NOTE(review): 36 is a hard-coded row label of the merged `df`; presumably it is
      # the max-volume row displayed above -- confirm whenever the input files change.
      .drop(index=36)
      .reset_index(drop=True)
)
df4 = df2.drop(columns='Gene')  # numeric features, all groups
df4_ctrl = df2.loc[df2['Gene'] == 'shCtrl'].drop(columns='Gene')  # extract control
df4_ctrl

In [None]:
# Standardize features. The scaler is fitted on the control rows only, and the
# same fitted scaler is then applied to the control rows and to all rows.
scaler_ctrl = StandardScaler().fit(df4_ctrl)
scaled_df4_ctrl, scaled_df4_all = (
    scaler_ctrl.transform(frame) for frame in (df4_ctrl, df4)
)

scaled_df4_ctrl

In [None]:
# PCA: the components are fitted on the standardized control rows only, then
# every row (both groups) is projected onto them.
pca = PCA(random_state=0).fit(scaled_df4_ctrl)
feature = pca.transform(scaled_df4_all)

# Rows of `feature` are in the same order as rows of df2, so boolean masks
# built from df2['Gene'] select each group's points.
is_ctrl = (df2['Gene'] == 'shCtrl').to_numpy()
is_opa1 = (df2['Gene'] == 'shOPA1').to_numpy()
x1, y1 = feature[is_ctrl, 0], feature[is_ctrl, 1]
x2, y2 = feature[is_opa1, 0], feature[is_opa1, 1]

plt.figure(figsize=(6, 6))
plt.scatter(x1, y1, alpha=0.5, label='shCtrl')
plt.scatter(x2, y2, alpha=0.5, label='shOpa1')
plt.legend()
plt.grid(False)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()

In [None]:
# Copy of df2 with the first two principal-component scores appended as columns.
df5 = df2.assign(PC1=feature[:, 0], PC2=feature[:, 1])
df5

In [None]:
# Loadings plot: one arrow per input feature, using its weights on the first
# two principal components.
plt.figure(figsize=(6, 6))
plt.xlim(-3, 6)
plt.ylim(-6, 3)
arm = 5    # scale factor applied to the component weights for the arrows
trm = 1.2  # extra factor that places each label beyond its arrow tip
loadings = zip(pca.components_[0], pca.components_[1], df4.columns)
for i, (x, y, name) in enumerate(loadings):
    plt.arrow(0, 0, x*arm, y*arm, width=0.05, color='r')
    # The 1st and 5th labels are shifted down by 0.3; all others sit at the scaled tip.
    y_shift = 0.3 if i in (0, 4) else 0
    plt.text(x*arm*trm, y*arm*trm - y_shift, name, color='r')
plt.grid(False)
plt.show()

In [None]:
# Create a labelled DataFrame from the standardized feature matrix.
# `scaled_df4_all` is the control-fitted scaler applied to every row of df4
# (the previously referenced `scaled_df4` is not defined anywhere in this
# notebook and raised NameError on a fresh kernel). Reusing df4's index keeps
# the rows aligned with df2 / df4 for later index-based joins.
scaled_df4df = pd.DataFrame(scaled_df4_all, columns=df4.columns, index=df4.index)
scaled_df4df

In [None]:
# Hierarchical clustering heatmap (Ward linkage, Euclidean distance) of the
# standardized features, with one row-colour per 'Gene' group.
# The inputs are df2 / scaled_df4df: neither contains angle columns, and the
# previously referenced `df2_wo_angle` / `scaled_df4df_wo_angle` are not defined
# anywhere in this notebook.
# NOTE(review): confirm these are the frames the published heatmap was built from.
lut = dict(zip(df2['Gene'].unique(), "rbg"))  # group name -> matplotlib colour code
row_colors = df2['Gene'].map(lut)             # aligned to scaled_df4df by index
sns_plot = sns.clustermap(scaled_df4df, method='ward', metric='euclidean', figsize=(6,6), cmap='magma', row_colors=row_colors)
plt.setp(sns_plot.ax_heatmap.get_yticklabels(), rotation=0)
plt.setp(sns_plot.ax_heatmap.get_xticklabels(), rotation=30)
plt.savefig("heatmap_without_angle.svg", format="svg")

In [None]:
# Assign a class id to every row of the clustered matrix.
# Work on a copy so the ClusterGrid's own `data2d` frame is not modified.
df_new = sns_plot.data2d.copy()

# Sizes of consecutive row blocks, top to bottom in `data2d`, labelled 0, 1, 2, 3.
# NOTE(review): these sizes are hard-coded (presumably read off the dendrogram);
# they must be re-derived whenever the input data or the clustering changes.
class_sizes = [4, 10, 10, 30]
if sum(class_sizes) != len(df_new):
    raise ValueError(
        f"class_sizes cover {sum(class_sizes)} rows but the clustered matrix has {len(df_new)}"
    )
df_new['class'] = np.repeat(np.arange(len(class_sizes)), class_sizes)

In [None]:
# Re-sort the rows by index label (ascending), so the class column can be
# matched against the other per-row tables by position and by index.
df_new2 = df_new.sort_index(axis=0)

In [None]:
# PC1/PC2 scatter coloured by class. Row k of `feature` is paired with the
# k-th entry of df_new2['class'] (positional match).
plt.figure(figsize=(6, 6))
class_of_row = df_new2['class'].to_numpy()
for cls in (0, 1, 2, 3):
    rows = np.flatnonzero(class_of_row == cls)
    plt.scatter(feature[rows, 0], feature[rows, 1], alpha=0.8, label=cls)
plt.legend()
plt.xlim(-4,7)
plt.grid()
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()

In [None]:
# Unscaled feature table with the class column attached (joined on the index).
# NOTE(review): this cell previously copied `df3`, which is not defined anywhere
# in this notebook (NameError on Restart & Run All). `df4` -- the numeric
# features of all mitochondria -- is used instead; confirm that this is the
# table the per-class plots are meant to show.
df3_new = df4.copy()
df3_new['class'] = df_new2['class']
df3_new

In [None]:
# One small strip plot per feature, split by class.
# The feature list is taken from df3_new itself (every column except 'class'),
# so this cell needs only the frame it actually plots.
columns = [col for col in df3_new.columns if col != 'class']
num = len(columns)
print(num)
for column in columns:
    fig, ax = plt.subplots(figsize=(3, 3))
    sns.stripplot(x="class", y=column, data=df3_new, ax=ax)