2024.12.10(Tue)  
Principal component analysis for real and GAN-generated structures
---

In [8]:
import cv2
import numpy as np
import sys
import pandas as pd
import matplotlib.pyplot as plt
import os
import homcloud.interface as hc
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pickle
import seaborn as sns

# ---- my modules ----  #
import PCA_func

In [9]:
# Global matplotlib style shared by every figure in this notebook.
plt.rcParams.update({
    'font.family': 'Times New Roman',  # font used for all text
    'xtick.direction': 'in',           # x tick marks point inward ('in', 'out' or 'inout')
    'ytick.direction': 'in',           # y tick marks point inward ('in', 'out' or 'inout')
    'xtick.major.width': 1.0,          # line width of the major x ticks
    'ytick.major.width': 1.0,          # line width of the major y ticks
    'font.size': 10.5,                 # base font size
    'axes.linewidth': 1.0,             # edge line width of the axes frame
    'axes.axisbelow': True,            # draw ticks and grid lines below plot elements
    'mathtext.fontset': 'cm',          # Computer Modern for math text
})

In [10]:
# ------- hyperparameters ------- #
# 1. phases: each one is analysed separately and used as an output sub-folder name
phases = [
    "Ni",
    "YSZ",
    "Pore",
]

# 2. resolution size: names of the per-size sub-folders of the persistence-image data
sizes = [
    "revised_030",
    "revised_040",
    "revised_050",
]

# 3. dimension of homologies ("all" presumably combines every dimension -- confirm in PCA_func.cat_dfs)
dims = [
    "Dim_0",
    "Dim_1",
    "Dim_2",
    "all",
]



In [13]:
def PCA_analysis(size, phase, dim, out_folder, n_components=40):
    """Run PCA on real persistence-image vectors and reuse the reducer for generated data.

    The vectors of the real, WGAN-generated and sphere-generated structures are
    loaded with ``PCA_func.cat_dfs``. ``PCA_func.calc_pca`` is first called on
    the real data with ``pca_reducer=None``; the reducer it returns is pickled
    and then passed to ``calc_pca`` for both generated data sets (presumably so
    that all three are projected onto the axes fitted on the real data --
    confirm in ``PCA_func.calc_pca``).

    Files written below ``out_folder``:
        real/<phase>/PCA_<dim>_real.csv
        real/<phase>/PCA_reducer_<dim>.pkl
        WGAN-generated/<phase>/PCA_<dim>_fake.csv
        sphere-generated/<phase>/PCA_<dim>_sphere.csv

    Parameters
    ----------
    size : str
        Name of the size sub-folder under ``../2_Persistence_Image/PIs/<source>/``.
    phase : str
        Phase name; forwarded to ``PCA_func.cat_dfs`` and used as output sub-folder.
    dim : str
        Homology-dimension label; forwarded to ``PCA_func.cat_dfs`` and embedded
        in the output file names.
    out_folder : str
        Root directory for the results; sub-folders are created as needed.
    n_components : int, optional
        Number of principal components requested from ``PCA_func.calc_pca``
        for all three data sets. Defaults to 40, the value that was previously
        hard-coded.

    Returns
    -------
    tuple
        ``(df_pca_real, df_pca_wgan, df_pca_sphere)`` as returned by
        ``PCA_func.calc_pca`` for each data set.
    """
    pi_root = "../2_Persistence_Image/PIs"

    def load_vectors(source):
        # One folder per data source and size: <pi_root>/<source>/<size>
        header = "{}/{}/{}".format(pi_root, source, size)
        return PCA_func.cat_dfs(header, phase, dim)

    def save_scores(df_pca, source, suffix):
        # Write the scores to <out_folder>/<source>/<phase>/PCA_<dim>_<suffix>.csv
        # and hand back the directory so callers can store related files there.
        target_dir = os.path.join(out_folder, "{}/{}".format(source, phase))
        os.makedirs(target_dir, exist_ok=True)
        df_pca.to_csv(
            os.path.join(target_dir, "PCA_{}_{}.csv".format(dim, suffix)),
            index=False,
        )
        return target_dir

    # ======= step.1 load saved vectors ======= #
    # Everything is loaded up front so a missing input fails before any output is written.
    df_vects_real = load_vectors("real")
    df_vects_wgan = load_vectors("WGAN-generated")
    df_vects_sphere = load_vectors("sphere-generated")

    # ======= step.2 PCA ======= #
    # ----- real: no reducer is supplied, calc_pca returns the one it used ----- #
    df_pca_real, pca_reducer = PCA_func.calc_pca(
        PCA_func.AnodesData(df_vects_real),
        n_components=n_components,
        pca_reducer=None,
    )
    real_dir = save_scores(df_pca_real, "real", "real")
    # Persist the reducer next to the real scores so it can be reloaded later.
    with open(os.path.join(real_dir, "PCA_reducer_{}.pkl".format(dim)), "wb") as f:
        pickle.dump(pca_reducer, f)

    # ----- generated data: reuse the reducer obtained from the real data ----- #
    # (source folder, CSV suffix, vectors); the WGAN output keeps its historical
    # "_fake" suffix so existing downstream readers still find the file.
    generated_sets = (
        ("WGAN-generated", "fake", df_vects_wgan),
        ("sphere-generated", "sphere", df_vects_sphere),
    )
    projected = []
    for source, suffix, df_vects in generated_sets:
        df_pca, _ = PCA_func.calc_pca(
            PCA_func.AnodesData(df_vects),
            n_components=n_components,
            pca_reducer=pca_reducer,
        )
        save_scores(df_pca, source, suffix)
        projected.append(df_pca)
    df_pca_wgan, df_pca_sphere = projected

    return df_pca_real, df_pca_wgan, df_pca_sphere


In [14]:
# Run the PCA for every (size, dim, phase) combination. PCA_analysis writes its
# results to disk, so the returned values are discarded here.
for size in sizes:

    # per-size output directory for the calculated principal components
    out_folder = "./PCA_results/{}".format(size)
    os.makedirs(out_folder, exist_ok=True)

    # dims vary slowest and phases fastest
    for dim, phase in [(d, p) for d in dims for p in phases]:
        progress = "size: {}, phase: {}, dim: {}".format(size, phase, dim)
        print(progress)
        _, _, _ = PCA_analysis(size, phase, dim, out_folder)
        print("done \n ")

size: revised_030, phase: Ni, dim: Dim_0
Cumulative explained variance ratio: 99.664%
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
done 
 
size: revised_030, phase: YSZ, dim: Dim_0
Cumulative explained variance ratio: 99.999%
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
done 
 
size: revised_030, phase: Pore, dim: Dim_0
Cumulative explained variance ratio: 99.999%
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
done 
 
size: revised_030, phase: Ni, dim: Dim_1
Cumulative explained variance ratio: 99.996%
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
Change of data shape: (1500, 703) ---> (1500, 40)
done 
 
size: revised_030, ph