In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import os
%matplotlib inline

In [None]:
# Data collection: name the descriptor/target columns, then load the raw CSV.
g_desriptor_labels = ["x1", "x2"]
g_target_name = "y"
g_dfraw = pd.read_csv("../data/anisotropicdata1.csv")
# Bare last expression so the notebook renders the loaded frame.
g_dfraw

In [None]:
# Settings read by the plotting helper when it builds output file names.
G_METADATA = {
    "outputdir": "image_executed",  # directory the figures are saved into
    "prefix": "PCA_sample",  # leading part of every figure file name
    "dataname": "anisotropicdata1",  # dataset tag embedded in the file name
    "normalizationtype": "StandardScaler",
}

In [None]:
# Split the raw frame into the descriptor matrix and the target vector.
g_Xraw = g_dfraw[g_desriptor_labels].values
g_label = g_dfraw[g_target_name].values

# Preprocessing: fit the scaler on the raw descriptors, then apply it.
g_scaler = StandardScaler().fit(g_Xraw)
g_X = g_scaler.transform(g_Xraw)

# Analysis: fit a 2-component PCA on the scaled data and project onto it.
g_pca = PCA(n_components=2).fit(g_X)
g_X_pca = g_pca.transform(g_X)


In [None]:
def plot_data2d(X, y0=None, title:str=None, metadata: dict = G_METADATA,
               labelfontsize=15, tickfontsize=15):
    """Draw a 2-D scatter plot, save it as a PNG file, and show it.

    Columns 0 and 1 of ``X`` are plotted against each other. The figure is
    saved as ``<outputdir>/<prefix>_<dataname>_<title>.png``, the file name is
    printed, and the figure is displayed with ``plt.show()``. The output
    directory is created if it does not exist yet.

    Note that ``title`` is passed through ``str()`` when the file name is
    built, so calling without a title writes a file ending in ``_None.png``.

    Args:
        X (np.array): descriptors; only columns 0 and 1 are plotted.
        y0 (np.array, optional): target variable used to colour the points
            (flattened with ``ravel()``). Defaults to None, in which case
            all points share the default colour.
        title (str, optional): figure title, also used as the last part of
            the file name. Defaults to None (no title is drawn).
        metadata (dict): must contain the keys "outputdir", "prefix" and
            "dataname". Defaults to G_METADATA.
        labelfontsize (int, optional): axis label font size. Defaults to 15.
        tickfontsize (int, optional): tick label font size. Defaults to 15.
    """
    fig, ax = plt.subplots(figsize=(5, 5))

    # Colour the points by the target only when one was supplied.
    scatter_kwargs = {"linewidths": 0, "s": 30}
    if y0 is not None:
        scatter_kwargs["c"] = y0.ravel()
    ax.scatter(X[:, 0], X[:, 1], **scatter_kwargs)

    ax.set_xlabel("axis 1", fontsize=labelfontsize)
    ax.set_ylabel("axis 2", fontsize=labelfontsize)
    if title is not None:
        ax.set_title(title)
    ax.tick_params(axis="both", labelsize=tickfontsize)
    fig.tight_layout()

    filename = "_".join([metadata["prefix"], metadata["dataname"], str(title)]) + ".png"
    print(filename)

    # savefig does not create missing directories, so make sure the target
    # exists; an empty outputdir means "current directory" and needs nothing.
    outputdir = metadata["outputdir"]
    if outputdir:
        os.makedirs(outputdir, exist_ok=True)
    fig.savefig(os.path.join(outputdir, filename))
    plt.show()
    
# Quick look at the scaled descriptors coloured by the target; no title is
# passed, so the saved file name ends in "_None.png".
plot_data2d(g_X, y0=g_label)


In [None]:
# Plot the scaled data and its PCA projection one after the other so the two
# figures can be compared; each call also saves a PNG named after its title.
print("Scaled original data")
plot_data2d(g_X, y0=g_label, title="original")
print("PCA transformed data")
plot_data2d(g_X_pca, y0=g_label, title="transformed")