In [None]:
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Build the base data set.
def get_data(n_per_axis=10):
    """Create a regular 2-D mesh of points covering [-1, 1] x [-1, 1].

    Args:
        n_per_axis (int, optional): number of evenly spaced grid values taken
            along each axis. Defaults to 10, which yields 100 points.

    Returns:
        pd.DataFrame: explanatory-variable data with columns "x1" and "x2",
            one row per mesh point. "x1" is the slow index and "x2" the
            fast one.
    """
    axis_values = np.linspace(-1, 1, n_per_axis)
    # Same row order as a nested loop with x1 outermost and x2 innermost.
    mesh = [[x1, x2] for x1 in axis_values for x2 in axis_values]
    return pd.DataFrame(mesh, columns=["x1", "x2"])

def plot_df_2d(df, labelfontsize=15, tickfontsize=15, filename=None):
    """Scatter-plot the "x1" and "x2" columns of a DataFrame.

    Args:
        df (pd.DataFrame): data to plot; must provide columns "x1" and "x2".
        labelfontsize (int, optional): label font size. Defaults to 15.
        tickfontsize (int, optional): ticks font size. Defaults to 15.
        filename (str, optional): path the figure is saved to. When None
            (the default) the figure is only drawn, not saved.
    """
    fig, ax = plt.subplots()
    # Plot the caller's frame (not a freshly generated mesh) so the figure
    # actually reflects the ``df`` argument.
    df.plot(x="x1", y="x2", kind="scatter", ax=ax)
    ax.set_xlabel("axis1", fontsize=labelfontsize)
    ax.set_ylabel("axis2", fontsize=labelfontsize)
    ax.tick_params(axis="x", labelsize=tickfontsize)
    ax.tick_params(axis="y", labelsize=tickfontsize)
    fig.tight_layout()
    if filename is not None:
        fig.savefig(filename)
    
# Build the base mesh once; later cells keep working on this frame.
g_df_raw = get_data()

# Draw the mesh and store the figure on disk.
plot_df_2d(
    df=g_df_raw,
    filename="image_executed/PCA_sample_anisotropicdata1_None.png",
)

In [None]:
# Create additional data.
def add_data(df, random_state=None):
    """Append randomly drawn observations to the given data.

    Ten points are sampled from a 2-D normal distribution with mean
    (0.5, 0.5) and covariance 0.2**2 * I, then appended below ``df``.
    ``df`` itself is not modified.

    Args:
        df (pd.DataFrame): observed data with columns "x1" and "x2".
        random_state (int, optional): seed for a private
            ``np.random.RandomState`` so the draw is reproducible. When None
            (the default) NumPy's global random state is used.

    Returns:
        pd.DataFrame: the observations of ``df`` followed by the 10 new
            points, with a fresh 0..n-1 index.
    """
    mean = [0.5, 0.5]
    cov = 0.2**2 * np.array([[1, 0.0], [0.0, 1]])
    # ``np.random`` (module) and ``RandomState`` expose the same
    # ``multivariate_normal`` API, so either can act as the sampler.
    sampler = np.random if random_state is None else np.random.RandomState(random_state)
    xy = sampler.multivariate_normal(mean=mean, cov=cov, size=10)
    df_extra = pd.DataFrame(xy, columns=["x1", "x2"])
    return pd.concat([df, df_extra], axis=0).reset_index(drop=True)

# Start from a freshly built mesh instead of feeding g_df_raw back into
# add_data: the self-referential form stacks another 10 random points onto
# the frame every time this cell is re-run.
g_df_raw = add_data(get_data())

def save_fig(df, labelfontsize=15, tickfontsize=15, filename="image_executed/2dmesh_plus_random.png" ):
    """Scatter-plot "x1" against "x2" on a 5x5 figure and save it to disk.

    Args:
        df (pd.DataFrame): data with columns "x1" and "x2".
        labelfontsize (int, optional): font size of the axis labels. Defaults to 15.
        tickfontsize (int, optional): font size of the tick labels. Defaults to 15.
        filename (str, optional): output image path.
            Defaults to "image_executed/2dmesh_plus_random.png".
    """
    figure, axes = plt.subplots(figsize=(5, 5))
    df.plot.scatter(x="x1", y="x2", ax=axes)

    # Axis captions first, then tick label sizes for both axes.
    for set_label, caption in ((axes.set_xlabel, "axis1"), (axes.set_ylabel, "axis2")):
        set_label(caption, fontsize=labelfontsize)
    for which_axis in ("x", "y"):
        axes.tick_params(axis=which_axis, labelsize=tickfontsize)

    figure.tight_layout()
    figure.savefig(filename)
    
# Plot mesh + random points and save under the function's default filename.
save_fig(df=g_df_raw)

In [None]:
# Columns used as descriptors (explanatory variables) for the PCA input.
g_desriptor_labels = ["x1", "x2"]
# Pull those columns out of the frame as a plain NumPy array.
g_Xraw = g_df_raw[g_desriptor_labels].to_numpy()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

def apply_pca(Xraw):
    """Scale the explanatory variables and transform them with PCA.

    Args:
        Xraw (np.ndarray): explanatory variables.

    Returns:
        np.ndarray: explanatory variables after StandardScaler scaling and
            a 2-component PCA transform.
    """
    # Preprocessing: fit the scaler on the raw data and scale that same data.
    standardized = StandardScaler().fit_transform(Xraw)

    # Analysis: fit a 2-component PCA on the scaled data, then project it.
    model = PCA(n_components=2)
    model.fit(standardized)
    return model.transform(standardized)

# PCA-transformed version of the descriptor matrix.
g_X_pca = apply_pca(Xraw=g_Xraw)

In [None]:
def show_X(X, labelfontsize=15, tickfontsize=15,
           filename="image_executed/2dmesh_plus_random_transformed.png" ):
    """Plot the first two columns of an array as points and save the figure.

    Args:
        X (np.ndarray): 2-D array indexed as ``X[:, 0]`` / ``X[:, 1]``;
            column 0 goes on the horizontal axis, column 1 on the vertical.
        labelfontsize (int, optional): label font size. Defaults to 15.
        tickfontsize (int, optional): ticks font size. Defaults to 15.
        filename (str, optional): output image path.
            Defaults to "image_executed/2dmesh_plus_random_transformed.png".
    """
    fig, ax = plt.subplots(figsize=(5,5))
    ax.plot(X[:,0], X[:,1], ".")
    # Label the axes as principal components, with the requested font size.
    # A second pair of "axis1"/"axis2" set_*label calls used to overwrite
    # these labels, which mislabeled the transformed data.
    ax.set_xlabel("pca1", fontsize=labelfontsize)
    ax.set_ylabel("pca2", fontsize=labelfontsize)
    ax.tick_params(axis = 'x', labelsize =tickfontsize)
    ax.tick_params(axis = 'y', labelsize =tickfontsize)
    fig.tight_layout()
    # Echo the output path so the saved file can be located from the cell output.
    print(filename)
    fig.savefig(filename)
    
# Plot the PCA-transformed points and save under the default filename.
show_X(X=g_X_pca)