In [None]:
# 시각화(CEP)
import pandas as pd 
import scipy as sp 
import numpy as np 
from scipy.stats import chi2
import matplotlib.pyplot as plt 
import matplotlib.patches as patches 
%matplotlib inline
# Half-width of the square plot window: the draw_* functions below set both
# axes to [-axx, axx] and place their text box relative to this value.
axx = 320

## 이상치 제거
def mahalanobis(x=None, data=None, cov=None):
    """Return the squared Mahalanobis distance of every row of ``x``.

    Distances are measured from the column-wise mean of ``data``.

    Args:
        x: Observations to score, one per row (DataFrame or 2-D array-like).
            A single 1-D observation is treated as one row.
        data: Reference sample supplying the mean and, unless ``cov`` is
            given, the covariance. Its columns must be in the same order as
            the columns of ``x``.
        cov: Optional precomputed covariance matrix (array-like). When
            ``None`` it is estimated from ``data`` with ``np.cov``.

    Returns:
        1-D ``numpy.ndarray`` holding one squared distance per row of ``x``.

    Raises:
        numpy.linalg.LinAlgError: If the covariance matrix is singular.
    """
    reference = np.asarray(data, dtype=float)
    # Take the per-column mean explicitly; np.mean() on a DataFrame does not
    # reduce along the same axis in every pandas version.
    centered = np.atleast_2d(np.asarray(x, dtype=float)) - reference.mean(axis=0)

    # `cov` may be an ndarray, whose truth value is ambiguous, so compare
    # against None instead of relying on truthiness.
    if cov is None:
        cov = np.cov(reference, rowvar=False)
    inv_covmat = np.linalg.inv(np.atleast_2d(np.asarray(cov, dtype=float)))

    # Row-wise quadratic form (x - mu) S^-1 (x - mu)^T. This equals the
    # diagonal of the full n x n product without materialising that matrix.
    return np.sum(np.dot(centered, inv_covmat) * centered, axis=1)

def outlier_detection(df):
    """Label every row as "normal" or "outlier" by IQR fences and by Mahalanobis distance.

    Args:
        df: DataFrame containing the numeric columns '거리오차' and '표준편의'.

    Returns:
        A new DataFrame (index reset to 0..n-1) with the columns
        '거리오차', '표준편의', '거리오차_이상치', '표준편의_이상치' and
        'Outlier_maha'. The last three hold the string "normal" or "outlier":
        the first two come from per-column 1.5*IQR fences, the third from a
        chi-square test on the squared Mahalanobis distance of the
        ('거리오차', '표준편의') pair.
    """
    from scipy.stats import chi2

    features = ['거리오차', '표준편의']
    df_x = df[features].reset_index(drop=True)

    def iqr_labels(values):
        # Tukey fences: anything further than 1.5 * IQR outside the quartiles.
        q1, q3 = np.percentile(values, [25, 75])
        margin = (q3 - q1) * 1.5
        is_outlier = (values < q1 - margin) | (values > q3 + margin)
        return np.where(is_outlier, "outlier", "normal")

    # Score the joint distribution before any label column is added, so that
    # mahalanobis() only sees the two numeric feature columns.
    squared_distance = mahalanobis(x=df_x, data=df_x)
    # The squared Mahalanobis distance of a p-variate normal sample follows a
    # chi-square law with p degrees of freedom (p = 2 features here).
    p_value = chi2.sf(squared_distance, len(features))
    df_x['Outlier_maha'] = np.where(p_value < 0.01, "outlier", "normal")

    df_x['거리오차_이상치'] = iqr_labels(df_x['거리오차'])
    df_x['표준편의_이상치'] = iqr_labels(df_x['표준편의'])
    return df_x[['거리오차', '표준편의', '거리오차_이상치', '표준편의_이상치', 'Outlier_maha']]

def repdep(df):
    """Return middle-of-sample absolute errors for '거리오차' and '표준편의'.

    For each of the two columns the absolute values are taken and two
    statistics are produced: the element at 1-based rank
    ``round(n / 2 + 0.000001)`` of the ascending sort, and ``np.median``.

    Args:
        df: DataFrame with numeric columns '거리오차' and '표준편의'.

    Returns:
        Tuple ``(REP, DEP, REP_m, DEP_m)``: the rank-based values for
        '거리오차' and '표준편의', followed by their medians.
    """
    def _rank_and_median(column):
        magnitude = np.sqrt(df[column] ** 2)
        # The small epsilon pushes exact .5 cases upward before rounding.
        rank = round(len(magnitude) / 2 + 0.000001)
        ordered = magnitude.sort_values()
        return ordered.iloc[rank - 1], np.median(magnitude)

    REP, REP_m = _rank_and_median('거리오차')
    DEP, DEP_m = _rank_and_median('표준편의')
    return REP, DEP, REP_m, DEP_m

def cep(df):
    """Return the median radial distance of the points from the origin.

    The radial distance of each row is ``sqrt(거리오차**2 + 표준편의**2)``.
    For an odd number of rows the middle element of the ascending sort is
    returned; for an even number, the mean of the two middle elements.

    Args:
        df: DataFrame with numeric columns '거리오차' and '표준편의'.

    Returns:
        The median radial distance as a scalar.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    radial = np.hypot(df['거리오차'], df['표준편의'])
    count = len(radial)
    if count == 0:
        raise ValueError("cep() needs at least one observation")

    ordered = radial.sort_values()  # sort once, reuse for both middle picks
    middle = count // 2
    if count % 2 == 0:
        return (ordered.iloc[middle - 1] + ordered.iloc[middle]) / 2
    return ordered.iloc[middle]
               
def draw_repdep(df, title):
    """Scatter all points and overlay the +/-REP and +/-DEP guide lines.

    Args:
        df: DataFrame with columns '거리오차', '표준편의', '거리오차_이상치'
            and '표준편의_이상치' (the label columns hold 'normal' for rows
            to keep).
        title: Title shown above the plot.

    Every row of ``df`` is drawn; only the REP/DEP values are computed from
    the rows labelled 'normal'. The figure is shown with ``plt.show()``.
    """
    # repdep() returns (REP, DEP, REP_m, DEP_m). The plotted values are the
    # median forms (3rd and 4th), each taken after dropping the rows flagged
    # as outliers on that same axis.
    range_normal = df.loc[df['거리오차_이상치'] == 'normal']
    REP = repdep(range_normal)[2]
    deflection_normal = df.loc[df['표준편의_이상치'] == 'normal']
    DEP = repdep(deflection_normal)[3]

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(df['표준편의'], df['거리오차'], alpha=0.4, c='blue')
    ax.axis([-axx, axx, -axx, axx])
    ax.set_aspect(1)

    # Horizontal pair marks +/-REP, vertical pair marks +/-DEP.
    for level in (REP, -REP):
        ax.axhline(y=level, color='orange', linestyle='--', linewidth=1)
    for level in (DEP, -DEP):
        ax.axvline(x=level, color='green', linestyle='--', linewidth=1)

    ax.text(axx-200, -axx+200,
            "REP = {0:0.3f}\nDEP = {1:0.3f}".format(round(REP, 3), round(DEP, 3)),
            bbox={'boxstyle': 'square', 'color': 'white', 'ec': 'black'},
            fontsize=12)

    ax.grid(True, alpha=0.5, linestyle='--')
    ax.set_xlabel('표준편의', fontsize=15)
    ax.set_ylabel('거리오차', fontsize=15)
    ax.set_title(title, fontsize=15)
    plt.show()

def draw_cep(df, title):
    """Scatter all points and overlay a circle of radius CEP centred on the origin.

    Args:
        df: DataFrame with columns '거리오차', '표준편의' and 'Outlier_maha'
            (rows labelled 'normal' are the ones passed to ``cep``).
        title: Title shown above the plot.

    Every row of ``df`` is drawn; the figure is shown with ``plt.show()``.
    """
    # Only rows not flagged by the Mahalanobis test feed the CEP value.
    CEP = cep(df.loc[df['Outlier_maha'] == 'normal'])

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(df['표준편의'], df['거리오차'], alpha=0.4, c='blue')
    ax.axis([-axx, axx, -axx, axx])
    ax.set_aspect(1)

    ring = plt.Circle((0, 0), CEP, fill=False, linestyle="-")
    ring.set_edgecolor("red")
    ring.set_linewidth(2)
    ax.add_artist(ring)

    label = "CEP = {0:0.3f}".format(round(CEP, 3))
    box_style = {'boxstyle': 'square', 'color': 'white', 'ec': 'black'}
    ax.text(axx-200, -axx+200, label, bbox=box_style, fontsize=12)

    ax.grid(True, alpha=0.5, linestyle='--')
    ax.set_xlabel('표준편의', fontsize=15)
    ax.set_ylabel('거리오차', fontsize=15)
    ax.set_title(title, fontsize=15)
    plt.show()

def draw_repdepcep(df, title):
    """Scatter all points and overlay the +/-REP, +/-DEP lines and the CEP circle.

    Args:
        df: DataFrame with columns '거리오차', '표준편의', '거리오차_이상치',
            '표준편의_이상치' and 'Outlier_maha' (the label columns hold
            'normal' for rows to keep).
        title: Title shown above the plot.

    Every row of ``df`` is drawn; only the REP/DEP/CEP values are computed
    from the rows labelled 'normal'. The figure is shown with ``plt.show()``.
    """
    # repdep() returns (REP, DEP, REP_m, DEP_m). The plotted values are the
    # median forms (3rd and 4th), each taken after dropping the rows flagged
    # as outliers on that same axis.
    range_normal = df.loc[df['거리오차_이상치'] == 'normal']
    REP = repdep(range_normal)[2]
    deflection_normal = df.loc[df['표준편의_이상치'] == 'normal']
    DEP = repdep(deflection_normal)[3]
    # The circle radius uses the rows kept by the Mahalanobis test.
    CEP = cep(df.loc[df['Outlier_maha'] == 'normal'])

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(df['표준편의'], df['거리오차'], alpha=0.4, c='blue')
    ax.axis([-axx, axx, -axx, axx])
    ax.set_aspect(1)

    # Horizontal pair marks +/-REP, vertical pair marks +/-DEP.
    for level in (REP, -REP):
        ax.axhline(y=level, color='orange', linestyle='--', linewidth=1)
    for level in (DEP, -DEP):
        ax.axvline(x=level, color='green', linestyle='--', linewidth=1)

    draw_CEP = plt.Circle((0, 0), CEP, fill=False, linestyle="-")
    draw_CEP.set_edgecolor("red")
    draw_CEP.set_linewidth(2)
    ax.add_artist(draw_CEP)

    ax.text(axx-200, -axx+200,
            "REP = {0:0.3f}\nDEP = {1:0.3f}\nCEP = {2:0.3f}".format(
                round(REP, 3), round(DEP, 3), round(CEP, 3)),
            bbox={'boxstyle': 'square', 'color': 'white', 'ec': 'black'},
            fontsize=12)

    ax.grid(True, alpha=0.5, linestyle='--')
    ax.set_xlabel('표준편의', fontsize=15)
    ax.set_ylabel('거리오차', fontsize=15)
    ax.set_title(title, fontsize=15)
    plt.show()

# ex) Usage example
# NOTE(review): df127 is not created in this cell; it must already exist and
# contain the columns '거리오차' and '표준편의'.
# Keep only the rows whose '거리오차' lies in [-5, 70] before labelling.
df127 = df127[(df127['거리오차'] >= -5) & (df127['거리오차'] <= 70)]
df127_outlier = df127[['거리오차', '표준편의']]
df127_outlier = outlier_detection(df127_outlier)
draw_repdepcep(df127_outlier, '127mm RF SALVO')

In [None]:
# 비교 시각화
import matplotlib.pyplot as plt 
import pandas as pd
# Load the two data sets that are compared column by column below.
ct_real = pd.read_csv('new_raw.csv', index_col=None)
ct_fake = pd.read_csv('ct_data.csv', index_col=None)

f, axs = plt.subplots(3, 3)
# Poster-size alternative: f.set_size_inches((30, 15))
f.set_size_inches((32, 10))
plt.subplots_adjust(wspace=0.3, hspace=0.3)
f.suptitle('RAW vs CTGAN Comparison', fontsize=50)

# reset_index() adds the row position as an 'index' column, which serves as
# the x coordinate of every scatter plot.
ct_real.reset_index(inplace=True)
ct_fake.reset_index(inplace=True)

# One grid row per compared column: (column name, optional shared y-limits).
# Only the third row is clamped to [-10, 10].
compared_columns = [('풍속', None), ('풍향', None), ('편의오차', (-10, 10))]

for row_no, (column, y_limits) in enumerate(compared_columns, start=1):
    raw_ax, fake_ax, both_ax = axs[row_no - 1]

    # Left panel: RAW only.
    raw_ax.scatter(y=ct_real[column], x=ct_real['index'], color='blue', label='RAW')
    raw_ax.set_title('RAW_{}'.format(row_no), fontsize=25)

    # Middle panel: CTGAN only.
    fake_ax.scatter(y=ct_fake[column], x=ct_fake['index'], color='orange', label='CTGAN')
    fake_ax.set_title('CTGAN_{}'.format(row_no), fontsize=25)

    # Right panel: both data sets overlaid, RAW drawn first.
    both_ax.scatter(y=ct_real[column], x=ct_real['index'], color='blue', label='RAW')
    both_ax.scatter(y=ct_fake[column], x=ct_fake['index'], color='orange', label='CTGAN')
    both_ax.set_title('RAW vs CTGAN_{}'.format(row_no), fontsize=25)

    for panel in (raw_ax, fake_ax, both_ax):
        if y_limits is not None:
            panel.set_ylim(*y_limits)
        panel.legend(loc='upper right')

plt.show()