In [44]:
import pandas as pd
import numpy as np
import os
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from IPython.display import display, HTML

In [6]:
import matplotlib.pyplot as plt
# Render floats in DataFrame output with thousands separators and 3 decimals.
pd.set_option('display.float_format', '{:0,.3f}'.format)
from pyensae.graphhelper import Corrplot

In [9]:
import seaborn as sns
from matplotlib.colors import ListedColormap
import statsmodels.api as sm

In [14]:
# Relative path of the input data file that the notebook loads.
input_path = 'sample_data/2.pearson_사출_labeled_data.csv'

In [15]:
# Load the input table into `input_df`, picking the reader from the file
# extension of `input_path`.
_, file_type = os.path.splitext(input_path)
# Compare case-insensitively so '.CSV' / '.XLSX' are recognised as well.
file_ext = file_type.lower()
if 'xls' in file_ext:
    # Excel workbook (.xls / .xlsx): rows containing any missing value are dropped.
    input_df = pd.read_excel(input_path)
    input_df = input_df.dropna()
elif 'csv' in file_ext or 'txt' in file_ext:
    # The original tested `elif 'csv':`, a non-empty string literal that is
    # always truthy, so every non-Excel file was silently parsed as CSV.
    # CSV and TXT share the same handling: parse as delimited text and fill
    # missing cells.
    input_df = pd.read_csv(input_path)
    # NOTE(review): the fill value is the *string* '0.0', not the number 0.0.
    # A numeric column containing NaN would end up holding strings -- confirm
    # this is intended before relying on numeric statistics downstream.
    input_df = input_df.fillna('0.0')
else:
    # Fail loudly: continuing would leave `input_df` undefined (or stale from
    # an earlier run of this cell) and break later cells in a confusing way.
    raise ValueError('unknown format: %r' % file_type)

In [49]:
# Show every column name of the loaded frame as a plain Python list.
display(input_df.columns.tolist())

['_id',
 'TimeStamp',
 'PART_FACT_PLAN_DATE',
 'PART_FACT_SERIAL',
 'PART_NAME',
 'EQUIP_CD',
 'EQUIP_NAME',
 'PassOrFail',
 'Reason',
 'Injection_Time',
 'Filling_Time',
 'Plasticizing_Time',
 'Cycle_Time',
 'Clamp_Close_Time',
 'Cushion_Position',
 'Switch_Over_Position',
 'Plasticizing_Position',
 'Clamp_Open_Position',
 'Max_Injection_Speed',
 'Max_Screw_RPM',
 'Average_Screw_RPM',
 'Max_Injection_Pressure',
 'Max_Switch_Over_Pressure',
 'Max_Back_Pressure',
 'Average_Back_Pressure',
 'Barrel_Temperature_1',
 'Barrel_Temperature_2',
 'Barrel_Temperature_3',
 'Barrel_Temperature_4',
 'Barrel_Temperature_5',
 'Barrel_Temperature_6',
 'Barrel_Temperature_7',
 'Hopper_Temperature',
 'Mold_Temperature_1',
 'Mold_Temperature_2',
 'Mold_Temperature_3',
 'Mold_Temperature_4',
 'Mold_Temperature_5',
 'Mold_Temperature_6',
 'Mold_Temperature_7',
 'Mold_Temperature_8',
 'Mold_Temperature_9',
 'Mold_Temperature_10',
 'Mold_Temperature_11',
 'Mold_Temperature_12']

In [20]:
# Preview the first rows of the loaded frame (rendered as the cell output).
input_df.head()

Unnamed: 0,_id,TimeStamp,PART_FACT_PLAN_DATE,PART_FACT_SERIAL,PART_NAME,EQUIP_CD,EQUIP_NAME,PassOrFail,Reason,Injection_Time,...,Mold_Temperature_3,Mold_Temperature_4,Mold_Temperature_5,Mold_Temperature_6,Mold_Temperature_7,Mold_Temperature_8,Mold_Temperature_9,Mold_Temperature_10,Mold_Temperature_11,Mold_Temperature_12
0,5f8928bb9c0189cc666ef19b,2020-10-16 04:57:47,2020-10-16 오전 12:00:00,24,CN7 W/S SIDE MLD'G RH,S14,650톤-우진2호기,Y,0.0,9.59,...,24.8,27.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5f8928de9c0189cc666ef20b,2020-10-16 04:58:48,2020-10-16 오전 12:00:00,24,CN7 W/S SIDE MLD'G RH,S14,650톤-우진2호기,Y,0.0,9.6,...,24.8,27.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5f8928df9c0189cc666ef213,2020-10-16 04:58:48,2020-10-16 오전 12:00:00,23,CN7 W/S SIDE MLD'G LH,S14,650톤-우진2호기,Y,0.0,9.6,...,24.8,27.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5f8928f39c0189cc666ef25e,2020-10-16 04:59:48,2020-10-16 오전 12:00:00,23,CN7 W/S SIDE MLD'G LH,S14,650톤-우진2호기,Y,0.0,9.59,...,25.0,27.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5f8928f59c0189cc666ef265,2020-10-16 04:59:48,2020-10-16 오전 12:00:00,24,CN7 W/S SIDE MLD'G RH,S14,650톤-우진2호기,Y,0.0,9.59,...,25.0,27.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# Select the NanumGothic font family for matplotlib figures
# (presumably so Hangul labels render correctly -- confirm the font is installed).
plt.rcParams['font.family'] = 'NanumGothic'

# Columns to analyse. Source and destination deliberately refer to the SAME
# list object, so every column is paired with every other column.
src_col_list = dst_col_list = [
    'Average_Back_Pressure',
    'Barrel_Temperature_2',
]

In [39]:
# Build two report artefacts from `input_df`:
#   * `allval` -- flat dict of pre-formatted strings: pairwise Pearson
#                 correlations and per-column summary metrics;
#   * `tbl`    -- one dict per analysed column, rendered as a table below.
tbl = []
allval = {}

# Pearson correlation for every ordered pair of distinct columns.
# Key: "<src><dst>_pearson"; value: the coefficient as a 3-decimal string.
for src_col in src_col_list:
    for dst_col in dst_col_list:
        if src_col != dst_col:
            pearson = input_df[src_col].corr(input_df[dst_col])
            allval[src_col + dst_col + '_pearson'] = '%0.3f' % pearson

# Parallel lists: describe() row name, table column label, and display format.
metrics = ['mean', 'std', 'count']
metrics_label = ['평균', '표준편차', '총 개수']
metrics_fmt = ['{0:,.3f}', '{0:,.3f}', '{0:,.0f}']

# describe() does not depend on the loop variables; compute it once instead
# of once per (column, metric) pair as the original did.
summary_df = input_df.describe()

for col_name in src_col_list:
    row = {"변수": col_name}
    for metric, label, fmt in zip(metrics, metrics_label, metrics_fmt):
        row[label] = fmt.format(summary_df.loc[metric, col_name])
        # Mirror the formatted value into the flat lookup as "<column>_<metric>".
        allval[col_name + "_" + metric] = row[label]
    tbl.append(row)

# Last expression of the cell: rendered as the summary table.
pd.DataFrame(tbl)

Unnamed: 0,변수,평균,표준편차,총 개수
0,Average_Back_Pressure,59.579,3.403,7996
1,Barrel_Temperature_2,276.706,4.089,7996


In [45]:
# Pairwise correlation matrix of the analysed columns; 'pearson' is the
# pandas default, spelled out here for the reader.
corr_df = input_df[src_col_list].corr(method='pearson')

In [46]:
# Render the correlation matrix held in corr_df.
display(corr_df)

Unnamed: 0,Average_Back_Pressure,Barrel_Temperature_2
Average_Back_Pressure,1.0,0.559
Barrel_Temperature_2,0.559,1.0
