#### [ TEST ]


[1] 모듈 로딩 및 데이터 준비<hr>

In [10]:
## [1-1] 모듈 로딩
import cv2                           # 컴퓨터비젼 모듈
import numpy as np                   # 이미지 데이터 저장 모듈
import matplotlib.pyplot as plt      # 시각화 모듈
import os                            # 파일, 폴더, 경로 관련 모듈

In [None]:
## [1-2] 데이터 준비
FILE_CSV = '../ML_CV/data/cat_dog.csv'

In [None]:
if os.path.exists(FILE_CSV):
    print(f'{FILE_CSV} Dataset 파일이 없습니다.')

[2] 이미지 데이터 로딩 <hr>

In [None]:
## [2-1] 피쳐와 타겟 분리
import pandas as pd 

catdogDF=pd.read_csv(FILE_CSV, header=None)
catdogDF.info()

[3] 학습 준비<hr>
- 피쳐와 타겟 분리
- 피쳐 스케일링
- 학습용, 테스트용 데이터셋 분리

In [None]:
## [3-1] 피쳐 & 타겟 분리
featureDF = catdogDF[catdogDF.columns[1:]]
targetSR = catdogDF[catdogDF.columns[0]]

print(f'featureDF : {featureDF.shape}   targetSR : {targetSR.shape}')

featureDF : (2431, 2500)   targetSR : (2431,)


In [28]:
## [3-2] 피쳐 정규화
featureDF = featureDF/255.
featureDF.describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
count,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,...,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0,2431.0
mean,0.000779,0.000918,0.000924,0.00093,0.000935,0.000941,0.00094,0.000947,0.000949,0.000953,...,0.001501,0.001275,0.001228,0.001221,0.001214,0.001216,0.001215,0.001203,0.001189,0.001188
std,0.001268,0.00133,0.001339,0.001342,0.001344,0.001342,0.001342,0.00135,0.001351,0.001359,...,0.001428,0.001381,0.001414,0.001408,0.001404,0.001406,0.001409,0.001405,0.00139,0.001393
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.001323,0.000584,0.000384,0.000369,0.0004,0.000369,0.000323,0.000354,0.000369,0.000338
75%,0.00143,0.001838,0.001799,0.001899,0.001907,0.001953,0.001892,0.001961,0.001953,0.001992,...,0.002899,0.002537,0.002522,0.002476,0.002491,0.002491,0.00253,0.002507,0.00243,0.002445
max,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,...,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922,0.003922


In [29]:
## [3-3] 학습용, 테스트용 데이터셋 분리
from sklearn.model_selection import train_test_split

x_train ,x_test, y_train, y_test = train_test_split(featureDF, targetSR,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=targetSR)

In [30]:
print(f'x_train : {x_train.shape}   y_train : {y_train.shape}')
print(f'x_test : {x_test.shape}   y_test : {y_test.shape}')

x_train : (1944, 2500)   y_train : (1944,)
x_test : (487, 2500)   y_test : (487,)


[4] 학습 진행<hr>
- 최적 모델 체크
- 기준선 설정 
- 학습 진행

In [32]:
## [4-1] 최적 모델 체크
from sklearn.utils.discovery import *
from sklearn.metrics import *
import warnings

# {“classifier”, “regressor”, “cluster”, “transformer”} 
rets=all_estimators(type_filter='classifier')

In [37]:
resultList=[]
for name, estimator_ in rets:
    try:
        model=estimator_()
        if 'Logistic' in name or 'SGD' in name or 'MLP' in name:
            model.set_params(max_iter=10000)
        if 'SV' in name:
            model.set_params(max_iter=100000, dual='auto')   
 
        model.fit(x_train, y_train)

        trainScore= model.score(x_train, y_train)
        testScore = model.score(x_test, y_test)

        resultList.append((name, round(trainScore, 3), round(testScore, 3)))
    except Exception:
        pass



In [39]:
sorted(resultList, key=lambda x : x[2], reverse=True)

[('HistGradientBoostingClassifier', 1.0, 0.885),
 ('ExtraTreesClassifier', 1.0, 0.858),
 ('RandomForestClassifier', 1.0, 0.836),
 ('GradientBoostingClassifier', 0.975, 0.834),
 ('BaggingClassifier', 0.993, 0.799),
 ('DecisionTreeClassifier', 1.0, 0.789),
 ('MLPClassifier', 0.977, 0.776),
 ('AdaBoostClassifier', 0.764, 0.729),
 ('ExtraTreeClassifier', 1.0, 0.715),
 ('KNeighborsClassifier', 0.769, 0.694),
 ('GaussianNB', 0.684, 0.684),
 ('QuadraticDiscriminantAnalysis', 1.0, 0.669),
 ('CalibratedClassifierCV', 0.645, 0.665),
 ('LinearDiscriminantAnalysis', 0.998, 0.659),
 ('RidgeClassifierCV', 0.635, 0.647),
 ('NearestCentroid', 0.635, 0.628),
 ('BernoulliNB', 0.588, 0.598),
 ('LinearSVC', 0.583, 0.577),
 ('RidgeClassifier', 0.57, 0.573),
 ('CategoricalNB', 0.566, 0.567),
 ('DummyClassifier', 0.566, 0.567),
 ('GaussianProcessClassifier', 0.566, 0.567),
 ('LabelPropagation', 0.566, 0.567),
 ('LabelSpreading', 0.566, 0.567),
 ('LogisticRegression', 0.566, 0.567),
 ('LogisticRegressionCV', 

In [40]:
## [4-1] 최적 모델 체크
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(x_train , y_train)

In [45]:
from sklearn.metrics import classification_report

trainScore= model.score(x_train, y_train)
testScore = model.score(x_test, y_test)
print('[TRAIN]' , trainScore , '[TEST]' , testScore)

pre = model.predict(x_test)
resultDict=classification_report(y_test, pre)
print(resultDict)

[TRAIN] 1.0 [TEST] 0.8521560574948666
              precision    recall  f1-score   support

         Dog       0.86      0.89      0.87       276
         cat       0.85      0.81      0.83       211

    accuracy                           0.85       487
   macro avg       0.85      0.85      0.85       487
weighted avg       0.85      0.85      0.85       487



[5] 학습 및 튜닝 진행 <hr>