In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
# import sklearn
# print("sklearn version: ", sklearn.__version__)
# assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
print("TF version: ", tf.__version__)
assert tf.__version__ >= "2.0"

if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")

# GPU test
print("GPU installed: ",tf.test.is_built_with_gpu_support())

# To prevent "CUDNN_STATUS_ALLOC_FAILED" error with GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)
    
# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "cnn"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGES_PATH``.

    Args:
        fig_id: Base file name (without extension); also echoed to stdout.
        tight_layout: When truthy, ``plt.tight_layout()`` is applied first.
        fig_extension: File extension, also passed to ``savefig`` as format.
        resolution: DPI passed to ``savefig``.
    """
    file_name = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)
    
# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")    

TF version:  2.4.0
GPU installed:  True
1 Physical GPUs, 1 Logical GPUs


In [6]:
import numpy as np
import pandas as pd
import os
import time
# import scoring as scoring
import pickle
import gzip
from pyarrow import csv
import csv
# Directory holding the competition CSV files. Change this single constant
# to point the notebook at another copy of the data.
DATA_DIR = "D:/2022AIComp_data"
train_path = DATA_DIR + "/train.csv"
test_path = DATA_DIR + "/test.csv"

#---------------------- Load Train,Test DF
train_pd = pd.read_csv(train_path)
test_pd = pd.read_csv(test_path)

# Report the table sizes and the distinct values found in the first (label) column.
print("Train: %s, Test: %s" %(train_pd.shape, test_pd.shape))
print(train_pd.iloc[:,0].unique())

def remake_data():
    """Build model inputs from the module-level ``train_pd`` / ``test_pd`` frames.

    Returns:
        A 3-tuple ``(X, y, submit_test)``:
        * ``X`` - array of every training column except the first.
        * ``y`` - the first training column with the label names replaced by
          integer codes (out=0, in=1, normal=2, other=3, noise=4).
        * ``submit_test`` - array of every test column except the first.
    """
    label_names = ['out','in', 'normal', 'other', 'noise']
    label_codes = [0,1,2,3,4]

    train_features = np.array(train_pd.iloc[:, 1:])
    train_targets = train_pd.iloc[:, 0].replace(label_names, label_codes)
    test_features = np.array(test_pd.iloc[:, 1:])

    return train_features, train_targets, test_features

Train: (33600, 514), Test: (7820, 514)
['out' 'in' 'normal' 'other' 'noise']


In [57]:
import sklearn.model_selection
from sklearn.model_selection import train_test_split


# from sklearn import model_selection
class PreProcessing():
    """Helpers that turn a feature matrix and label vector into train/test arrays.

    The steps are: keep only the requested labels and one-hot encode them
    (``make_label_set``), optionally reshape the feature arrays
    (``data_reshape``), and split into train/test sets (``split``).
    ``pipeline`` chains them and stores the results on the instance.
    """

    # Target shape used by ``data_reshape`` when the caller passes no ``shape``.
    DEFAULT_SHAPE = (-1, 27, 19, 1)

    # Name printed by ``data_shape`` when no variable bound to the array is found.
    _FALLBACK_NAME = 'data'

    @staticmethod
    def _find_name(data, *namespaces):
        """Return the first name bound to ``data`` (by identity), or ``None``.

        Namespaces are searched in the order given; empty or ``None``
        namespaces are skipped.
        """
        for namespace in namespaces:
            if not namespace:
                continue
            # Iterate over a snapshot so the lookup cannot be disturbed by the
            # namespace changing size while it is being scanned.
            for name, value in list(namespace.items()):
                if value is data:
                    return name
        return None

    def data_shape(self,*data_li,local = None):
        """Print ``<name> .shape :  <shape>`` for every array in ``data_li``.

        The name is looked up by object identity, first in this module's
        globals and then in ``local`` (typically the caller's ``locals()``).
        Arrays for which no name is found are printed as ``data`` instead of
        raising ``IndexError``.

        Args:
            *data_li: Objects exposing a ``.shape`` attribute.
            local: Optional mapping of names to objects to search after globals.
        """
        for data in data_li:
            name = self._find_name(data, globals(), local)
            if name is None:
                name = self._FALLBACK_NAME
            print(name,'.shape : ',data.shape)
        print('\n')

    def data_reshape(self, *args, **kwargs):
        """Reshape every positional array and return them as a list.

        Args:
            *args: Arrays exposing ``.reshape``.
            **kwargs: ``shape`` - target shape; defaults to ``DEFAULT_SHAPE``
                when missing or ``None``. Other keywords are ignored.

        Returns:
            List of reshaped arrays, in the order they were passed.
        """
        shape = kwargs.get('shape')
        if shape is None:
            shape = self.DEFAULT_SHAPE
        return [data.reshape(shape) for data in args]

    def split(self,X,y):
        """Stratified 80/20 train/test split with a fixed random state (42).

        Returns:
            ``X_train, X_test, y_train, y_test``.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state = 42, stratify = y)
        return X_train, X_test, y_train, y_test

    def make_label_set(self, X,y,label):
        """Keep only the samples whose label is in ``label`` and one-hot encode ``y``.

        Samples are grouped in the order the values appear in ``label``.

        Args:
            X: Feature array indexable by a boolean mask over its first axis.
            y: Label vector aligned with ``X``.
            label: Iterable of label values to keep.

        Returns:
            ``(X, y)`` where ``y`` is the ``pd.get_dummies`` encoding of the
            kept labels converted to an array.
        """
        X_sub=[]
        y_sub=[]
        for i in label:
            # Build the mask once and reuse it for both arrays.
            mask = np.asarray(y == i)
            X_sub.append(X[mask])
            y_sub.append(y[mask])

        X=np.concatenate(X_sub,axis = 0)
        y=np.concatenate(y_sub,axis = 0)
        y=np.array(pd.get_dummies(y))

        return X,y

    def pipeline(self, **kwargs):
        """Run label filtering, optional reshaping and the train/test split.

        Keyword Args:
            X: Feature array.
            y: Label vector.
            submit: Feature array of the submission set; reshaped with ``X``.
            shape: Target shape for ``X`` and ``submit``; ``None`` skips reshaping.
            label: Label values to keep.

        Returns:
            ``X_train, X_test, y_train, y_test, submit``; the same values are
            also stored as attributes on the instance.
        """
        # NOTE: the local names below are printed by ``data_shape`` (it looks
        # them up through ``locals()``), so they must not be renamed.
        X=kwargs.get('X')
        y=kwargs.get('y')
        submit=kwargs.get('submit')
        shape = kwargs.get('shape')
        label = kwargs.get('label')

        X,y=self.make_label_set(X,y,label)
        self.data_shape(X,y,local = locals())

        if shape is not None:
            X,submit = self.data_reshape(X,submit,shape = shape)

        X_train,X_test,y_train,y_test = self.split(X,y)
        self.data_shape(X_train,X_test,y_train,y_test,submit,local = locals())

        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.submit = submit

        return X_train,X_test,y_train,y_test,submit

In [None]:
# Work on the extracted features; 'normal' is left out, only in/out are used.
batch_size = 64
shape = (-1, 27, 19, 1)

X, y, submit = remake_data()



X .shape :  (3600, 513)
y .shape :  (3600, 2)


X_train .shape :  (2880, 27, 19, 1)
X_test .shape :  (720, 27, 19, 1)
y_train .shape :  (2880, 2)
y_test .shape :  (720, 2)
submit .shape :  (7820, 27, 19, 1)


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_16 (Batc (None, 27, 19, 1)         4         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 27, 19, 16)        416       
_________________________________________________________________
batch_normalization_17 (Batc (None, 27, 19, 16)        64        
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 13, 9, 16)         0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 13, 9, 64)         25664     
___________________________________________

KeyboardInterrupt: 

In [59]:
import time
from datetime import datetime

import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint

class Train_model(PreProcessing):
    """Compile, fit, evaluate and export predictions for a Keras model.

    Training options are passed as keyword arguments to the constructor and
    read back through ``get``:

        batch_size (default 64), epoch (100), class_weight (True),
        loss ('categorical_crossentropy'), optimizer ('sgd'), patience (20),
        label (None) - the real label values behind the one-hot columns.
    """

    def __init__(self,*args,**kwargs):
        # Store the positional arguments as given (the original trailing
        # comma wrapped them in an extra one-element tuple).
        self.args = args
        self.kwargs = kwargs
        self.batch_size = self.get('batch_size',64)
        self.epoch = self.get('epoch',100)
        self.class_weight = self.get('class_weight',True)
        self.loss = self.get('loss','categorical_crossentropy')
        self.optimizer  = self.get('optimizer','sgd')
        self.patience = self.get('patience',20)

    def get(self, instance_name,default):
        """Return the constructor keyword ``instance_name`` or ``default``."""
        return self.kwargs.get(instance_name,default)

    def this_time(self):
        """Print and return the current time as ``YYYY-mm-dd_HH_MM``."""
        # Start time of the run; it doubles as the model name.
        n_time = datetime.now().strftime('%Y-%m-%d_%H_%M')
        print(n_time)
        return n_time

    def model_save_path(self):
        """Return the checkpoint path for this run, creating the folder if needed.

        Also stores the run timestamp in ``self.n_time``.
        """
        self.n_time = self.this_time()
        MODEL_SAVE_FOLDER_PATH = './model_flow/'  # model save directory
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # os.path.exists + os.mkdir.
        os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)
        model_path = MODEL_SAVE_FOLDER_PATH + str(self.n_time)+'.hdf5'
        return model_path

    def early_modelcheck(self, model_path):
        """Build the early-stopping and best-checkpoint callbacks.

        Both monitor ``val_loss`` (lower is better). Early stopping waits
        ``self.patience`` epochs and restores the best weights.

        Returns:
            ``(es, mc)`` - the EarlyStopping and ModelCheckpoint callbacks.
        """
        es = EarlyStopping(monitor='val_loss', mode='min',
                           verbose=1, patience=self.patience,
                           restore_best_weights = True)

        mc = ModelCheckpoint(model_path, monitor='val_loss',
                             mode='min', save_best_only=True)
        return es,mc

    def calculate_class_weight(self,y_train):
        """Return balanced class weights as ``{position: weight}``.

        Args:
            y_train: 1-D array of class labels.

        Returns:
            Dict keyed by the position of each label in ``np.unique(y_train)``.
            NOTE(review): the keys equal the class indices only when the
            labels present are exactly 0..k-1 - confirm for label subsets.
        """
        class_weights = compute_class_weight(
            class_weight = 'balanced',classes = np.unique(y_train),y = y_train)
        class_weights = dict(enumerate(class_weights))
        return class_weights

    def compile_fit(self, *args, **kwargs):
        """Compile and train the model, then evaluate it on the test split.

        Args:
            *args: Exactly ``model, X_train, y_train, X_test, y_test``, with
                one-hot encoded targets.
            **kwargs: Optional ``loss`` / ``optimizer`` overriding the values
                given to the constructor.

        Returns:
            ``(model, y_pred)`` where ``y_pred`` is what ``predict_label``
            returns for the test split.
        """
        loss = kwargs.pop('loss',self.loss)
        optimizer = kwargs.pop('optimizer',self.optimizer)

        label= self.get('label',None)

        model,X_train,y_train,X_test,y_test = args
        # Remember the model so later helpers (e.g. pred_sum) can default to it.
        self.model = model

        model_path = self.model_save_path()
        es,mc = self.early_modelcheck(model_path)

        # Honour the ``class_weight`` option; it was previously read but ignored.
        if self.class_weight:
            class_weights = self.calculate_class_weight(
                y_train = np.argmax(y_train,axis =1))
        else:
            class_weights = None

        model.compile(loss=loss,optimizer = optimizer,metrics=['accuracy'])

        history = model.fit(X_train,y_train, epochs = self.epoch,
                            batch_size=self.batch_size, validation_split=0.2,
                            callbacks=[es,mc],
                            class_weight = class_weights,
                            shuffle=True,
                            ).history

        print('\n-----test score : ',model.evaluate(X_test, y_test))

        self.learning_curve(history)

        y_pred = self.predict_label(model,X_test,y_test,label)

        return model,y_pred

    def learning_curve(self,history):
        """Plot train/validation accuracy and loss from a Keras history dict."""
        # Learning curve
        plt.plot(history['accuracy'], label='train_acc')
        plt.plot(history['val_accuracy'], label= 'val_acc')
        plt.plot(history['loss'], label= 'train_loss')
        plt.plot(history['val_loss'], label= 'val_loss')
        plt.title('Learning Curve')
        plt.xlabel('Epoch')
        plt.legend(loc='lower left')
        plt.show()

    def train_pipeline(self,*args,**kwargs):
        """Convenience wrapper around ``compile_fit``.

        Args:
            *args: ``model, X_train, y_train, X_test, y_test``.
            **kwargs: Forwarded to ``compile_fit``; ``loss`` and ``optimizer``
                default to the constructor values.

        Returns:
            ``(model, y_pred)`` as returned by ``compile_fit`` (which already
            runs the prediction and plots the confusion matrices).
        """
        kwargs.setdefault('loss', self.loss)
        kwargs.setdefault('optimizer', self.optimizer)
        return self.compile_fit(*args, **kwargs)

    def predict_label(self,model,X_test,y_test,label=None):
        """Predict on ``X_test`` and plot the confusion matrices.

        Args:
            model: Object exposing ``predict``.
            X_test: Test features.
            y_test: Test targets (one-hot or class indices).
            label: Optional real label values, one per output column.

        Returns:
            The raw ``model.predict`` output when ``label`` is ``None``;
            otherwise a 1-D array of real labels obtained by mapping each
            arg-max class index through ``label``.
        """
        # NOTE: X_test / y_test are looked up by name through locals(), so
        # these parameter names are what gets printed.
        self.data_shape(X_test,y_test,local=locals())
        y_pred = model.predict(X_test)
        if label is not None:
            y_pred_re = self.real_pred(np.argmax(y_pred,axis =1),label)
        else:
            y_pred_re =y_pred
        self.show_matrix(y_test,y_pred)

        return y_pred_re

    @staticmethod
    def real_pred(y_pred,label):
        """Map encoded class indices back to the real label values.

        Args:
            y_pred: Array of class indices (position in ``label``).
            label: Sequence where ``label[k]`` is the real label of class ``k``.

        Returns:
            A new array; values that are not a valid index are left unchanged.
            The input is not modified.
        """
        y_pred = np.asarray(y_pred)
        mapped = y_pred.copy()
        # Compare against the untouched input so a remapped value is never
        # remapped again (e.g. label=[1, 0] must swap, not collapse to 0).
        for la_be, la_af in enumerate(label):
            mapped[y_pred==la_be]=la_af
        return mapped

    def show_matrix(self,y_test,y_pred):
        """Plot the confusion matrix as raw counts and as per-row fractions.

        Args:
            y_test: True targets, one-hot ``(n, k)`` or class indices
                ``(n,)`` / ``(n, 1)``.
            y_pred: Predicted scores of shape ``(n, k)``; arg-max is taken.
        """
        y_pred = np.argmax(y_pred,axis =1)
        y_test = np.asarray(y_test)
        if y_test.ndim > 1 and y_test.shape[1] != 1:
            y_test = np.argmax(y_test,axis =1)
        else:
            # Already class indices; flatten a possible (n, 1) column.
            y_test = y_test.ravel()

        cf = confusion_matrix(y_test, y_pred)

        # Row-normalise; rows without true samples stay 0 instead of 0/0 = nan.
        row_totals = cf.sum(axis=1, keepdims=True)
        per_cf = cf / np.where(row_totals == 0, 1, row_totals)

        # Each matrix gets its own figure, drawn in cell i+1 of a 2x2 grid
        # (layout kept as in the original).
        for i , matrix in enumerate([cf, per_cf]):
            fig=plt.figure(figsize=(8,8))
            ax = fig.add_subplot(2,2,i+1)
            # Values are rounded to 4 decimals for the annotations.
            sns.heatmap(np.round(matrix,4),annot=True, fmt='', cmap='Blues', ax=ax)
            ax.set_title('Seaborn Confusion Matrix with labels\n\n')
            ax.set_xlabel('\nPredicted Values')
            ax.set_ylabel('Actual Values ')
            fig.tight_layout()
            ## Display the visualization of the Confusion Matrix.
            plt.show()
        # n_time only exists after model_save_path() has run.
        print('model_name : ', getattr(self, 'n_time', None))

    def pred_sum(self,*args, **kwargs):
        """Unfinished placeholder: only resolves which model would be used.

        The ``model`` keyword wins; otherwise the model stored by
        ``compile_fit`` is used, or ``None`` when nothing was trained yet.
        """
        model= kwargs.pop('model',getattr(self, 'model', None))

    def make_test_csv(self, model,submit_test,model_name,reshape=False):
        """Predict the submission set and write it to ``./result_csv/``.

        Args:
            model: Object exposing ``predict``.
            submit_test: Submission features.
            model_name: Model file name; a trailing ``.hdf5`` is stripped to
                build the CSV name.
            reshape: When truthy, reshape ``submit_test`` to (-1, 27, 19, 1).

        Returns:
            The submission DataFrame, indexed like the sample submission.

        NOTE(review): relies on the module-level ``test_pd`` and assumes its
        first column is named ``id`` - confirm against the data.
        """
        sample_pd = pd.read_csv("D:/2022AIComp_data/sample_submission.csv")
        sample_pd = sample_pd.set_index('id')

        if reshape:
            submit_te = submit_test.reshape((-1,27,19,1))
        else:
            submit_te=submit_test
        pred = np.argmax(model.predict(submit_te),axis = 1)
        print(pred.shape)
        leaktype = pd.DataFrame(pred,columns=['leaktype']).replace(
            [0,1,2,3,4],['out','in', 'normal', 'other', 'noise'])
        sub = pd.concat([test_pd.iloc[:,0], leaktype],axis=1)

        sub = sub.set_index('id')
        submit_pd = sub.reindex(sample_pd.index)
        print('제출 할 csv 클래스 별 분포 : ', submit_pd.value_counts())
        plt.hist(np.array(submit_pd))
        plt.show()

        MODEL_SAVE_FOLDER_PATH = './result_csv/'
        os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)
        # Strip the extension only when it is really there, so names without
        # '.hdf5' are not truncated by five characters.
        if model_name.endswith('.hdf5'):
            base_name = model_name[:-5]
        else:
            base_name = model_name
        submit_pd.to_csv(MODEL_SAVE_FOLDER_PATH + base_name+'.csv')
        print(model_name)
        return submit_pd

In [None]:
# Work on the extracted features; 'normal' is left out, only in/out are used.
batch_size = 64
shape = (-1,27,19,1)
label = [0,1]
out_num = len(label)
X,y,submit = remake_data()

# Filter to the selected labels, reshape to image-like tensors and split.
prep = {'X' : X, 'y': y, 'submit': submit,'shape' : shape,'label':label }
p=PreProcessing()
X_train,X_test,y_train,y_test,submit = p.pipeline(**prep)

# Three Conv2D blocks (batch-normalised, the first two max-pooled), followed
# by a dense head with one softmax unit per selected label.
input_shape = (X_train.shape[1],X_train.shape[2],1)
model = keras.models.Sequential([
    keras.Input(shape=input_shape),
    layers.BatchNormalization(),
#     keras.layers.experimental.preprocessing.Resizing(32,32),
    layers.Conv2D(16, kernel_size=5, strides=1,  activation='relu', padding='same'), #C11
    layers.BatchNormalization(),
    layers.MaxPool2D(),
    layers.Conv2D(64, kernel_size=5, strides=1, activation='relu', padding='same'), #C3
    layers.BatchNormalization(),
    layers.MaxPool2D(),
    layers.Conv2D(128, kernel_size=5, strides=1, activation='relu', padding='same'), #C5
    layers.BatchNormalization(),
    layers.Flatten(), #Flatten
    layers.Dense(64, activation='relu'), #F6
    layers.Dense(out_num, activation='softmax') #Output layer
    ])

model.summary()

# Training options. They are now actually handed to Train_model; before, the
# dict was built but never used and the class defaults applied silently.
train_par ={
        'batch_size':batch_size, 'optimizer':'sgd', 'epoch':100, 'class_weight':True,
        'loss': 'categorical_crossentropy','patience':20}

T=Train_model(**train_par)
T.compile_fit(model, X_train,y_train,X_test,y_test)








# y_pred, submit = train_model(model,data,
#             batch_size=batch_size,patience = 20,optimizer = 'sgd',reshape=False,
#             class_weight=False,label_encoder = True)

X .shape :  (3600, 513)
y .shape :  (3600, 2)


X_train .shape :  (2880, 27, 19, 1)
X_test .shape :  (720, 27, 19, 1)
y_train .shape :  (2880, 2)
y_test .shape :  (720, 2)
submit .shape :  (7820, 27, 19, 1)


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_20 (Batc (None, 27, 19, 1)         4         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 27, 19, 16)        416       
_________________________________________________________________
batch_normalization_21 (Batc (None, 27, 19, 16)        64        
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 13, 9, 16)         0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 13, 9, 64)         25664     
___________________________________________