In [145]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential  # Functional API --> 기능이 많다. 로컬 연결
from keras.layers import Dense
from keras.backend import clear_session
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd
import numpy as np

# Knowledge Distillation

https://keras.io/examples/vision/knowledge_distillation/#introduction-to-knowledge-distillation

In [146]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # 텐서플로가 첫 번째 GPU에 1GB 메모리만 할당하도록 제한
  try:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3072)])
  except RuntimeError as e:
    # 프로그램 시작시에 가상 장치가 설정되어야만 합니다
    print(e)

In [147]:
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """ Configure the distiller(증류기 구성)

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
                
            1) 옵티마이저 : 학생 가중치를위한 Keras 옵티마이저
            2) 메트릭 : 평가를위한 Keras 메트릭
            3) student_loss_fn : 학생차의 손실 함수(예측값과 실제값)
            4) distillation_loss_fn : 연약한 차이의 손실 함수(소프트학생 예측 및 소프트교사 예측)
            5) alpha : student_loss_fn 및 1-alpha to distillation_loss_fn에 대한 가중치
            6) 온도 : 확률 분포를 연화시키기 위한 온도(더 큰 온도는 더 부드러운 분포를 제공)
            
        """
        super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        # Unpack data(데이터 언패킹)
        x, y = data

        # Forward pass of teacher(교사의 Foward Pass)
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student(학생의 Foward Pass)
            student_predictions = self.student(x, training=True)

            # Compute losses(Losses 계산)
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distillation_loss_fn(
                tf.nn.relu(teacher_predictions / self.temperature),
                tf.nn.relu(student_predictions / self.temperature),
            )
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Compute gradients(gradients 계산)
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights(가중치 업데이트)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`. (컴파일안에서 메트릭 업데이트)
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance(퍼포먼스 dictionary 리턴)
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        # Unpack the data(데이터 언패킹)
        x, y = data

        # Compute predictions(예측 수행)
        y_prediction = self.student(x, training=False)

        # Calculate the loss(loss 계산)
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.(메트릭 업데이트)
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance(퍼포먼스 dictionary 리턴)
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

https://aakashgoel12.medium.com/how-to-add-user-defined-function-get-f1-score-in-keras-metrics-3013f979ce0d

In [148]:
def get_f1(y_true, y_pred, logs={}): #taken from old keras source code
    import keras.backend as K
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    logs["f1"]=f1_val
    return f1_val

In [None]:
df = pd.read_csv('train.csv', delimiter=',')
df.head()

In [150]:
col = ['ID', 'FH2O', 'FNOX', 'FOPTIMETHGLY', 'FOXID', 'FSO4', 'FTBN', 'FUEL', 'SOOTPERCENTAGE',
        'U100', 'U75', 'U50', 'U25', 'U20', 'U14', 'U6', 'U4', 'V100', 'P', 'MO', 'MG']
df.drop(col, inplace=True, axis=1)
df['COMPONENT_ARBITRARY'] = df['COMPONENT_ARBITRARY'].str[9:]
df['COMPONENT_ARBITRARY'] = df['COMPONENT_ARBITRARY'].astype('int64')
df.drop_duplicates(inplace=True)

In [151]:
def fillknn(df):
    from sklearn.impute import KNNImputer
    imputer = KNNImputer(n_neighbors=5)
    knn_df=imputer.fit_transform(df)
    knn_df=pd.DataFrame(knn_df, columns=df.columns)
    return knn_df

In [152]:
# 결측치 있는 항만 확인
nas =pd.DataFrame(df.isna().sum())
nas.loc[nas[0]!=0]

Unnamed: 0,0
CD,1394
K,2299


In [153]:
df = fillknn(df)

In [154]:
# target 확인
target = 'Y_LABEL'

# 데이터 분리
x = df.drop(target, axis = 1)
x = x.astype('int64')
y = df[target]
norm_cols = list(x.columns)

In [None]:
# 가변수화 열 지정
dumm_cols = ['COMPONENT_ARBITRARY']

# 가변수화
x = pd.get_dummies(x, columns = dumm_cols, drop_first=True)
x1 = x.copy()
# 확인
x.head()

In [None]:
test = pd.read_csv('test.csv')
test.head()

In [None]:
col = ['ID', 'FH2O', 'FNOX', 'FOPTIMETHGLY', 'FOXID', 'FSO4', 'FTBN', 'FUEL', 'SOOTPERCENTAGE',
        'U100', 'U75', 'U50', 'U25', 'U20', 'U14', 'U6', 'U4', 'V100', 'P', 'MO', 'MG']
for i in col:
    try:
        test.drop(i, inplace=True, axis=1)
    except:
        col.remove(i)
test['COMPONENT_ARBITRARY'] = test['COMPONENT_ARBITRARY'].str[9:]
test['COMPONENT_ARBITRARY'] = test['COMPONENT_ARBITRARY'].astype('int64')
test.drop_duplicates(inplace=True)

# 가변수화 열 지정
dumm_cols = ['COMPONENT_ARBITRARY']

# 가변수화
test = pd.get_dummies(test, columns = dumm_cols, drop_first=True)

# 확인
test.head()

In [158]:
x1=x1[list(test.columns)]

In [159]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit_transform(x)
x = scaler.fit_transform(x)
x1 = scaler.fit_transform(x1)

In [160]:
scaler.fit_transform(test)
test = scaler.fit_transform(test)

In [161]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y, test_size=0.3)

In [162]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
x_train, y_train_over = smote.fit_resample(x_train, y_train)
print("SMOTE 적용 전 학습용 피처/레이블 데이터 세트 : ", x_train.shape, y_train.shape)
print('SMOTE 적용 후 학습용 피처/레이블 데이터 세트 :', x_train.shape, y_train_over.shape)
print('SMOTE 적용 후 값의 분포 :\n',pd.Series(y_train_over).value_counts() )

SMOTE 적용 전 학습용 피처/레이블 데이터 세트 :  (18116, 34) (9866,)
SMOTE 적용 후 학습용 피처/레이블 데이터 세트 : (18116, 34) (18116,)
SMOTE 적용 후 값의 분포 :
 0.0    9058
1.0    9058
Name: Y_LABEL, dtype: int64


In [163]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
x1_train, y1_train_over = smote.fit_resample(x1_train, y1_train)
print("SMOTE 적용 전 학습용 피처/레이블 데이터 세트 : ", x1_train.shape, y1_train.shape)
print('SMOTE 적용 후 학습용 피처/레이블 데이터 세트 :', x1_train.shape, y1_train_over.shape)
print('SMOTE 적용 후 값의 분포 :\n',pd.Series(y1_train_over).value_counts() )

SMOTE 적용 전 학습용 피처/레이블 데이터 세트 :  (17978, 19) (9866,)
SMOTE 적용 후 학습용 피처/레이블 데이터 세트 : (17978, 19) (17978,)
SMOTE 적용 후 값의 분포 :
 0.0    8989
1.0    8989
Name: Y_LABEL, dtype: int64


In [166]:
# Create the teacher # 교사 모델 생성
n_features = x_train.shape[1]
n_features1 = test.shape[1]
teacher = keras.Sequential(
    [
        layers.Dense(128, input_shape = (n_features1, ), activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(64, activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(32, activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(1, activation='sigmoid'),
    ],
    name="teacher",
)

# Create the student # 학생 모델 생성
student = keras.Sequential(
    [
        layers.Dense(128, input_shape = (n_features1, ), activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(64, activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(32, activation = 'swish' ),
        layers.LeakyReLU(alpha=0.2),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(1, activation='sigmoid'),
    ],
    name="student",
)

# Clone student for later comparison # 후행 비교를 위한 학생 복제
student_scratch = keras.models.clone_model(student)

In [167]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Train teacher as usual # 평소와 같은 방법으로 교사모델 훈련시작
teacher.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[get_f1],
)

# Train and evaluate teacher on data. # 데이터셋을 통해 교사의 트레인 및 평가
es = EarlyStopping(monitor='get_f1', min_delta=0, patience=20, verbose=1, restore_best_weights=True)
mc = ModelCheckpoint('best_model.h5', monitor='get_f1', mode='max', save_best_only=True)
teacher.fit(x1_train, y1_train_over, epochs=50, verbose=1, callbacks=[es, mc])
teacher.evaluate(x1_test, y1_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Restoring model weights from the end of the best epoch.
Epoch 00021: early stopping


[0.6642853021621704, 0.16294357180595398]

In [169]:
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[get_f1],
    student_loss_fn=keras.losses.BinaryCrossentropy(),
    distillation_loss_fn=keras.losses.BinaryCrossentropy(),
    alpha=0.1,
    temperature=10,
)

# Distill teacher to student
es = EarlyStopping(monitor='get_f1', min_delta=0, patience=20, verbose=1, restore_best_weights=True)
distiller.fit(x1_train, y1_train_over, epochs=50, verbose=1, callbacks=[es])

# Evaluate student on test dataset
distiller.evaluate(x1_test, y1_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Restoring model weights from the end of the best epoch.
Epoch 00021: early stopping


[0.17247577011585236, 0.8291529417037964]

In [170]:
pred = student.predict(test)

# np.where ==> 0, 1
pred = np.where(pred >= 0.5, 1, 0)


In [None]:
teacher.save('teacher.h5')
student.save('student.h5')



In [172]:
submission =pd.read_csv('sample_submission.csv')
submission['Y_LABEL'] = pred
submission.head()

Unnamed: 0,ID,Y_LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,1
3,TEST_0003,1
4,TEST_0004,1


In [174]:
submission.to_csv('sample_submission.csv', index=False)

In [178]:
submission['Y_LABEL'].value_counts()

0    4060
1    1981
Name: Y_LABEL, dtype: int64