In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
# Import related libraries
import matplotlib.font_manager as fm
# Render all plot text with the 'Malgun Gothic' font family.
# NOTE(review): presumably chosen so Korean labels display correctly; the font
# must be installed on the machine running the notebook — confirm.
plt.rcParams['font.family'] = 'Malgun Gothic'
# Apply matplotlib's 'ggplot' style sheet to subsequent figures.
plt.style.use('ggplot')

# 과적합 방지


- 학습 횟수는 최대한 크게 설정하고, 학습이 완료되면(cost가 더 이상 줄어들지 않으면) 학습을 종료한다.

```
1. 학습 횟수(epochs)는 최대한 크게 설정
2. patience : 개선이 없을 때 기다리는 횟수
3. min_delta : 개선으로 인정할 cost 값의 최소 차이 (예: 0.01, 0.001)

```


## tensor 방식으로 구현
- 조기 종료 로직을 코드로 직접 구현해야 함

In [2]:
# Read the header-less CSV and label its four columns in a single expression,
# then preview the first three rows.
df = (
    pd.read_csv('data/test.csv', header=None)
    .set_axis(['q1', 'q2', 'mid', 'final'], axis=1)
)
df.head(3)

Unnamed: 0,q1,q2,mid,final
0,73,80,75,152
1,93,88,93,185
2,89,91,90,180


In [3]:
# A scalar column position (-1) selects the last column as a 1-D Series.
df.iloc[:,-1]

0     152
1     185
2     180
3     196
4     142
5     101
6     149
7     115
8     175
9     164
10    141
11    141
12    184
13    152
14    148
15    192
16    147
17    183
18    177
19    159
20    177
21    175
22    175
23    149
24    192
Name: final, dtype: int64

In [4]:
# A list of column positions ([-1]) selects the same column but keeps it as a 2-D DataFrame.
df.iloc[:,[-1]]

Unnamed: 0,final
0,152
1,185
2,180
3,196
4,142
5,101
6,149
7,115
8,175
9,164


In [5]:
# Features: every column except the last one.
x_data = df.iloc[:, :-1].to_numpy()
# Labels: the last column only; the list index [-1] keeps the result 2-D.
y_data = df.iloc[:, [-1]].to_numpy()

In [6]:
# Display the feature matrix and the label column together as a tuple.
x_data,y_data

(array([[ 73,  80,  75],
        [ 93,  88,  93],
        [ 89,  91,  90],
        [ 96,  98, 100],
        [ 73,  66,  70],
        [ 53,  46,  55],
        [ 69,  74,  77],
        [ 47,  56,  60],
        [ 87,  79,  90],
        [ 79,  70,  88],
        [ 69,  70,  73],
        [ 70,  65,  74],
        [ 93,  95,  91],
        [ 79,  80,  73],
        [ 70,  73,  78],
        [ 93,  89,  96],
        [ 78,  75,  68],
        [ 81,  90,  93],
        [ 88,  92,  86],
        [ 78,  83,  77],
        [ 82,  86,  90],
        [ 86,  82,  89],
        [ 78,  83,  85],
        [ 76,  83,  71],
        [ 96,  93,  95]], dtype=int64),
 array([[152],
        [185],
        [180],
        [196],
        [142],
        [101],
        [149],
        [115],
        [175],
        [164],
        [141],
        [141],
        [184],
        [152],
        [148],
        [192],
        [147],
        [183],
        [177],
        [159],
        [177],
        [175],
        [175],
        [149],


In [7]:
# Convert the integer NumPy arrays into float32 tensors for the matmul-based model.
x = tf.constant(x_data, dtype=tf.float32)
y = tf.constant(y_data, dtype=tf.float32)

In [8]:
# Trainable parameters, initialised from a uniform random distribution.
w = tf.Variable(tf.random.uniform(shape=[3, 1]))  # weights: [number of features, number of labels]
b = tf.Variable(tf.random.uniform(shape=[1]))  # bias: [number of labels]

In [11]:
def compute_loss():
    """Return the mean squared error of the linear model on the training tensors.

    Reads the notebook-level tensors ``x`` and ``y`` and the trainable
    variables ``w`` and ``b``; takes no arguments so it can be passed directly
    to ``optimizer.minimize``.
    """
    prediction = tf.linalg.matmul(x, w) + b
    residual = prediction - y
    return tf.reduce_mean(residual ** 2)

In [12]:
# Manual early stopping, mirroring keras.callbacks.EarlyStopping(monitor='loss').
patience = 20      # consecutive non-improving steps tolerated before stopping
min_delta = 0.01   # the loss must undercut the best value by more than this to count as an improvement
hist_cost = []     # loss recorded after every optimisation step

pcnt = 0                  # consecutive steps without a sufficient improvement
best_cost = float('inf')  # lowest loss seen so far; inf makes the first step count as an improvement
optimizer = Adam(0.1)
for i in range(10000):
    optimizer.minimize(compute_loss, var_list=[w,b])  # one gradient step on w and b
    c = compute_loss().numpy()
    hist_cost.append(c)

    print(i, 'cost: ',c)

    # Compare against the best loss so far, not the previous step. With an
    # oscillating loss, a step-to-step comparison resets the counter every time
    # the loss merely recovers from a spike, even though no new minimum was
    # reached, so training may never stop early.
    if best_cost - c > min_delta:
        best_cost = c
        pcnt = 0
    else:
        pcnt += 1
        if pcnt >= patience:
            print('early stop')
            break

0 cost:  156.15411
1 cost:  144.6863
2 cost:  23.253227
3 cost:  58.504345
4 cost:  110.14633
5 cost:  66.25777
6 cost:  18.786385
7 cost:  35.89716
8 cost:  69.23599
9 cost:  58.353462
10 cost:  25.446524
11 cost:  18.853645
12 cost:  39.59565
13 cost:  49.05338
14 cost:  33.29953
15 cost:  17.293434
16 cost:  21.70499
17 cost:  34.599655
18 cost:  34.03375
19 cost:  21.732983
20 cost:  16.082804
21 cost:  22.825396
22 cost:  28.65316
23 cost:  23.972673
24 cost:  16.548733
25 cost:  17.002628
26 cost:  22.413643
27 cost:  22.826357
28 cost:  17.703651
29 cost:  15.217151
30 cost:  18.140802
31 cost:  20.29651
32 cost:  17.787363
33 cost:  14.896362
34 cost:  15.859114
35 cost:  17.947306
36 cost:  17.047106
37 cost:  14.733865
38 cost:  14.723359
39 cost:  16.240236
40 cost:  16.041414
41 cost:  14.403871
42 cost:  14.081182
43 cost:  15.079973
44 cost:  15.079442
45 cost:  13.944562
46 cost:  13.617737
47 cost:  14.26305
48 cost:  14.249219
49 cost:  13.444416
50 cost:  13.217983
51

##  케라스 구현
- Keras가 제공하는 EarlyStopping 콜백 모듈 사용

In [13]:
from tensorflow.keras.optimizers import Adam,Adagrad
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [14]:
# One linear unit over the three input features, trained with MSE and Adam.
dense = Dense(units=1, input_dim=3)
model = Sequential()
model.add(dense)
model.compile(optimizer=Adam(learning_rate=0.1), loss='mse')

# Stop once the training loss has failed to improve by more than min_delta
# for `patience` consecutive epochs.
earlyStop = EarlyStopping(monitor='loss', patience=20, min_delta=0.01)
h = model.fit(x_data, y_data, epochs=10000, callbacks=[earlyStop])

Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000


In [None]:
# With early stopping applied, training halted after only 50 of the 10000 requested epochs.