In [14]:
# 다중선형회귀를 구현해 보자.
# 오존데이터를 가지고 모델을 만들어 볼꺼다.

# 1. 필요한 module을 불러들이자
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [22]:
# 2. Raw Data Loading 
raw_data = pd.read_csv('./Data/ozone.csv')
display(raw_data.head(), raw_data.shape) # (153, 6)

# 결측치 처리! => 원래는 해야하지만 삭제를 할꺼다.
raw_data =raw_data.dropna(how='any') # NaN이 들어가 있느 행은 다 지우리는 의미
display(raw_data.head(), raw_data.shape) # (111, 6)

# 이상치는 처리하지 않을 거다. 

# 대신 정규화는 진행해야 된다.
# 정규화는 MinMaxScaling을 통해 진행할꺼다.
scaler_x = MinMaxScaler()
scaler_t = MinMaxScaler()

# 제일 먼저 scaler한테 최대값과 최소값을 알려줘야 그 정보를 가지고 정규화를 진행할 수 있다.
scaler_x.fit(raw_data[['Solar.R','Wind','Temp']].values) #  Fancy indexing 한거를 values를 하면
# 2차원 numpy로 나온다.
scaler_t.fit(raw_data['Ozone'].values.reshape(-1,1)) # raw_data['Ozone'] 이상테는 1차원인데 
# raw_data['Ozone'].values.reshape(-1,1)) 이를 통해 2차원으로 변경해 차원을 맞춰준다.

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
0,41.0,190.0,7.4,67,5,1
1,36.0,118.0,8.0,72,5,2
2,12.0,149.0,12.6,74,5,3
3,18.0,313.0,11.5,62,5,4
4,,,14.3,56,5,5


(153, 6)

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
0,41.0,190.0,7.4,67,5,1
1,36.0,118.0,8.0,72,5,2
2,12.0,149.0,12.6,74,5,3
3,18.0,313.0,11.5,62,5,4
6,23.0,299.0,8.6,65,5,7


(111, 6)

In [26]:
# 3. Training Data Set
x_data = scaler_x.transform(raw_data[['Solar.R','Wind','Temp']].values)
# print(x_data)
t_data = scaler_t.transform(raw_data['Ozone'].values.reshape(-1,1))
# print(t_data)

In [36]:
# Model
model = Sequential()

# Layer 추가
model.add(Flatten(input_shape=(3,)))
model.add(Dense(1,activation='linear'))

#Model 설정
model.compile(optimizer=Adam(learning_rate=1e-4),
             loss='mse')
# Model 학습
model.fit(x_data ,t_data, epochs=2000,verbose=0)

<keras.callbacks.History at 0x14f7b0df0>

In [37]:
# 우리 모델이 잘 만들어졌는지.. 좋은 모델인지 평가!
# 현재 우리는 적당한 평가 기준이 없다, 일단 지금은 그냥 넘어가자. 

In [43]:
# 모델이 완성이 되었다고 생각되면, 
# 예측 작업에 들어간다.
# 예측하고 싶은건 ,.. 태양광 150 , 바람 10, 온도 80 일 경우 오존량 예측
predict_data =np.array([[150.0, 10.0, 80.0]])
scaled_predict_data= scaler_x.transform(predict_data)
# print(scaled_predict_data)
result = model.predict(scaled_predict_data)
# print(result) # [[0.15387532]]
final_result = scaler_t.inverse_transform(result)
# print(final_result) # [[26.697178]]



In [65]:
# Logistic Regression을 구현해보자
# Binary Classification(이항 분류)

# 처음엔 당현히 필요한 모듈 불러들이는 것부터 시작!
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler # 정규화
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [66]:
# Raw Data Loading
# 데이터 전처리는 크게 3가지만 진행 ( 이것들을 진해안하면 망함)
# 1. 결측기
# 2. 이상치
# 3. 정규화

df = pd.read_csv('./Data/admission.csv')
# display(df.head())
print(df.info()) # 확인했더니 결측치는 존재하지 않는다.

# 이상치 처리인데... 사실 이 데이터는 이상치가 존재한다.
# 이상치를 처리하는 방법은 여러가지 방법이 있다.
# 가장 대표적인 방법은 2가지이다.
# 1. Tukey Fence : 4분위를 이용하는 방법
# 2. Z-Score 방식 : 정규분포를 이용하는 방법
# 여기서는 2번. Z-Score 방식으로 이상치를 걸러내서 사용할 거다. (우린 못쓰니 교수님 자료 받자)
from scipy import stats

zscore_threshold = 2.0 # zscore outliers 임계값 (2.0이하가 optimal)


for col in df.columns:
    outliers = df[col][(np.abs(stats.zscore(df[col])) > zscore_threshold)]
    df = df.loc[~df[col].isin(outliers)]
    
# print(df.shape)  # (382, 4)
display(df.head())

# 마지막으로 정규화를 진행해야 한다.
scaler = MinMaxScaler()
scaler.fit(df[['gre','gpa']].values)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   admit   400 non-null    int64  
 1   gre     400 non-null    int64  
 2   gpa     400 non-null    float64
 3   rank    400 non-null    int64  
dtypes: float64(1), int64(3)
memory usage: 12.6 KB
None


Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [67]:
from sklearn.model_selection import train_test_split
# Training Data Set
x_data = scaler.transform(df[['gre','gpa']].values)
t_data = df['admit'].values.reshape(-1,1)

x_data_train, x_data_test , t_data_train , t_data_test = \
train_test_split(x_data ,t_data , test_size=0.2 ) # test_size 에 0.2 주면 8:2 비율 , 0.3 주면 7:3 비율


In [69]:
# Model
model = Sequential()

# Model에 Layer 추가
model.add(Flatten(input_shape=(2,)))
model.add(Dense(1,activation='sigmoid'))

#Model 설정
model.compile(optimizer=Adam(learning_rate=1e-3),
                            loss='binary_crossentropy',
             metrics=['accuracy'])
              
# Model 학습
model.fit(x_data_train, 
          t_data_train, 
          epochs=500, 
          validation_split=0.2,
          verbose=1)
              

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500


Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500


Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500


Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500


Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500


Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500


Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500


Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500


Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x15d690940>

In [70]:
# 우리 모델이 잘 만든 모델인지 평가를 진행해야 된다.
eval_result = model.evaluate(x_data_train,t_data_train)
print(eval_result)

[0.6289017200469971, 0.6622951030731201]


In [72]:
# 평가가 잘 끝났으면(우리 모델이 잘 만든 모델이면)
# 예측해봐야된다.
# 성적이 550, 3.5 일 떼, 합격 여부를 알아보자.
predict_data = np.array([[550.0 , 3.5]])
scaled_predict_data =scaler.transform(predict_data)
result=model.predict(scaled_predict_data)
print(result) # [[0.33646333]]

[[0.34808615]]


In [99]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler # 정규화
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [101]:
df = pd.read_csv('./Data/train.csv')
display(df.head())
print(df.info()) 





Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None


In [81]:
scaler = MinMaxScaler()
scaler.fit(df[['Pclass','Fare']].values)

In [83]:
# Training Data Set
x_data = scaler.transform(df[['Pclass','Fare']].values)
t_data = df['Survived'].values.reshape(-1,1)



In [94]:
# Model
model = Sequential()

# model에 Layer 추가
model.add(Flatten(input_shape=(2,)))
model.add(Dense(1,activation='sigmoid'))

# Model 설정
model.compile(optimizer=Adam(learning_rate=1e-3),
             loss='binary_crossentropy',
             metrixs=['accuracy'])

# Model 학습
model.fit(x_data, t_data, epochs=500, validation=0.2, verbose=1)


TypeError: Invalid keyword argument(s) in `compile()`: ({'metrixs'},). Valid keyword arguments include "cloning", "experimental_run_tf_function", "distribute", "target_tensors", or "sample_weight_mode".