# 딥러닝 따라하기1_회귀

* 목적 : 무작정 따라하면서 코드를 눈과 손으로 익히기

# 1.환경준비

* 라이브러리 Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# 2.Regression : Advertising

## (1) 데이터 전처리

### 1) 데이터 준비

In [None]:
# Load the Advertising CSV from the remote URL into a DataFrame.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/advertising.csv'
adv = pd.read_csv(path)
# Display the first rows as a quick sanity check of the columns.
adv.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [None]:
# Separate the label column from the feature columns.
target = 'Sales'
y = adv[target]                    # label: the Sales column as a Series
x = adv.drop(columns=[target])     # features: every remaining column

### 2) 가변수화

### 3) 데이터분할

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

## (2) ML 연습 : 선형회귀

In [None]:
# 선형회귀 알고리즘을 불러 옵시다.
from sklearn.linear_model import LinearRegression

### 1) 모델 선언

In [None]:
# Create an untrained scikit-learn LinearRegression estimator with default settings.
model = LinearRegression()

### 2) 학습

In [None]:
# Fit the linear regression on the (still unscaled) training split.
model.fit(x_train, y_train)

### 3) 예측

In [None]:
# Predict the target for the held-out validation features.
pred = model.predict(x_val)

### 4) 검증
만든 모델이 얼마나 정확한지 검증해 봅시다.



In [None]:
# Report validation error for the linear-regression baseline.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that flag
# was deprecated and later removed from scikit-learn's mean_squared_error,
# while np.sqrt(MSE) yields the same value on every version.
print(f'RMSE  : {np.sqrt(mean_squared_error(y_val, pred))}')
print(f'MAE   : {mean_absolute_error(y_val, pred)}')
print(f'MAPE  : {mean_absolute_percentage_error(y_val, pred)}')

RMSE  : 1.8716493530685259
MAE   : 1.4155875681427736
MAPE  : 0.14622589325825738


## (3) 딥러닝 모델링
* 필요한 함수들 불러오기
* 모델 선언
* 학습
* 예측
* 성능 검증

### 1) 전처리 : Scaling

In [None]:
# Learn the per-feature min/max from the training split only, then apply the
# same rescaling to both splits so no validation statistics leak into training.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

### 2) 필요한 함수들 불러오기

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.backend import clear_session

### 3) 모델 선언

In [None]:
# Input width for the network: the column count (last axis) of the scaled
# 2-D training matrix.
nfeatures = x_train.shape[-1]
nfeatures

3

In [None]:
# Clear the Keras session (releases state left over from previously built models).
clear_session()

# Declare a Sequential model: one Dense unit with no activation, taking
# `nfeatures` inputs. The layer is wrapped in a list because Sequential is
# documented to take a list of layers; a bare layer is only tolerated by some
# Keras versions.
model = Sequential([Dense(1, input_shape=(nfeatures,))])

# Print the model summary (layers, output shapes, parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 4         
                                                                 
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

### 4) 학습

In [None]:
# Train on the scaled training split using Keras' default fit settings.
# NOTE(review): no `epochs` is passed; Keras documents a default of a single
# epoch, which is presumably why the validation errors recorded below are much
# larger than the linear-regression baseline — confirm whether more epochs
# are intended.
model.fit(x_train, y_train)



<keras.callbacks.History at 0x7f1b37fe1b80>

### 5) 예측

In [None]:
# Predict the target for the scaled validation features.
pred = model.predict(x_val)



### 6) 검증
만든 모델이 얼마나 정확한지 검증해 봅시다.



In [None]:
# Report validation error for the Keras model.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that flag
# was deprecated and later removed from scikit-learn's mean_squared_error,
# while np.sqrt(MSE) yields the same value on every version.
print(f'RMSE  : {np.sqrt(mean_squared_error(y_val, pred))}')
print(f'MAE   : {mean_absolute_error(y_val, pred)}')
print(f'MAPE  : {mean_absolute_percentage_error(y_val, pred)}')

RMSE  : 15.878043509937653
MAE   : 14.55580283591524
MAPE  : 1.0328081371217863


# 3.Regression : Carseat

## (1) 데이터 전처리
* 데이터 준비
* 가변수화
* 스케일링(필요하다면)
* 데이터 분할

### 1) 데이터 준비

* 카시트 판매량 데이터

|	변수명	|	설명	|	구분	|
|	----	|	----	|	----	|
|	**Sales** 	|	 **각 지역 판매액(단위 : 1000달러)**	|	**Target**	|
|	CompPrice 	|	지역별 경쟁사 판매가격(달러)	|	feature	|
|	Income 	|	가구당 평균 소득액(1000달러)	|	feature	|
|	Advertising 	|	 각 지역, 회사의 광고 예산(1000달러)	|	feature	|
|	Population 	|	 지역 인구수(단위 : 1000명)	|	feature	|
|	Price 	|	 자사 지역별 판매가격(달러)	|	feature	|
|	ShelveLoc 	|	 진열상태(범주 : Bad, Medium, Good)	|	feature	|
|	Age 	|	 지역 인구의 평균 연령	|	feature	|
|	Education 	|	 교육수준(범주 : 10~18)	|	feature	|
|	Urban 	|	 매장이 도심에 있는지 여부(범주 : Yes, No)	|	feature	|
|	US 	|	 매장이 미국에 있는지 여부(범주 : Yes, No)	|	feature	|


* 데이터 경로 : https://raw.githubusercontent.com/DA4BAM/dataset/master/Carseats.csv

In [None]:
# Load the Carseats CSV from the remote URL into a DataFrame.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/Carseats.csv'
carseat = pd.read_csv(path)
# Display the first rows as a quick sanity check of the columns.
carseat.head()

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No


In [None]:
# Separate the label column from the feature columns.
target = 'Sales'
y = carseat[target]                    # label: the Sales column as a Series
x = carseat.drop(columns=[target])     # features: every remaining column

### 2) 가변수화

In [None]:
# Columns to one-hot encode. Education holds numeric codes but is treated as
# a category here, alongside the string-valued columns.
cat_cols = [
    'ShelveLoc',
    'Education',
    'US',
    'Urban',
]
# drop_first=True omits the first level of each encoded column.
x = pd.get_dummies(data=x, columns=cat_cols, drop_first=True)

### 3) 데이터분할

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

### 4) Scaling

In [None]:
# Learn the per-feature min/max from the training split only, then apply the
# same rescaling to both splits so no validation statistics leak into training.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

## (2) 모델링
* 필요한 함수들 불러오기
* 모델 선언
* 학습
* 예측
* 성능 검증

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.backend import clear_session

### 1) 모델 선언

In [None]:
# Inspect (rows, columns) of the scaled training matrix.
x_train.shape

(320, 18)

In [None]:
# Input width for the network: the column count (last axis) of the scaled
# 2-D training matrix.
nfeatures = x_train.shape[-1]
nfeatures

18

In [None]:
# Clear the Keras session (optional, not strictly required).
clear_session()

# Declare a Sequential model: one Dense unit with no activation, taking
# `nfeatures` inputs. The layer is wrapped in a list because Sequential is
# documented to take a list of layers; a bare layer is only tolerated by some
# Keras versions.
model = Sequential([Dense(1, input_shape=(nfeatures,))])

# Print the model summary (layers, output shapes, parameter counts).
model.summary()

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

### 2) 학습

In [None]:
# Train on the scaled training split using Keras' default fit settings.
# NOTE(review): no `epochs` is passed; Keras documents a default of a single
# epoch, so the model is presumably under-trained — confirm whether more
# epochs are intended.
model.fit(x_train, y_train)



<keras.callbacks.History at 0x7f1b440478b0>

### 3) 예측

In [None]:
# Predict the target for the scaled validation features.
pred = model.predict(x_val)



### 4) 검증
만든 모델이 얼마나 정확한지 검증해 봅시다.



In [None]:
# Report validation error for the Keras model.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that flag
# was deprecated and later removed from scikit-learn's mean_squared_error,
# while np.sqrt(MSE) yields the same value on every version.
print(f'RMSE  : {np.sqrt(mean_squared_error(y_val, pred))}')
print(f'MAE   : {mean_absolute_error(y_val, pred)}')
# MAPE stays disabled for this dataset, as in the original cell.
# NOTE(review): presumably because Sales contains zero values, which make
# percentage errors blow up — confirm before re-enabling.
# print(f'MAPE  : {mean_absolute_percentage_error(y_val, pred)}')

RMSE  : 8.431373878767097
MAE   : 7.866295295543969


# 4.Regression : 보스턴 집값

## (1) 데이터 전처리

### 1) 데이터 준비

In [None]:
# Load the Boston housing CSV from the remote URL into a DataFrame.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/boston.csv'
boston = pd.read_csv(path)
# Display the first rows as a quick sanity check of the columns.
boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2


In [None]:
# Separate the label column from the feature columns.
target = 'medv'
y = boston[target]                    # label: the medv column as a Series
x = boston.drop(columns=[target])     # features: every remaining column

### 2) 가변수화

### 3) 데이터분할

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

### 4) Scaling

In [None]:
# Learn the per-feature min/max from the training split only, then apply the
# same rescaling to both splits so no validation statistics leak into training.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)

## (2) 모델링
* 필요한 함수들 불러오기
* 모델 선언
* 학습
* 예측
* 성능 검증

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.backend import clear_session

### 1) 모델 선언

In [None]:
# Input width for the network: the column count (last axis) of the scaled
# 2-D training matrix.
nfeatures = x_train.shape[-1]
nfeatures

12

In [None]:
# Shape illustration: `a` is the 1-D vector [1 2 3] with shape (3,), and `b`
# is the 2-D matrix [[1 2] [3 4]] with shape (2, 2).
a = np.arange(1, 4)
b = np.arange(1, 5).reshape(2, 2)

print(a, a.shape)
print(b, b.shape)

In [None]:
# Clear the Keras session (releases state left over from previously built models).
clear_session()

# Declare a Sequential model: one Dense unit with no activation, taking
# `nfeatures` inputs. The layer is wrapped in a list because Sequential is
# documented to take a list of layers; a bare layer is only tolerated by some
# Keras versions.
model = Sequential([Dense(1, input_shape=(nfeatures,))])

# Print the model summary (layers, output shapes, parameter counts).
model.summary()

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

### 2) 학습

In [None]:
# Train on the scaled training split using Keras' default fit settings.
# NOTE(review): no `epochs` is passed; Keras documents a default of a single
# epoch, so the model is presumably under-trained — confirm whether more
# epochs are intended.
model.fit(x_train, y_train)



<keras.callbacks.History at 0x7f1b37693520>

### 3) 예측

In [None]:
# Predict the target for the scaled validation features.
pred = model.predict(x_val)



### 4) 검증
만든 모델이 얼마나 정확한지 검증해 봅시다.



In [None]:
# Report validation error for the Keras model.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that flag
# was deprecated and later removed from scikit-learn's mean_squared_error,
# while np.sqrt(MSE) yields the same value on every version.
print(f'RMSE  : {np.sqrt(mean_squared_error(y_val, pred))}')
print(f'MAE   : {mean_absolute_error(y_val, pred)}')
print(f'MAPE  : {mean_absolute_percentage_error(y_val, pred)}')

RMSE  : 23.2201455290913
MAE   : 21.740412871083066
MAPE  : 1.0028701011959449
