# Linear Model(선형 모델)
- 입력 특성에 대한 선형 함수를 만들어 예측을 수행
- 다양한 선형 모델이 존재
- 분류와 회귀에 모두 사용 가능

## 성적 데이터 생성

In [1]:
import pandas as pd

In [2]:
# Study hours ('시간') and exam scores ('성적') for four students,
# indexed by student name.
data = pd.DataFrame(
    {'시간': [2, 4, 8, 9], '성적': [20, 40, 80, 90]},
    index=['해도', '병관', '명훈', '동원'],
)

In [3]:
# Display the frame to check the four rows and two columns.
data

Unnamed: 0,시간,성적
해도,2,20
병관,4,40
명훈,8,80
동원,9,90


### 수학 공식을 이용한 해석적 방법(Ordinary Least Squares)
- Linear Regression

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
# Ordinary-least-squares regressor with default settings.
linearModel = LinearRegression()

In [6]:
# Fit score ('성적') against study hours ('시간'); the double brackets keep
# the feature as a one-column DataFrame.
linearModel.fit(data[['시간']],data['성적'])

LinearRegression()

In [7]:
# coef_: learned weight per feature, intercept_: bias term
linearModel.coef_,linearModel.intercept_

(array([10.]), 7.105427357601002e-15)

In [8]:
# Predict the score for 7 study hours. The model was fitted on a DataFrame
# with the column '시간', so the query uses the same column name; a bare
# nested list carries no feature names and makes recent scikit-learn
# versions emit a feature-name warning.
linearModel.predict(pd.DataFrame({'시간': [7]}))

array([70.])

In [9]:
# R^2 on the same data the model was trained on (not a held-out set).
linearModel.score(data[['시간']],data['성적'])

1.0

### 경사하강법(Gradient Descent Algorithm)

In [10]:
from sklearn.linear_model import SGDRegressor

In [11]:
# Linear regressor trained with stochastic gradient descent.
sgdModel = SGDRegressor(max_iter = 5000, # maximum number of epochs (passes over the training data)
                        eta0 = 0.01, # initial learning rate
                        verbose = 1) # print training progress for each epoch

In [12]:
# Train on study hours only; verbose=1 prints a per-epoch log.
sgdModel.fit(data[['시간']],data['성적'])

-- Epoch 1
Norm: 8.21, NNZs: 1, Bias: 1.260493, T: 4, Avg. loss: 965.285176
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 9.39, NNZs: 1, Bias: 1.431770, T: 8, Avg. loss: 29.650140
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 9.67, NNZs: 1, Bias: 1.467132, T: 12, Avg. loss: 2.433305
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 9.76, NNZs: 1, Bias: 1.474009, T: 16, Avg. loss: 0.429741
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 9.79, NNZs: 1, Bias: 1.472082, T: 20, Avg. loss: 0.253476
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 9.80, NNZs: 1, Bias: 1.468319, T: 24, Avg. loss: 0.228881
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 9.81, NNZs: 1, Bias: 1.463381, T: 28, Avg. loss: 0.228747
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 9.81, NNZs: 1, Bias: 1.458740, T: 32, Avg. loss: 0.224307
Total training time: 0.00 seconds.
-- Epoch 9
Norm: 9.81, NNZs: 1, Bias: 1.453743, T: 36, Avg. loss: 0.225041
Total training time: 0.00 seconds.
-- Epoch 

SGDRegressor(max_iter=5000, verbose=1)

In [13]:
# Learned weight and bias; compare with the LinearRegression values.
sgdModel.coef_,sgdModel.intercept_

(array([9.83773774]), array([1.20250819]))

In [14]:
# Predict the score for 7 study hours. The model was fitted on a DataFrame
# with the column '시간', so the query uses the same column name; a bare
# nested list carries no feature names and makes recent scikit-learn
# versions emit a feature-name warning.
sgdModel.predict(pd.DataFrame({'시간': [7]}))

array([70.06667237])

In [15]:
# R^2 on the same data the model was trained on (not a held-out set).
sgdModel.score(data[['시간']],data['성적'])

0.9996480007583528

## 지난 시험 점수를 추가

In [16]:
# Add a second feature in place: each student's previous exam score
# ('전 시험 점수'), then display the updated frame.
data['전 시험 점수'] = [20,48,80,90]
data

Unnamed: 0,시간,성적,전 시험 점수
해도,2,20,20
병관,4,40,48
명훈,8,80,80
동원,9,90,90


In [17]:
# Single unseen student ('도연') to predict for: 7 study hours and a
# previous exam score of 90.
test = pd.DataFrame({'시간': [7], '전 시험 점수': [90]}, index=['도연'])
test

Unnamed: 0,시간,전 시험 점수
도연,7,90


### 수학 공식을 이용한 해석적 방법(Ordinary Least Squares)
- Linear Regression

In [18]:
# Fresh OLS regressor; rebinds linearModel, replacing the one-feature model.
linearModel = LinearRegression()

In [19]:
# Fit score against both features: study hours and previous exam score.
linearModel.fit(data[['시간','전 시험 점수']],data['성적'])

LinearRegression()

In [20]:
# coef_: learned weight per feature, intercept_: bias term
linearModel.coef_,linearModel.intercept_

(array([ 1.00000000e+01, -6.48527378e-16]), 3.552713678800501e-14)

In [21]:
# Predict for the unseen student; `test` has the same column names as the
# training features.
linearModel.predict(test)

array([70.])

In [22]:
# R^2 on the same data the model was trained on (not a held-out set).
linearModel.score(data[['시간','전 시험 점수']],data['성적'])

1.0

### 경사하강법(Gradient Descent Algorithm)

In [23]:
# SGD regressor for the two-feature problem, same hyperparameters as before.
# NOTE(review): the recorded training log for this model shows the average
# loss growing to ~1e25 and a large negative R^2, i.e. training diverges.
# The two features are fed unscaled (hours up to 9, previous score up to 90),
# which presumably makes eta0 = 0.01 too large; consider standardizing the
# features or using a much smaller eta0. Confirm whether the divergence is an
# intended demonstration before changing it.
sgdModel2 = SGDRegressor(max_iter = 5000, # maximum number of epochs (passes over the training data)
                        eta0 = 0.01, # initial learning rate
                        verbose = 1) # print training progress for each epoch

In [24]:
# Train on both raw (unscaled) features; verbose=1 prints a per-epoch log.
sgdModel2.fit(data[['시간','전 시험 점수']],data['성적'])

-- Epoch 1
Norm: 120034.06, NNZs: 2, Bias: -1498.908071, T: 4, Avg. loss: 5826593985.776116
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 5225543515.66, NNZs: 2, Bias: -57635056.327679, T: 8, Avg. loss: 12307786864938678272.000000
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 16459778985.91, NNZs: 2, Bias: -4109593897.568495, T: 12, Avg. loss: 165070422541078908027011072.000000
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 243682255990.65, NNZs: 2, Bias: -1380601330.786339, T: 16, Avg. loss: 76931966087274035847102464.000000
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 163587447144.43, NNZs: 2, Bias: -9177194900.274151, T: 20, Avg. loss: 55138872543519565299056640.000000
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 194252271982.91, NNZs: 2, Bias: 63223349.530787, T: 24, Avg. loss: 68656602098413949163667456.000000
Total training time: 0.00 seconds.
Convergence after 6 epochs took 0.00 seconds


SGDRegressor(max_iter=5000, verbose=1)

In [25]:
# Learned weights and bias; compare with the LinearRegression values.
sgdModel2.coef_,sgdModel2.intercept_

(array([1.48305441e+10, 1.93685312e+11]), array([63223349.53078747]))

In [26]:
# Predict for the unseen student with the SGD model.
sgdModel2.predict(test)

array([1.75355551e+13])

In [27]:
# Wrap the prediction in a float DataFrame and display it; presumably done
# to view the value without NumPy's scientific notation.
temp = pd.DataFrame(sgdModel2.predict(test),dtype=float)
temp

Unnamed: 0,0
0,17535560000000.0


In [28]:
# R^2 on the same data the model was trained on (not a held-out set).
sgdModel2.score(data[['시간','전 시험 점수']],data['성적'])

-2.000293043594153e+23