<a href="https://colab.research.google.com/github/s-ryuri/TIL/blob/main/%ED%9A%8C%EA%B7%80.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

%cd drive/MyDrive/파이썬머신러닝완벽가이드

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Synthetic regression data: y = 4x + 6 plus standard-normal noise.
np.random.seed(0)  # fixed seed so the same sample is drawn on every run
x = np.random.rand(100, 1) * 2
noise = np.random.randn(100, 1)
y = 6 + 4 * x + noise

plt.scatter(x, y)

In [None]:
def get_cost(y,y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y: Array of observed target values.
        y_pred: Array of predicted values, subtracted element-wise from ``y``.

    Returns:
        The sum of squared residuals divided by ``len(y)``.
    """
    residuals = y - y_pred
    squared_error_total = np.sum(np.square(residuals))
    return squared_error_total / len(y)

In [None]:
def get_weight_updates(w1,w0,x,y,learning_rate = 0.01):
    """Compute one gradient-descent step for the line ``y = w1 * x + w0``.

    The returned values are the mean-squared-error gradients already scaled
    by ``learning_rate``, so the caller applies them as ``w = w - update``.

    Args:
        w1: Current slope; used as ``np.dot(x, w1.T)``.
        w0: Current intercept, added to every prediction.
        x: Feature array; the visible callers pass shape (n, 1).
        y: Target array with one row per row of ``x``.
        learning_rate: Step-size multiplier applied to both gradients.

    Returns:
        Tuple ``(w1_update, w0_update)``.
    """
    n = len(y)
    y_pred = np.dot(x,w1.T) + w0
    diff = y - y_pred

    # A column of ones lets the intercept gradient use the same dot-product
    # form as the slope gradient (it simply sums the residuals).
    w0_factors = np.ones((n,1))
    w1_update = -(2/n) * learning_rate * (np.dot(x.T,diff))
    w0_update = -(2/n) * learning_rate * (np.dot(w0_factors.T,diff))

    return w1_update,w0_update

In [None]:
def gradient_descent_steps(x,y,iters = 10000,learning_rate = 0.01):
    """Fit ``y = w1 * x + w0`` with full-batch gradient descent.

    Both weights start at zero and are updated ``iters`` times using the
    whole of ``x`` and ``y`` on every step.

    Args:
        x: Feature array passed through to ``get_weight_updates``.
        y: Target array passed through to ``get_weight_updates``.
        iters: Number of update steps to run.
        learning_rate: Step size forwarded to ``get_weight_updates``
            (defaults to the previously hard-coded 0.01).

    Returns:
        Tuple ``(w1, w0)`` of (1, 1) arrays holding the slope and intercept.
    """
    w0 = np.zeros((1,1))
    w1 = np.zeros((1,1))

    for _ in range(iters):
        w1_update, w0_update = get_weight_updates(w1,w0,x,y,learning_rate=learning_rate)
        w1 = w1 - w1_update
        w0 = w0 - w0_update

    return w1, w0

In [None]:
# Fit by full-batch gradient descent, then report the weights and the cost.
w1, w0 = gradient_descent_steps(x, y, iters=1000)
gd_slope, gd_intercept = w1[0, 0], w0[0, 0]
print('w1 : {0:.3f} w0 : {1:.3f}'.format(gd_slope, gd_intercept))

# w0 stays a (1, 1) array here and broadcasts across every row of x.
y_pred = gd_slope * x + w0
gd_cost = get_cost(y, y_pred)
print('Gradient Descent Total cost : {0:.4f}'.format(gd_cost))

In [None]:
# Overlay the fitted line on the training points, using an explicit Axes.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.plot(x, y_pred)

In [None]:
def stochastic_gradient_descent_steps(x,y,batch_size = 10,iters = 1000,learning_rate = 0.01):
    """Fit ``y = w1 * x + w0`` with mini-batch stochastic gradient descent.

    Each iteration draws ``batch_size`` rows without replacement and applies
    one update computed from that batch only.

    Args:
        x: Feature array, indexed by row to form each batch.
        y: Target array, indexed with the same rows as ``x``.
        batch_size: Number of rows sampled per iteration.
        iters: Number of update steps to run.
        learning_rate: Step size forwarded to ``get_weight_updates``
            (defaults to the previously hard-coded 0.01).

    Returns:
        Tuple ``(w1, w0)`` of (1, 1) arrays holding the slope and intercept.
    """
    w0 = np.zeros((1,1))
    w1 = np.zeros((1,1))

    for ind in range(iters):
        # A private generator seeded with the iteration index draws the same
        # batch as np.random.seed(ind) would, without resetting NumPy's
        # global random state for the rest of the notebook.
        rng = np.random.RandomState(ind)
        batch_index = rng.permutation(x.shape[0])[:batch_size]
        sample_x = x[batch_index]
        sample_y = y[batch_index]

        w1_update,w0_update = get_weight_updates(w1,w0,sample_x,sample_y,learning_rate=learning_rate)
        w1 = w1 - w1_update
        w0 = w0 - w0_update

    return w1, w0


# Fit with mini-batch SGD and report the rounded weights and full-data cost.
w1,w0 = stochastic_gradient_descent_steps(x,y,iters = 1000)
sgd_slope = w1[0,0]
print('w1 : ',round(sgd_slope,3),'w0:',round(w0[0,0],3))

y_pred = sgd_slope * x + w0
sgd_cost = get_cost(y,y_pred)
print(sgd_cost)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.datasets import load_boston

# Load the Boston housing data and attach the target as a PRICE column.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# appears to need an older release -- confirm the runtime's version.
boston = load_boston()
bostonDF = pd.DataFrame(boston.data, columns=boston.feature_names).assign(PRICE=boston.target)

print('Boston 데이터 세트 크기 : ',bostonDF.shape)
bostonDF.head()

In [None]:
# Print the DataFrame summary for the Boston data via .info().
bostonDF.info()

In [None]:
# 2 x 4 grid: one PRICE-vs-feature regression plot per panel.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(16, 8))
lm_features = ['RM','ZN','INDUS','NOX','AGE','PTRATIO','LSTAT','RAD']
# ax.flat walks the grid row by row, so the i-th feature lands on
# row i // 4, column i % 4.
for panel, feature in zip(ax.flat, lm_features):
    sns.regplot(x=feature, y='PRICE', data=bostonDF, ax=panel)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score

# Separate the target column from the feature matrix.
y_target = bostonDF['PRICE']
x_data = bostonDF.drop(['PRICE'],axis = 1)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train,x_test,y_train,y_test = train_test_split(x_data,y_target,test_size = 0.3,random_state = 156)

# `lr` has to be fitted here: later cells read lr.intercept_ / lr.coef_ and
# pass lr to cross_val_score, so leaving this commented out raises NameError
# on a fresh kernel.
lr = LinearRegression()
lr.fit(x_train,y_train)
y_preds = lr.predict(x_test)
mse = mean_squared_error(y_test,y_preds)
rmse = np.sqrt(mse)

print('MSE : {0:.3f}, RMSE : {1:.3F}'.format(mse,rmse))
print('Variance score : {0:.3f}'.format(r2_score(y_test,y_preds)))

In [None]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated RMSE for ridge regression with alpha = 10.
ridge = Ridge(alpha=10)
neg_mse_scores = cross_val_score(
    ridge, x_data, y_target, scoring='neg_mean_squared_error', cv=5
)
# The scorer returns negated MSE, so flip the sign before the square root.
rmse_scores = np.sqrt(-neg_mse_scores)
avg_rmse = rmse_scores.mean()

In [None]:
# Per-fold scores rounded to 3 decimals, followed by the unrounded mean RMSE.
for fold_scores in (neg_mse_scores, rmse_scores):
    print(np.round(fold_scores, 3))
print(avg_rmse)

In [None]:
# Compare the 5-fold average RMSE across several ridge penalties.
alphas = [0, 0.1, 1, 10, 100]

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    neg_mse_scores = cross_val_score(
        ridge, x_data, y_target, scoring='neg_mean_squared_error', cv=5
    )
    # Negated MSE per fold -> RMSE per fold -> mean over the folds.
    rmse_per_fold = np.sqrt(-neg_mse_scores)
    avg_rmse = rmse_per_fold.mean()
    print('alpha {0} 일 때 5 folds 의 평균 RMSE : {1:.3f}'.format(alpha,avg_rmse))

In [None]:
# One horizontal bar chart of ridge coefficients per alpha, on a shared x-range.
fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(18, 6))

# Collect one coefficient Series per alpha and build the frame once at the end.
coeff_by_alpha = {}

for pos, alpha in enumerate(alphas):
    panel = ax[pos]
    ridge = Ridge(alpha=alpha).fit(x_data, y_target)
    colname = 'alpha:' + str(alpha)

    unsorted_coeff = pd.Series(data=ridge.coef_, index=x_data.columns)
    coeff_by_alpha[colname] = unsorted_coeff

    # Largest coefficient first so the bars read top-down by magnitude.
    coeff = unsorted_coeff.sort_values(ascending=False)
    panel.set_title(colname)
    panel.set_xlim(-3, 6)
    sns.barplot(x=coeff.values, y=coeff.index, ax=panel)

coeff_df = pd.DataFrame(coeff_by_alpha)

plt.show()

In [None]:
# Report the linear model's intercept and its coefficients rounded to 1 decimal.
lr_intercept = lr.intercept_
lr_coef_rounded = np.round(lr.coef_,1)
print('절편 값 :',lr_intercept)
print('회귀 계수값 : ',lr_coef_rounded)

In [None]:
# Label each rounded coefficient with its feature name and show largest first.
rounded_coef = np.round(lr.coef_, 1)
coeff = pd.Series(rounded_coef, index=x_data.columns)
coeff.sort_values(ascending=False)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the linear model; the scorer returns negated MSE,
# so the sign is flipped before taking the square root.
neg_mse_scores = cross_val_score(lr,x_data,y_target,scoring = 'neg_mean_squared_error',cv = 5)
rmse_scores = np.sqrt(-1 * neg_mse_scores)
avg_rmse = np.mean(rmse_scores)

print(' 5 folds의 개별 Negative MSE scores : ',np.round(neg_mse_scores,2))
print(' 5 folds의 개별 RMSE scores : ',np.round(rmse_scores,2))
# Label corrected from "개별" (individual) to "평균" (average): this value is
# the mean over the folds.
print(' 5 folds의 평균 RMSE :  ', np.round(avg_rmse,2))

In [None]:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Expand a 2 x 2 feature matrix into its degree-2 polynomial terms.
x = np.reshape(np.arange(4), (2, 2))
print('일차 단항식 계수 피처 : \n',x)

# fit_transform fits on x and transforms it in one call.
poly = PolynomialFeatures(degree = 2)
poly_ftr = poly.fit_transform(x)
print('변환된 2차 다항식 계수 피처 : \n',poly_ftr)

In [None]:
def polynomial_func(x):
    """Evaluate ``1 + 2*a + 3*a**2 + 4*b**3`` for each row of ``x``.

    ``a`` is column 0 and ``b`` is column 1 of the 2-D input; one value is
    returned per row.
    """
    first, second = x[:,0], x[:,1]
    return 1 + 2*first + 3 * first ** 2 + 4*second**3

# Apply the cubic target function to a small 2 x 2 example.
x = np.reshape(np.arange(4), (2, 2))
print('일차 단항식 계수 feature : \n',x)

y = polynomial_func(x)
print('삼차 다항식 결정값 : \n',y)

In [None]:
# Same terms as the body of polynomial_func, evaluated directly on x (constant term last).
2*x[:,0] + 3 * x[:,0] ** 2 + 4*x[:,1]**3 + 1