# Linear Regression - Bike Sharing
[ch1-gradient-boosting.ipynb](https://github.com/kyopark2014/ML-Algorithms/blob/main/xgboost/src/ch1-gradient-boosting.ipynb)

In [1]:
import pandas as pd

In [2]:
# Load the cleaned bike-rental table; the relative path is resolved against
# the kernel's current working directory.
df_bikes = pd.read_csv('bike_rentals_cleaned.csv')

In [3]:
# Preview the first five rows to sanity-check the columns and their values.
df_bikes.head()

Unnamed: 0,instant,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,1.0,0.0,1,0.0,6.0,0.0,2,0.344167,0.363625,0.805833,0.160446,985
1,2,1.0,0.0,1,0.0,0.0,0.0,2,0.363478,0.353739,0.696087,0.248539,801
2,3,1.0,0.0,1,0.0,1.0,1.0,1,0.196364,0.189405,0.437273,0.248309,1349
3,4,1.0,0.0,1,0.0,2.0,1.0,1,0.2,0.212122,0.590435,0.160296,1562
4,5,1.0,0.0,1,0.0,3.0,1.0,1,0.226957,0.22927,0.436957,0.1869,1600


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
df_bikes.describe()

Unnamed: 0,instant,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,0.028728,2.997264,0.682627,1.395349,0.495423,0.474391,0.627908,0.190411,4504.348837
std,211.165812,1.110807,0.500342,3.451913,0.167155,2.004787,0.465773,0.544894,0.183023,0.162938,0.142074,0.077462,1937.211452
min,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,22.0
25%,183.5,2.0,0.0,4.0,0.0,1.0,0.0,1.0,0.337083,0.337842,0.522291,0.13495,3152.0
50%,366.0,3.0,1.0,7.0,0.0,3.0,1.0,1.0,0.498333,0.486733,0.6275,0.180971,4548.0
75%,548.5,3.0,1.0,10.0,0.0,5.0,1.0,2.0,0.655417,0.608602,0.729791,0.233206,5956.0
max,731.0,4.0,1.0,12.0,1.0,6.0,1.0,3.0,0.861667,0.840896,0.9725,0.507463,8714.0


In [5]:
# Show each column's dtype and non-null count, plus the frame's memory usage.
df_bikes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   instant     731 non-null    int64  
 1   season      731 non-null    float64
 2   yr          731 non-null    float64
 3   mnth        731 non-null    int64  
 4   holiday     731 non-null    float64
 5   weekday     731 non-null    float64
 6   workingday  731 non-null    float64
 7   weathersit  731 non-null    int64  
 8   temp        731 non-null    float64
 9   atemp       731 non-null    float64
 10  hum         731 non-null    float64
 11  windspeed   731 non-null    float64
 12  cnt         731 non-null    int64  
dtypes: float64(9), int64(4)
memory usage: 74.4 KB


### 누락된 값이 있는지 확인 (Check for missing values)

In [6]:
# Total number of missing cells: the first sum() counts NaNs per column,
# the second adds those per-column counts together.
df_bikes.isna().sum().sum()

0

In [7]:
# How often each distinct value of the target column `cnt` occurs.
df_bikes['cnt'].value_counts()

5119    2
4401    2
1977    2
6824    2
5191    2
       ..
6273    1
5501    1
4760    1
1683    1
4097    1
Name: cnt, Length: 696, dtype: int64

### Define Features and Target

In [8]:
# Features are every column except the last; the target is the last column (`cnt`).
# NOTE(review): X therefore still contains `instant`, which looks like a running
# row counter (1..731) - confirm it is meant to be used as a feature.
X, y = df_bikes.iloc[:, :-1], df_bikes.iloc[:, -1]

In [9]:
# Show the first rows of the target as a one-column frame for nicer rendering.
y.to_frame().head()

Unnamed: 0,cnt
0,985
1,801
2,1349
3,1562
4,1600


### Split into Train/Test Datasets

In [10]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2,
)

### Linear Regression

In [11]:
from sklearn.linear_model import LinearRegression

# Create an unfitted linear regression estimator with scikit-learn's default settings.
lr = LinearRegression()

### Training

In [12]:
# Fit on the training split only. fit() returns the estimator itself, which is
# why the cell output echoes `LinearRegression()`.
lr.fit(X_train, y_train)

LinearRegression()

### Evaluation

In [13]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Predict the target for the held-out test rows.
y_pred = lr.predict(X_test)

# Root mean squared error (RMSE): the square root of the mean squared error,
# which puts the error back in the same units as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("RMSE: %0.2f" % rmse)

RMSE: 898.21


### cross_val_score

In [14]:
from sklearn.model_selection import cross_val_score

# Start from a fresh, unfitted estimator for cross-validation.
model = LinearRegression()

# scikit-learn scorers follow a "higher is better" convention, so the mean
# squared error is reported negated; 50 folds are evaluated on the full X, y.
scores = cross_val_score(model, X, y, cv=50, scoring='neg_mean_squared_error')

# Flip the sign to recover each fold's MSE, then take the root for per-fold RMSE.
rmse = np.sqrt(-scores)

print('RMSE:', rmse.round(2))
print('Avg. RMSE: %0.2f' % rmse.mean())

RMSE: [ 455.58  603.3   432.29  388.81  527.14  374.48  436.54  932.38  965.13
  950.29  725.    524.15  896.83 1742.58  935.63  872.92  569.34  888.27
  480.54  608.9   373.15  779.39  493.92  576.19 1107.63  824.15  529.23
  479.87  692.1  1347.51  927.13  948.31  774.09 1136.82  939.   1011.63
 1471.44 1610.6   928.67  603.41 1163.44  717.66 1351.52 1041.79 1050.94
 1644.85  454.87 1555.68  632.57 1629.97]
Avg. RMSE: 862.15
