# Hands-on Machine Learning
- [10 Minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html)
- [scikit-learn Tutorials](https://scikit-learn.org/stable/tutorial/index.html)

## Import packages

In [None]:
#packages for data manipulation
import pandas as pd
import numpy as np

#packages for machine learning model
from sklearn import preprocessing, model_selection, linear_model, metrics

#source of dataset
from sklearn import datasets

#packages for data visualization
import matplotlib.pyplot as plt
%matplotlib inline

## Step 0 : Load data

In [None]:
# Load the Boston housing data as a dict-like object exposing `data`,
# `target`, `feature_names` and `DESCR` (the fields used by later cells).
#
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, where accessing it raises ImportError. In that case rebuild the same
# arrays from the original StatLib source (requires network access).
try:
    boston_data = datasets.load_boston()
except (ImportError, AttributeError):
    from sklearn.utils import Bunch

    # In the raw file each record spans two physical lines: the first holds
    # 11 feature values, the second holds 2 more features and the target.
    raw_df = pd.read_csv('http://lib.stat.cmu.edu/datasets/boston',
                         sep=r'\s+', skiprows=22, header=None)
    boston_data = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
                                'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
                                'LSTAT']),
        DESCR=('Boston house prices dataset: 506 samples, 13 numeric '
               'features, target MEDV (median home value in $1000s). '
               'Loaded from http://lib.stat.cmu.edu/datasets/boston'),
    )

In [None]:
# Show which fields the loaded dataset object provides.
list(boston_data.keys())

In [None]:
# Regression target, taken as-is from the dataset object.
target = boston_data['target']

# Feature matrix wrapped in a DataFrame, with one labelled column per feature.
dat = pd.DataFrame(
    data=boston_data['data'],
    columns=boston_data['feature_names'],
)

In [None]:
# Preview the first rows of the feature table.
dat.head()

In [None]:
# Print the description text that ships with the dataset object.
print(boston_data.DESCR)

---

## Step 1 : Data exploration

In [None]:
# Column dtypes, non-null counts and memory usage of the feature table.
dat.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per feature.
dat.describe()

In [None]:
# Draw one histogram per feature, each in its own figure titled with the
# feature name, to inspect the individual distributions.
for feature, values in dat.items():
    values.hist()
    plt.title(feature)
    plt.show()

---

## Step 2 : Preprocessing

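### Normalization
- standardize: rescale each feature to zero mean and unit variance
- min-max scale: rescale each feature to the range [0, 1] (used below)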

In [None]:
# Min-max scale every feature column using the scaler's default settings.
#
# NOTE: the scaler is fitted on the full dataset, before the train/test split
# performed in a later step, so test-set minima/maxima influence the scaling.
# That is acceptable for this walkthrough; in real work, fit the scaler on
# the training split only and reuse it to transform the test split.
scaler = preprocessing.MinMaxScaler()
scaler.fit(dat)
normalize_dat = scaler.transform(dat)

In [None]:
# Sanity check: per-feature minimum after scaling (expected to be 0).
normalize_dat.min(axis=0)

In [None]:
# Sanity check: per-feature maximum after scaling (expected to be 1 for
# every non-constant feature).
normalize_dat.max(axis=0)

---

## Step 3 : Train-test split

In [None]:
# Hold out 20% of the rows for testing. Rows are shuffled first, and the
# fixed seed makes the split reproducible across runs.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    normalize_dat,
    target,
    test_size=0.2,
    shuffle=True,
    random_state=400,
)

In [None]:
# Shapes of the training features and training targets.
print(train_x.shape, train_y.shape)

In [None]:
# Shapes of the test features and test targets.
print(test_x.shape, test_y.shape)

---

## Step 4 : Build model

In [None]:
# Ordinary least-squares linear regression with scikit-learn's defaults.
model = linear_model.LinearRegression()

In [None]:
# Fit the model on the training split only; the test split stays unseen
# until evaluation.
model.fit(train_x, train_y)

---

## Step 5 : Model Evaluation

In [None]:
# Score the fitted model on both splits using R^2 (coefficient of
# determination).

# Training split
train_prediction = model.predict(train_x)
train_r2 = metrics.r2_score(train_y, train_prediction)

# Held-out test split
test_prediction = model.predict(test_x)
test_r2 = metrics.r2_score(test_y, test_prediction)

In [None]:
# Report R^2 for the training and testing splits, one per line.
print(
    'R2 score of training : {}'.format(train_r2),
    'R2 score of testing : {}'.format(test_r2),
    sep='\n',
)

### --Exercise--
- Try reporting another metric for this model (e.g., mean squared error, MSE).

---