# Fazendo o deploy do nosso modelo

In [None]:
# Mount Google Drive inside the Colab filesystem so the CSV files stored
# under "My Drive" can be read with ordinary file paths.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Regressão no Scikit-Learn
- https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
<br><br>
- Vamos usar o dataset de preço das casas na Califórnia
    - https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html#sklearn.datasets.fetch_california_housing
    - Esse dataset já foi separado nos arquivos `casas_treino.csv` e `casas_teste.csv` utilizando o train_test_split com os parâmetros test_size=0.33 e random_state=42

In [None]:
# Importing pandas and numpy
import pandas as pd
import numpy as np

In [None]:
# Load the training split from the mounted Google Drive into a DataFrame.
TRAIN_CSV_PATH = '/content/drive/My Drive/Dados/casas_treino.csv'
db = pd.read_csv(TRAIN_CSV_PATH)

In [None]:
# Preview the first 3 rows of the raw training frame (note the extra
# 'Unnamed: 0' column that came along with the CSV).
db.head(3)

Unnamed: 0.1,Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,5088,0.9809,19.0,3.187726,1.129964,726.0,2.620939,33.98,-118.28,1.214
1,17096,4.2232,33.0,6.189696,1.086651,1015.0,2.377049,37.46,-122.23,3.637
2,5617,3.5488,42.0,4.821577,1.095436,1044.0,4.33195,33.79,-118.26,2.056


In [None]:
# Drop the 'Unnamed: 0' column before training. It is not one of the housing
# features (it appears to be the row index that was saved along with the CSV
# — TODO confirm), and if left in place it would be fed to the model as a
# feature.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
db = db.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Sanity check: show the first rows again to confirm 'Unnamed: 0' is gone.
db.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,0.9809,19.0,3.187726,1.129964,726.0,2.620939,33.98,-118.28,1.214
1,4.2232,33.0,6.189696,1.086651,1015.0,2.377049,37.46,-122.23,3.637
2,3.5488,42.0,4.821577,1.095436,1044.0,4.33195,33.79,-118.26,2.056
3,1.6469,24.0,4.274194,1.048387,1686.0,4.532258,35.87,-119.26,0.476
4,3.9909,14.0,4.608303,1.08935,2738.0,2.471119,37.54,-121.96,2.36


In [None]:
# Split the training frame into the target (median house value) and the
# feature matrix (every remaining column).
y = db["MedHouseVal"]
X = db.drop(columns=["MedHouseVal"])

### Usando o modelo de regressão linear
- https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [None]:
# Importando a Regressão Linear
from sklearn.linear_model import LinearRegression

In [None]:
# Build the linear regression estimator and fit it on the training data.
# fit() returns the estimator itself, so `reg` is the fitted model.
linear_model = LinearRegression()
reg = linear_model.fit(X, y)

In [None]:
# Load the held-out TEST split from the mounted Google Drive; it is only used
# to evaluate the fitted model via reg.score.
TEST_CSV_PATH = '/content/drive/My Drive/Dados/casas_teste.csv'
test = pd.read_csv(TEST_CSV_PATH)

In [None]:
# Drop the 'Unnamed: 0' column so the test frame has exactly the same columns
# as the training frame used to fit the model.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
test = test.drop(columns="Unnamed: 0", errors="ignore")

In [None]:
# Sanity check: first rows of the cleaned test frame.
test.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,1.6812,25.0,4.192201,1.022284,1392.0,3.877437,36.06,-119.01,0.477
1,2.5313,30.0,5.039384,1.193493,1565.0,2.679795,35.14,-119.46,0.458
2,3.4801,52.0,3.977155,1.185877,1310.0,1.360332,37.8,-122.44,5.00001
3,5.7376,17.0,6.163636,1.020202,1705.0,3.444444,34.28,-118.72,2.186
4,3.725,34.0,5.492991,1.028037,1063.0,2.483645,36.62,-121.93,2.78


In [None]:
# Split the test frame the same way as the training frame: target column on
# one side, every remaining column as features on the other.
y_test = test["MedHouseVal"]
X_test = test.drop(columns=["MedHouseVal"])

In [None]:
# Evaluate the fitted model on the held-out test split. For a scikit-learn
# regressor, score() returns the coefficient of determination (R^2).
reg.score(X=X_test, y=y_test)

0.5970494128783954

In [None]:
# Show the fitted intercept (the constant term of the linear model).
reg.intercept_

-37.08201093907984

In [None]:
# Show the fitted coefficients, one per feature column of X, in column order.
reg.coef_

array([ 4.44870466e-01,  9.55004561e-03, -1.21991503e-01,  7.79144696e-01,
       -7.68990809e-08, -3.29948505e-03, -4.19131153e-01, -4.34103468e-01])

### Persistência do modelo
- https://scikit-learn.org/stable/model_persistence.html

In [None]:
# Persist the fitted model to disk so it can be loaded later for deployment.
# The deprecated `cache_size` argument was removed from the call: joblib
# documents it as having no effect, so behaviour is unchanged.
# NOTE(review): the relative path writes to the kernel's current working
# directory, not to the mounted Drive — confirm the artifact is copied
# somewhere persistent before the Colab session ends.
from joblib import dump
dump(reg, "regressao.joblib")

['regressao.joblib']