In [1]:
import json
import os

import joblib
import pandas
import sklearn.linear_model
import sklearn.pipeline

# Instructions

- Read **the train data** from the CSV file and properly set the index


In [2]:
# Load the training split, using the `id` column directly as the row index.
train_csv_path = './data/cubic_features.train.csv'
data_train = pandas.read_csv(train_csv_path, index_col='id')
data_train

Unnamed: 0_level_0,feature_1,label,feature_2,feature_3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
253,1.4920,1.0676,2.226064,3.321287
667,-0.9317,0.8359,0.868065,-0.808776
85,-1.4077,-1.6642,1.981619,-2.789525
969,2.2750,0.7641,5.175625,11.774547
75,-2.7649,-0.0689,7.644672,-21.136754
...,...,...,...,...
835,-0.1237,-0.8241,0.015302,-0.001893
192,-1.8828,-1.0807,3.544936,-6.674405
629,-2.1139,-2.9819,4.468573,-9.446117
559,2.3218,1.2918,5.390755,12.516256


# Instructions

- Instantiate a `sklearn.linear_model.LinearRegression` model with the default configurations
- Fit the model with data columns properly selected
- Use `joblib.dump` to save the model to `./model/model.joblib`
- Also, use `.get_params` and `json.dump` to save the model configuration to `./model/config.json`
- Print out all model parameters (not to be confused with model configuration)


In [3]:
# Linear regression estimator built with no arguments, i.e. the library's default configuration.
model = sklearn.linear_model.LinearRegression()

In [4]:
# Separate the three feature columns (inputs) from the `label` column (target).
feature_columns = ['feature_1', 'feature_2', 'feature_3']
features_train = data_train[feature_columns]
labels_train = data_train['label']

# Keep the fit call as the last expression so the fitted estimator is still displayed.
model.fit(features_train, labels_train)

In [5]:
# Create the output directory first so the dump does not fail when ./model is absent
# (e.g. on a fresh checkout followed by Restart Kernel -> Run All).
os.makedirs('./model', exist_ok=True)

# Persist the fitted estimator; the returned list of written file names is the cell output.
joblib.dump(model, './model/model.joblib')

['./model/model.joblib']

In [6]:
# Configuration (constructor arguments) of the estimator, not the learned weights.
model_config = model.get_params()

# Write the configuration as pretty-printed JSON next to the serialized model.
with open('./model/config.json', 'w') as config_file:
    json.dump(model_config, config_file, indent=4)

# Instructions

- Type the model equation $\mathbb{E}[y|x_1, x_2, x_3]$ using the Markdown language
- Print out those model parameters (not to be confused with model configuration)

$\mathbb{E}[y|x_1, x_2, x_3] = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_3$

In [7]:
# Learned parameters of the fitted model: the coefficient array and the intercept.
coefficients_line = f"model.coef_     : {model.coef_}"
intercept_line = f"model.intercept_: {model.intercept_}"

# One print call with a newline separator produces the same two output lines.
print(coefficients_line, intercept_line, sep="\n")

model.coef_     : [0.11387919 0.00552581 0.08973625]
model.intercept_: 0.02776915700884651
