# Introduction to Machine Learning: Task 1b

__Author__: Jannick Sicher

### Initial Configurations and Packages

In [37]:
# Load packages
import numpy as np
import matplotlib.pyplot as plot
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn import datasets, linear_model
from sklearn.model_selection import GridSearchCV

In [38]:
# Read the training set from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer = "train.csv")

# Preview the first five rows as a quick sanity check of the parsed columns.
df.head(n = 5)

Unnamed: 0,Id,y,x1,x2,x3,x4,x5
0,0,-5.522114,1.764052,0.400157,0.978738,2.240893,1.867558
1,1,-21.78998,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,2,-7.911497,0.144044,1.454274,0.761038,0.121675,0.443863
3,3,-3.698062,0.333674,1.494079,-0.205158,0.313068,-0.854096
4,4,-16.001833,-2.55299,0.653619,0.864436,-0.742165,2.269755


### Define Target and Predictor Variables

In [39]:
# Target (dependent variable): the "y" column of the training frame.
y = df["y"]

# Raw predictors, kept available as individual Series under their usual names.
x1, x2, x3, x4, x5 = (df[name] for name in ("x1", "x2", "x3", "x4", "x5"))

# Feature matrix, 21 columns in this order:
#   columns  0-4  : x1..x5            (linear terms)
#   columns  5-9  : x1**2..x5**2      (quadratic terms)
#   columns 10-14 : exp(x1)..exp(x5)
#   columns 15-19 : cos(x1)..cos(x5)
#   column  20    : constant 1        (plays the role of the intercept)
base_features = np.column_stack((x1, x2, x3, x4, x5))
constant_term = np.ones((df.shape[0], 1))
Xs = np.hstack((base_features,
                base_features**2,
                np.exp(base_features),
                np.cos(base_features),
                constant_term))

###  Linear Regression

In [40]:
# Fit linear regression model
# The intercept is switched off because Xs already ends with a constant column of ones.
# NOTE: `normalize` is deliberately not passed (neither to the estimator nor in the grid).
# It was deprecated in scikit-learn 1.0 and removed in 1.2, where passing it raises an
# error. Its old default was False and it was ignored whenever fit_intercept=False, so
# omitting it leaves the fitted coefficients unchanged on older versions as well.
model = linear_model.LinearRegression(fit_intercept = False)
parameters = {'fit_intercept':[False], 'copy_X':[False]}

# Build Linear Model with GridsearchCV
# The grid holds a single parameter combination, so nothing is tuned here: the search
# only adds 10-fold cross-validated scoring and then refits on the full data set.
grid = GridSearchCV(model, parameters, cv = 10)
grid.fit(Xs, y)

GridSearchCV(cv=10, error_score='raise',
       estimator=LinearRegression(copy_X=True, fit_intercept=False, n_jobs=1, normalize=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'fit_intercept': [False], 'normalize': [False], 'copy_X': [False]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [41]:
# Take the weights from the refitted best estimator of the linear grid search
# and write them to disk, one weight per row, without header or index column.
best_linear_model = grid.best_estimator_
coefficients = best_linear_model.coef_
submission = pd.DataFrame(coefficients)
submission.to_csv(path_or_buf = 'submission_Linear.csv', index = False, header = False)

# Display the stored weights as the cell output.
submission

Unnamed: 0,0
0,0.621003
1,-1.880502
2,0.384095
3,-0.441386
4,0.386888
5,-0.361083
6,0.391405
7,0.175731
8,-2.719074
9,2.234341


###  Ridge Regression with GridSearchCV

In [42]:
# Define Regularization Parameters
# Candidate strengths are the integers 0, 1, ..., 999. alpha = 0 corresponds to
# plain, unregularised least squares, so that case is part of the search as well.
alpha = np.arange(0,1000, 1)
param_grid = dict(alpha = alpha)

# Ridge Regression
# The intercept is switched off because Xs already ends with a constant column of ones.
# NOTE: `normalize` is deliberately not passed. It was deprecated in scikit-learn 1.0
# and removed in 1.2, where passing it raises an error. Its old default was False and
# it was ignored whenever fit_intercept=False, so the results are unchanged.
ridge = Ridge(fit_intercept = False)

# 50-fold cross-validation over all 1000 alphas, selecting the alpha with the
# lowest mean squared error (scored as negative MSE, hence "higher is better").
ridge_regressor = GridSearchCV(ridge, param_grid,
                               scoring = 'neg_mean_squared_error', cv = 50)

ridge_regressor.fit(Xs, y)

GridSearchCV(cv=50, error_score='raise',
       estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=False, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'alpha': array([  0,   1, ..., 998, 999])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=0)

In [47]:
# Take the weights from the refitted best Ridge model of the grid search
# and write them to disk, one weight per row, without header or index column.
best_ridge_model = ridge_regressor.best_estimator_
coefficients = best_ridge_model.coef_
submission = pd.DataFrame(coefficients)
submission.to_csv(path_or_buf = 'submission_Ridge_GridSearch.csv', index = False, header = False)

# Display the stored weights as the cell output.
submission

Unnamed: 0,0
0,0.940576
1,-0.508385
2,-0.010886
3,0.355657
4,-0.045356
5,-0.994205
6,0.13821
7,-0.40317
8,-0.394762
9,0.557236


###  Ridge Regression with RidgeCV

In [44]:
# Define alpha values
# Candidate strengths are the integers 0, 1, ..., 999 (alpha = 0 means no regularisation).
alpha = np.arange(0,1000, 1)

# RidgeCV regression
# 50-fold cross-validation over the alpha candidates. scoring=None leaves the choice of
# selection metric to the library default, so the selected alpha need not match a search
# that is scored with mean squared error.
# NOTE: `normalize` is deliberately not passed. It was deprecated in scikit-learn 1.0
# and removed in 1.2, where passing it raises an error. Its old default was False and
# it was ignored whenever fit_intercept=False, so the results are unchanged.
reg = linear_model.RidgeCV(alphas=alpha, cv=50, fit_intercept=False, scoring=None)
reg.fit(Xs, y)

# Weights of the model fitted with the selected alpha.
weights = reg.coef_

In [48]:
# Write the RidgeCV weights to disk, one weight per row, without header or index column.
submission = pd.DataFrame(weights)
submission.to_csv(path_or_buf = 'submission_Ridge_RidgeCV.csv', index = False, header = False)

# Display the stored weights as the cell output.
submission

Unnamed: 0,0
0,0.856919
1,-0.333958
2,-0.064776
3,0.296246
4,-0.044689
5,-0.822274
6,0.093264
7,-0.428551
8,-0.320046
9,0.43066
