In [None]:
import boto3
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Where the data lives: the bucket name and the key of the csv inside it
bucket_name = 'data-445'
file_key = 'Chapter3/housing.csv'

## Getting a handle on the bucket through the boto3 s3 resource
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

## Requesting the object and taking the 'Body' entry out of the response
bucket_object = bucket.Object(file_key)
file_object = bucket_object.get()
file_content_stream = file_object.get('Body')

## Parsing the body as csv and previewing the first rows
housing = pd.read_csv(file_content_stream)
housing.head()

In [None]:
## Defining the input and target
X = housing[['RM', 'LSTAT', 'PTRATIO']]
Y = housing['MEDV']

## Splitting the data 50/50 into train and validation.
## random_state pins the shuffle so that Restart & Run All reproduces the same
## split (and therefore the same MSE). Change or drop it to see how much the
## validation-set estimate moves from one random split to another.
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.5, shuffle = True, random_state = 42)

## Importing LinearRegression
from sklearn.linear_model import LinearRegression

## Fitting a multiple linear regression model (three predictors) on the train set
lm_md = LinearRegression().fit(X_train, Y_train)

## Predicting on validation set
val_preds = lm_md.predict(X_val)

## Comparing predictions and actuals (via mse)
mse = np.mean((Y_val - val_preds)**2)
mse

In [None]:
## Defining the input and target
X = housing[['RM', 'LSTAT', 'PTRATIO']]
Y = housing['MEDV']

## Importing LeaveOneOut and LinearRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.linear_model import LinearRegression

## Defining list to store the held-out prediction of each iteration
y_preds = []

cv = LeaveOneOut()

for train_ix, test_ix in cv.split(X):

    ## Splitting data into train and validation.
    ## cv.split yields integer row positions, not index labels, so rows are
    ## selected with .iloc; .loc would only work while the index is exactly 0..n-1.
    X_train, X_val = X.iloc[train_ix], X.iloc[test_ix]
    Y_train, Y_val = Y.iloc[train_ix], Y.iloc[test_ix]

    ## Fitting the linear regression model
    lm_md = LinearRegression().fit(X_train, Y_train)

    ## Predicting on the single held-out observation
    val_pred = lm_md.predict(X_val)

    ## Storing results
    y_preds.append(val_pred[0])

## Comparing predictions and actuals (via mse).
## LeaveOneOut holds out rows in positional order, so y_preds lines up with Y
## row by row; both are converted to arrays to make the comparison explicitly
## positional instead of relying on list-vs-Series arithmetic.
mse = np.mean((np.asarray(y_preds) - np.asarray(Y))**2)
mse

In [None]:
## Defining the input and target
X = housing[['RM', 'LSTAT', 'PTRATIO']]
Y = housing['MEDV']

## Importing KFold and LinearRegression
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

## Defining list to store the MSE of each fold
k_mse = []

## random_state pins the shuffled fold assignment so that re-running the cell
## reproduces the same folds and the same averaged MSE
kfold = KFold(n_splits = 5, shuffle = True, random_state = 42)

for train_ix, test_ix in kfold.split(X):

    ## Splitting data into train and validation.
    ## kfold.split yields integer row positions, not index labels, so rows are
    ## selected with .iloc; .loc would only work while the index is exactly 0..n-1.
    X_train, X_val = X.iloc[train_ix], X.iloc[test_ix]
    Y_train, Y_val = Y.iloc[train_ix], Y.iloc[test_ix]

    ## Fitting the linear regression model
    lm_md = LinearRegression().fit(X_train, Y_train)

    ## Predicting on validation set
    val_preds = lm_md.predict(X_val)

    ## Computing the MSE of the k-fold
    k_mse.append(np.mean((val_preds - Y_val)**2))

## Computing the average MSE across the folds and displaying it
mse = np.mean(k_mse)
mse