In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Step 1: Load the diabetes dataset
diabetes = load_diabetes()
diabetes_df = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
diabetes_df['target'] = diabetes.target

diabetes_df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [2]:
diabetes.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [3]:
# splitting the data into X and y
X = diabetes_df.iloc[:, :-1]
y = diabetes_df.iloc[:, -1]

In [4]:
# shuffling the data
def shuffle(X, y):
    np.random.seed(67)
    randomize = np.arange(len(X))
    np.random.shuffle(randomize)
    X = X.iloc[randomize]
    y = y.iloc[randomize]
    return X, y

X, y = shuffle(X, y)

In [5]:
# make sure that the labels and features are still matching after shuffling the data.
print(pd.DataFrame(X.head()))
print(pd.DataFrame(y.head()))

          age       sex       bmi        bp        s1        s2        s3  \
295 -0.052738  0.050680  0.039062 -0.040099 -0.005697 -0.012900  0.011824   
114  0.023546 -0.044642  0.110198  0.063187  0.013567 -0.032942 -0.024993   
197  0.048974  0.050680  0.003494  0.070072 -0.008449  0.013404 -0.054446   
255  0.001751 -0.044642 -0.065486 -0.005670 -0.007073 -0.019476  0.041277   
429 -0.041840 -0.044642 -0.033151 -0.022885  0.046589  0.041587  0.056003   

           s4        s5        s6  
295 -0.039493  0.016307  0.003064  
114  0.020655  0.099241  0.023775  
197  0.034309  0.013317  0.036201  
255 -0.039493 -0.003301  0.007207  
429 -0.024733 -0.025953 -0.038357  
     target
295    85.0
114   258.0
197   129.0
255   153.0
429    94.0


In [6]:
# Step 2: Split the dataset into train, dev, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [7]:
# initializing theta with random values
n = X_train.shape[1]
theta = np.random.normal(0, 0.1, (n,))

In [8]:
def compute_cost(Y_pred, Y_true):
    m = len(Y_true)
    cost = (1/2*m) * np.sum(np.square(Y_pred - Y_true)) # MSE
    return cost

#def multivariate_linear_regression(theta, input):
    #pred = np.dot(input, theta[1:]) + theta[0] * np.ones((input.shape[0], 1))
    #return pred

#def multivariate_linear_regression(theta, input):
    #theta = theta.reshape(-1, 1)
    #pred = np.dot(input, theta[1:]) + theta[0] * np.ones((input.shape[0], 1))
    #return pred


#def multivariate_linear_regression(theta, input):
    # Add a bias term to the input matrix
    bias_term = np.ones((input.shape[0], 1))
    input = np.hstack((bias_term, input))
    
    # Make sure theta is a column vector
    theta = theta.reshape(-1, 1)
    
    # Perform the matrix multiplication
    pred = np.dot(input, theta)
    
    return pred

def multivariate_linear_regression(theta, input):
    # Add a bias term to the input matrix
    bias_term = np.ones((input.shape[0], 1))
    input = np.hstack((bias_term, input))
    
    # Make sure theta is a column vector with n+1 elements
    theta = np.reshape(theta, (input.shape[1], 1))
    
    # Perform the matrix multiplication
    pred = np.dot(input, theta)
    
    return pred

In [9]:
print(theta.shape)
print(X_train.shape)

(10,)
(309, 10)


In [None]:
pred = multivariate_linear_regression(theta, X_train)
print(pred)

In [12]:
print(theta.shape)
print(X_train.shape)
print(y_train.shape)

(10,)
(309, 10)
(309,)


In [13]:
def gradient_descent(theta, X, Y_true, Y_pred, learning_rate, num_iterations):
    m = len(Y_true)
    cost_history = np.zeros(num_iterations)
    theta_history = np.zeros((num_iterations, n))
    for it in range(num_iterations):
        theta = theta - (1/m) * learning_rate * (X.T.dot((Y_pred - Y_true)))
        theta_history[it, :] = theta.T
        Y_pred = multivariate_linear_regression(theta, X)
        cost_history[it]  = compute_cost(Y_pred, Y_true)     
    return theta, cost_history, theta_history

# Apply gradient descent to the training data
theta = gradient_descent(theta, X_train, y_train, multivariate_linear_regression(theta, X_train), 0.01, 1000)[0]
theta

ValueError: shapes (309,11) and (10,1) not aligned: 11 (dim 1) != 10 (dim 0)

In [None]:
def gradient_descent(theta, X, Y_true, Y_pred, learning_rate):
    # calculate the number of samples in the training data
    m = len(Y_true)
    # update theta_0 using gradient descent
    theta[0] = theta[0] - learning_rate * (1/m) * np.sum(Y_pred - Y_true)
    # update theta_1 using gradient descent
    theta[1:] = theta[1:] - learning_rate * (1/m) * np.dot((Y_pred - Y_true), X)
    return theta


theta = gradient_descent(theta, X_train, y_train, multivariate_linear_regression(theta, X_train), 0.0001)
print(theta)

In [17]:
def normalize(X):
    mean = np.mean(X)
    std = np.std(X)
    X = (X - mean) / std
    return X, mean, std

X_train, mean, std = normalize(X_train)

In [None]:
def train(X, Y_true, theta, learning_rate, iterations):
    # normalize the features of X
    X, X_mean, X_std = normalize(X)
    # initialize an array to store the cost history for each iteration
    cost_history = np.zeros(iterations)
    for i in range(iterations):
        # get the predicted values for Y using the current theta
        Y_pred = multivariate_linear_regression(theta, X)
        # update theta using the gradient descent algorithm
        theta = gradient_descent(theta, X, Y_true, Y_pred, learning_rate)
        # store the cost for the current iteration
        cost_history[i] = compute_cost(Y_pred, Y_true)
    # return the updated theta, the cost history, and the normalization parameter
    return theta, cost_history, X_mean, X_std

def predict(X, theta, X_mean, X_std):
    # normalize the features of X using the normalization parameters
    X = (X - X_mean) / X_std
    # get the predicted values for Y using the updated theta
    Y_pred = multivariate_linear_regression(theta, X)
    # return the predictions
    return Y_pred

theta, cost_history, X_mean, X_std = train(X_train, y_train, theta, 0.0001, 1000)
pred = predict(X_test, theta, X_mean, X_std)

print("RMSE: ", np.sqrt(np.mean((pred - y_test) ** 2)))
print("MSE: ", np.mean((pred - y_test) ** 2))

In [None]:
compute_cost(pred, y_test)

In [None]:
# test the model on the dev set
pred = predict(X_dev[best_feature], theta, X_mean, X_std)
print("RMSE: ", np.sqrt(np.mean((pred - y_dev) ** 2)))
print("MSE: ", np.mean((pred - y_dev) ** 2))

In [None]:
compute_cost(pred, y_dev)