In [7]:
import numpy as np

# --- Data loading and ordered train/test split ---
# genfromtxt parses every field as float; skip_header=1 drops the header row.
# NOTE(review): fields that cannot be parsed as numbers become NaN silently.
data = np.genfromtxt('./Admission_Predict_Ver1.1.csv.csv', delimiter=',', skip_header=1)
split_ratio = 0.8
split_index = int(split_ratio * len(data))
# The split follows file order (no shuffling): first 80% of rows train, rest test.
train_data = data[:split_index]
test_data = data[split_index:]

# Features are every column except the first and the last; the last column is
# the regression target.
# NOTE(review): the first column is presumably a row identifier - confirm against the CSV.
X_train = train_data[:, 1:-1]
y_train = train_data[:, -1]

X_test = test_data[:, 1:-1]
y_test = test_data[:, -1]

# --- Normalization (z-score) ---
# Statistics come from the training split only and are reused for the test
# split, so no test-set information leaks into the scaling.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# A constant feature column has std == 0; dividing by it would fill the design
# matrix (and then every weight) with NaN/inf. Scale such columns by 1 instead,
# which leaves them at exactly 0 after centring.
std = np.where(std == 0, 1.0, std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Prepend a column of ones so that weights[0] acts as the intercept term.
X_train = np.column_stack((np.ones(X_train.shape[0]), X_train))
X_test = np.column_stack((np.ones(X_test.shape[0]), X_test))

# --- Hyperparameters ---

learning_rate = 0.1   # gradient-descent step size
num_iterations = 100  # number of full-batch updates

# One weight per design-matrix column (bias included), starting from zero.
num_features = X_train.shape[1]
weights = np.zeros(num_features)

def calculate_mse(X, y, weights):
    """Return the mean squared error of the linear model on ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Design matrix; each row is one sample.
    y : array-like
        Target value for each row of ``X``.
    weights : array-like
        Coefficients applied to the columns of ``X``.

    Returns
    -------
    float
        Average of ``(np.dot(X, weights) - y) ** 2`` over all samples.
    """
    residuals = np.dot(X, weights) - y
    return np.mean(np.square(residuals))

def calculate_gradient(X, y, weights):
    """Return the descent direction used to update ``weights``.

    Computes ``X.T @ (X @ weights - y) / len(y)``, i.e. the gradient of
    *half* the mean squared error (the factor 2 of the plain MSE gradient
    is omitted).

    Parameters
    ----------
    X : ndarray
        Design matrix; each row is one sample.
    y : array-like
        Target value for each row of ``X``.
    weights : array-like
        Current coefficients, one per column of ``X``.

    Returns
    -------
    ndarray
        One gradient component per column of ``X``.
    """
    n_samples = len(y)
    residuals = np.dot(X, weights) - y
    return np.dot(X.T, residuals) / n_samples

# --- Training: full-batch gradient descent, updating `weights` in place ---
for step in range(num_iterations):
    gradient = calculate_gradient(X_train, y_train, weights)
    weights -= learning_rate * gradient

# --- Predictions of the fitted model on both splits ---
y_pred = np.dot(X_test, weights)
y_train_pred = np.dot(X_train, weights)

# --- Test-set metrics ---
# SSE is the MSE scaled back up by the sample count; R2 = 1 - SSE / SST,
# where SST is the total squared deviation of the targets from their mean.
mse = calculate_mse(X_test, y_test, weights)
sse = mse * len(y_test)
y_mean = np.mean(y_test)
sst = np.square(y_test - y_mean).sum()
r2 = 1 - sse / sst

# --- Train-set metrics (same formulas as above) ---
mse_train = calculate_mse(X_train, y_train, weights)
sse_train = mse_train * len(y_train)
y_train_mean = np.mean(y_train)
sst_train = np.square(y_train - y_train_mean).sum()
r2_train = 1 - sse_train / sst_train


# --- Report ---
# Each row is printed as `print(label, value)`, in this order.
report = (
    ("Coefficients for the  optimised model are :\n", weights),
    ("SSE -----> ", sse),
    ("Train Set SSE ----->", sse_train),
    ("MSE ------>", mse),
    ("Train Set MSE ------>", mse_train),
    ("R2 Score ------>", r2),
    ("Train Set R2 Score ----->", r2_train),
)
for label, value in report:
    print(label, value)

Coefficients for the  optimised model are :
 [ 0.72433076  0.02395506  0.02020956  0.00750254 -0.00162662  0.02057688
  0.06191103  0.01187439]
SSE ----->  0.18389109419579386
Train Set SSE -----> 1.6011492669088456
MSE ------> 0.0018389109419579385
Train Set MSE ------> 0.004002873167272114
R2 Score ------> 0.8985221850982109
Train Set R2 Score -----> 0.8026836627680487
