In [54]:
import pandas as pd
data = pd.read_csv('/content/drive/MyDrive/student.csv')
print(data)

     Math  Reading  Writing
0      48       68       63
1      62       81       72
2      79       80       78
3      76       83       79
4      59       64       62
..    ...      ...      ...
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72

[1000 rows x 3 columns]


In [55]:
print("Top 5 rows of dataset:")
print(data.head())

print("Top 5 columns of dataset:")
print(data.tail())

Top 5 rows of dataset:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62
Top 5 columns of dataset:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72


In [56]:
print("Dataset information")
data.info()

Dataset information
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB


In [57]:
print("descriptive info:")
print(data.describe())

descriptive info:
              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.000000    70.000000    69.500000
75%      78.000000    81.000000    79.000000
max     100.000000   100.000000   100.000000


In [58]:
X = data[['Math', 'Reading']]
Y = data['Writing']

print("Feature X :\n", X.head())
print("Label Y :\n", Y.head())

Feature X :
    Math  Reading
0    48       68
1    62       81
2    79       80
3    76       83
4    59       64
Label Y :
 0    63
1    72
2    78
3    79
4    62
Name: Writing, dtype: int64


In [59]:
import numpy as np

d = 2
n = 5

W = np.random.rand(d, 1)
X_synthetic = np.random.rand(d, n)

Y_synthetic = np.dot(W.T, X_synthetic)

print("Weight Vector (W):\n", W)
print("\nFeature Matrix (X):\n", X_synthetic)
print("\nPredicted Values (Y):\n", Y_synthetic)

Weight Vector (W):
 [[0.94530153]
 [0.86948853]]

Feature Matrix (X):
 [[0.4541624  0.32670088 0.23274413 0.61446471 0.03307459]
 [0.01560606 0.42879572 0.06807407 0.25194099 0.22116092]]

Predicted Values (Y):
 [[0.4428897  0.68166381 0.27920301 0.79991423 0.22356234]]


In [60]:
from sklearn.model_selection import train_test_split

X = data[['Math', 'Reading']]
Y = data['Writing']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("Y_train shape:", Y_train.shape)
print("Y_test shape:", Y_test.shape)

X_train shape: (700, 2)
X_test shape: (300, 2)
Y_train shape: (700,)
Y_test shape: (300,)


In [61]:
import numpy as np

def cost_function(X, Y, W):
    ypred = np.dot(X, W)

    squared_error = np.square(ypred - Y)

    n = len(Y)
    cost = np.sum(squared_error) / (2 * n)

    return cost


In [62]:
X_test = np.array([[1, 2], [3, 4], [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])

cost = cost_function(X_test, Y_test, W_test)

if cost == 0:
    print("Proceed Further")
else:
    print("Something went wrong: Reimplement the cost function")

print("Cost function output:", cost)


Proceed Further
Cost function output: 0.0


In [63]:
def gradient_descent(X, Y, W, alpha, iterations):
    cost_history = []
    m = len(Y)

    for _ in range(iterations):
        Y_pred = np.dot(X, W)

        loss = Y_pred - Y

        dw = (1 / m) * np.dot(X.T, loss)

        W -= alpha * dw

        cost = cost_function(X, Y, W)
        cost_history.append(cost)

    return W, cost_history

In [64]:
np.random.seed(0)
X = np.random.rand(100, 3)
Y = np.random.rand(100)
W = np.random.rand(3)

alpha = 0.01
iterations = 1000

final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)

print("Final Parameters:", final_params)
print("Cost History:", cost_history)

Final Parameters: [0.20551667 0.54295081 0.10388027]
Cost History: [0.10711197094660153, 0.10634880599939901, 0.10559826315680616, 0.10486012948320558, 0.1041341956428534, 0.10342025583900626, 0.1027181077540776, 0.1020275524908062, 0.10134839451441931, 0.1006804415957737, 0.1000235047554587, 0.09937739820884377, 0.09874193931205609, 0.09811694850887098, 0.09750224927850094, 0.0968976680842672, 0.09630303432313951, 0.09571818027612913, 0.09514294105952065, 0.09457715457692842, 0.09402066147216397, 0.09347330508290015, 0.09293493139511913, 0.09240538899833017, 0.09188452904154543, 0.0913722051899995, 0.09086827358260123, 0.09037259279010502, 0.08988502377398917, 0.08940542984603007, 0.08893367662855953, 0.08846963201539432, 0.08801316613342668, 0.08756415130486386, 0.08712246201010665, 0.08668797485125507, 0.08626056851623205, 0.08584012374351278, 0.08542652328745133, 0.08501965188419301, 0.0846193962181636, 0.08422564488912489, 0.08383828837978763, 0.08345721902397185, 0.08308233097530

In [65]:
import numpy as np

def rmse(Y, Y_pred):

    rmse = np.sqrt(np.mean((Y - Y_pred) ** 2))
    return rmse

In [66]:
import numpy as np

def r2(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)  # Total Sum of Squares
    ss_res = np.sum((Y - Y_pred) ** 2)  # Sum of Squared Residuals
    r2 = 1 - (ss_res / ss_tot)  # Coefficient of Determination
    return r2

In [67]:
def main():
    W = np.zeros(X_train.shape[1])
    alpha = 0.00001
    iterations = 1000

    W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)

    Y_pred = np.dot(X_test, W_optimal)

    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    print("Final Weights:", W_optimal)
    print("Cost History (First 10 iterations):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)

if __name__ == "__main__":
    main()

Final Weights: [0.34973248 0.64484523]
Cost History (First 10 iterations): [2011.9142554734751, 1639.7141350925601, 1336.957481353757, 1090.6876393991413, 890.3653872086318, 727.4178946799879, 594.871837233527, 487.0549625191084, 399.35333835029707, 328.0138929475419]
RMSE on Test Set: 3.749953052183746
R-Squared on Test Set: -0.31832636502333056


Did your Model Overfitt, Underfitts, or performance is acceptable.
- The model underfits as the negative 𝑅^2 (-0.318) and high RMSE (3.75) show poor prediction accuracy. A low learning rate limits improvement, and better feature selection or tuning is needed.

In [68]:
def gradient_descent(X, Y, learning_rate=0.01, iterations=1000, epsilon=1e-10):
    m = len(Y)
    W = np.zeros(X.shape[1])
    cost_history = []

    for i in range(iterations):
        predictions = np.dot(X, W)
        error = predictions - Y

        if np.any(np.abs(error) > 1e10):
            print("Warning: Large error values detected. Stopping gradient descent.")
            break

        gradient = (1/m) * np.dot(X.T, error)
        W -= learning_rate * gradient

        cost = (1/(2*m)) * np.sum(error ** 2)

        if np.isnan(cost):
            print("Warning: Cost became NaN. Stopping gradient descent.")
            break

        cost_history.append(cost)

    return W, cost_history

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1]
results = {}

for lr in learning_rates:
    W_optimal, _ = gradient_descent(X_train.values, Y_train.values, learning_rate=lr, iterations=1000)
    Y_pred = np.dot(X_test, W_optimal)
    Y_pred = np.nan_to_num(Y_pred, nan=0.0)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))
    r2 = r2_score(Y_test, Y_pred)
    results[lr] = {'RMSE': rmse, 'R²': r2}

for lr, result in results.items():
    print(f"Learning Rate: {lr}")
    print(f"RMSE: {result['RMSE']}")
    print(f"R²: {result['R²']}")
    print('-' * 40)


Learning Rate: 0.0001
RMSE: 3.557387030877995
R²: -0.1864064831992771
----------------------------------------
Learning Rate: 0.001
RMSE: 1237496099.6745336
R²: -1.4356843094153283e+17
----------------------------------------
Learning Rate: 0.01
RMSE: 33406535864.02825
R²: -1.046246848532443e+20
----------------------------------------
Learning Rate: 0.1
RMSE: 3642529393.069695
R²: -1.2438769105665638e+18
----------------------------------------
Learning Rate: 1
RMSE: 3652574048702.0586
R²: -1.2507466107423515e+24
----------------------------------------
