<a href="https://colab.research.google.com/github/np03cs4a240319-eng/Concept-and-technology-of-AI/blob/main/divyanikhatri_worksheet5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [2]:
# Load the student scores dataset (columns used below: Math, Reading, Writing).
data = pd.read_csv("student.csv")

# Top 5 rows
print("Top 5 rows:")
print(data.head())

# Bottom 5 rows
print("\nBottom 5 rows:")
print(data.tail())

# Dataset info
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("\nDataset Info:")
data.info()

# Descriptive statistics
print("\nDataset Description:")
print(data.describe())

# Split features and target: Math and Reading scores predict the Writing score.
X = data[['Math', 'Reading']].values
Y = data['Writing'].values


Top 5 rows:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62

Bottom 5 rows:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB
None

Dataset Description:
              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.

In [3]:
# Start every weight at zero: one weight per feature column of X, so W is a
# 1-D vector whose length equals the second dimension of X.
W = np.zeros(np.shape(X)[1])

# Report the dimensions of the three arrays before training.
print("Feature Matrix Shape:", np.shape(X))
print("Target Vector Shape:", np.shape(Y))
print("Weight Vector Shape:", np.shape(W))

Feature Matrix Shape: (1000, 2)
Target Vector Shape: (1000,)
Weight Vector Shape: (2,)


In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# The number of samples in each partition is the length of its first axis.
print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))

Training samples: 800
Testing samples: 200


In [7]:
import numpy as np

def cost_function(X, y, w):
    """Return half the mean squared error of the linear predictions ``X.dot(w)``.

    Args:
        X: Feature matrix with one sample per row.
        y: Target values, one per row of ``X``.
        w: Weight vector, one entry per column of ``X``.

    Returns:
        The scalar ``(1 / (2 * m)) * sum((X.dot(w) - y) ** 2)``, where ``m``
        is the number of rows of ``X``.
    """
    n_samples = X.shape[0]
    residuals = X.dot(w) - y
    half_mean_factor = 1 / (2 * n_samples)
    return half_mean_factor * np.sum(np.square(residuals))

# Sanity-check inputs: every target equals the sum of its row, so weights
# [1, 1] reproduce the targets exactly and the cost must be 0.
# Dedicated *_test_case names are used so the X_test / Y_test arrays produced
# by train_test_split are not overwritten by this toy example.
X_test_case = np.array([[1, 2],
                        [3, 4],
                        [5, 6]])

Y_test_case = np.array([3, 7, 11])
W_test_case = np.array([1, 1])

cost = cost_function(X_test_case, Y_test_case, W_test_case)
print("Cost:", cost)


Cost: 0.0


In [8]:
# Known-answer check: each target is the sum of its row's features, so with
# weights [1, 1] the predictions match the targets and the cost should be 0.
X_test_case = np.array([[1, 2], [3, 4], [5, 6]])
Y_test_case = np.array([3, 7, 11])
W_test_case = np.array([1, 1])

cost = cost_function(X_test_case, Y_test_case, W_test_case)

# The cost is a floating-point value, so compare against zero with a tolerance
# instead of relying on exact equality.
if np.isclose(cost, 0.0):
    print("Proceed Further")
else:
    print("Something went wrong")

print("Cost function output:", cost)

Proceed Further
Cost function output: 0.0


In [10]:
import numpy as np

# Cost function (half of the mean squared error)
def cost_function(X, y, w):
    """Compute the linear-regression cost ``(1 / (2 * m)) * sum((X.dot(w) - y) ** 2)``.

    Args:
        X: Feature matrix with one sample per row; ``m`` is its row count.
        y: Target values, one per row of ``X``.
        w: Weight vector, one entry per column of ``X``.

    Returns:
        The scalar cost for the given weights.
    """
    m = X.shape[0]
    squared_errors = np.square(X.dot(w) - y)
    return (1 / (2 * m)) * squared_errors.sum()

# Gradient Descent function
def gradient_descent(X, y, w, alpha, iterations):
    """Minimise the linear-regression cost with batch gradient descent.

    Each iteration moves the weights against the gradient
    ``(1 / m) * X.T.dot(X.dot(w) - y)`` scaled by ``alpha`` and records the
    cost (as computed by ``cost_function``) of the updated weights.

    Args:
        X: Feature matrix with one sample per row; ``m`` is its row count.
        y: Target values, one per row of ``X``.
        w: Starting weight vector. It is not modified; every update builds a
           new array.
        alpha: Learning rate (step size).
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(w, cost_history)``: the weights after the last step and a
        list with the cost after each step.
    """
    n_samples = X.shape[0]
    cost_history = []

    for _step in range(iterations):
        residuals = X.dot(w) - y
        # Step against the gradient of the cost with respect to w.
        w = w - alpha * ((1 / n_samples) * X.T.dot(residuals))
        cost_history.append(cost_function(X, y, w))

    return w, cost_history

# Dummy data for a quick smoke test of gradient_descent.
# The arrays get their own *_dummy names so the dataset's X, Y and W defined
# in earlier cells are not overwritten with random values.
np.random.seed(0)
X_dummy = np.random.rand(100, 3)
Y_dummy = np.random.rand(100)
W_dummy = np.zeros(3)

alpha = 0.01
iterations = 100

# Call function
W_final, cost_history = gradient_descent(X_dummy, Y_dummy, W_dummy, alpha, iterations)

# Print output
print("Final Weights:", W_final)
print("Cost History (first 5):", cost_history[:5])
print("Cost History (last):", cost_history[-1])



Final Weights: [0.15296994 0.16475757 0.16080411]
Cost History (first 5): [np.float64(0.15059661786297981), np.float64(0.14896115898591414), np.float64(0.14735331507172653), np.float64(0.1457726192196863), np.float64(0.14421861242432132)]
Cost History (last): 0.07164071016747253


In [11]:
# Repeat the experiment from a random starting point with more iterations.
# The seed is reset first so the random draws below are reproducible.
np.random.seed(0)

# Draw order matters for reproducibility: features, then targets, then weights.
X_rand, Y_rand = np.random.rand(100, 3), np.random.rand(100)
W_rand = np.random.rand(3)

alpha, iterations = 0.01, 1000

final_params, cost_history = gradient_descent(
    X_rand, Y_rand, W_rand, alpha=alpha, iterations=iterations
)

print("Final Parameters:", final_params)
print("Cost History (last 5):", cost_history[-5:])


Final Parameters: [0.20551667 0.54295081 0.10388027]
Cost History (last 5): [np.float64(0.05436764093665037), np.float64(0.054364452919908414), np.float64(0.05436127052403898), np.float64(0.05435809373901896), np.float64(0.05435492255484332)]


In [14]:
import numpy as np

def rmse(y_true, y_pred):
    """Return the root mean squared error between observed and predicted values.

    Both arguments are converted with ``np.array`` first, so plain Python
    lists are accepted as well as arrays.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values.

    Returns:
        ``sqrt(mean((y_true - y_pred) ** 2))``.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    squared_diff = np.square(actual - predicted)
    return np.sqrt(squared_diff.mean())

# Toy example: the squared errors are 1, 0 and 1, so RMSE = sqrt(2 / 3).
# The observed values are named Y_true so the shared name Y used by other
# cells is not rebound to toy values.
Y_true = np.array([3, 5, 7])
Y_pred = np.array([2, 5, 8])

error = rmse(Y_true, Y_pred)
print("RMSE:", error)


RMSE: 0.816496580927726


In [16]:
import numpy as np

# Toy example of the coefficient of determination: R² = 1 - SS_res / SS_tot.
# The observed values are named Y_true so the shared name Y used by other
# cells is not rebound to toy values.
Y_true = np.array([3, 5, 7, 9])
Y_pred = np.array([2.5, 5.1, 6.8, 9.2])

ss_res = np.sum((Y_true - Y_pred) ** 2)  # residual sum of squares
ss_tot = np.sum((Y_true - np.mean(Y_true)) ** 2)  # total sum of squares around the mean

r2 = 1 - (ss_res / ss_tot)
print("R² Score:", r2)



R² Score: 0.983


In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# ---------- Helper Functions ----------

def compute_cost(X, y, W):
    """Return half the mean squared error of the predictions ``np.dot(X, W)``.

    Args:
        X: Feature matrix with one sample per row.
        y: Target values; ``len(y)`` is used as the sample count ``m``.
        W: Weight vector, one entry per column of ``X``.

    Returns:
        The scalar ``(1 / (2 * m)) * sum((np.dot(X, W) - y) ** 2)``.
    """
    n_samples = len(y)
    residuals = np.dot(X, W) - y
    return (1 / (2 * n_samples)) * np.sum(np.square(residuals))

def gradient_descent(X, y, W, alpha, iterations):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration moves the weights against the gradient
    ``(1 / m) * np.dot(X.T, np.dot(X, W) - y)`` scaled by ``alpha`` and records
    the cost returned by ``compute_cost`` for the updated weights.

    Args:
        X: Feature matrix with one sample per row.
        y: Target values; ``len(y)`` is used as the sample count ``m``.
        W: Starting weight vector. It is copied, so the caller's array is
           left untouched, and integer-valued starting weights are accepted.
        alpha: Learning rate (step size).
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(W, cost_history)``: the float weight vector after the last
        step and a list with the cost after each step.
    """
    m = len(y)
    # Work on a private float copy: an in-place ``W -= ...`` would overwrite
    # the caller's array and raises a casting error when W has an integer
    # dtype (e.g. ``np.array([1, 1])``).
    W = np.array(W, dtype=float)
    cost_history = []

    for _ in range(iterations):
        predictions = np.dot(X, W)
        gradients = (1 / m) * np.dot(X.T, (predictions - y))
        W = W - alpha * gradients
        cost_history.append(compute_cost(X, y, W))

    return W, cost_history

def rmse(y_true, y_pred):
    """Return the root mean squared error, ``sqrt(mean((y_true - y_pred) ** 2))``.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values.

    Returns:
        The root of the mean squared difference between the two inputs.
    """
    residuals = y_true - y_pred
    mean_squared_error = np.mean(np.square(residuals))
    return np.sqrt(mean_squared_error)

def r2_score(y_true, y_pred):
    """Return the coefficient of determination ``1 - SS_res / SS_tot``.

    ``SS_res`` is the sum of squared residuals and ``SS_tot`` the sum of
    squared deviations of ``y_true`` from its mean.

    When ``y_true`` is constant, ``SS_tot`` is zero and the ratio is undefined.
    Instead of dividing by zero, the function then returns 1.0 for perfect
    predictions and 0.0 otherwise.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values.

    Returns:
        The R² score as a float.
    """
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    if ss_tot == 0:
        # Constant targets: avoid nan / -inf from a zero denominator.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# ---------- Main Function ----------

def main():
    """Train and evaluate the gradient-descent linear regression end to end.

    Reads ``student.csv``, uses the Math and Reading columns to predict the
    Writing column, holds out 20% of the rows for testing, trains from
    all-zero weights with ``gradient_descent``, and prints the learned
    weights, the first ten cost values, and the test-set RMSE and R².
    """
    # Load the data and separate the two feature columns from the target.
    scores = pd.read_csv("student.csv")
    features = scores[["Math", "Reading"]].values
    target = scores["Writing"].values

    # Fixed random_state keeps the 80/20 split repeatable.
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Train from all-zero weights, one weight per feature column.
    initial_weights = np.zeros(X_train.shape[1])
    learned_weights, costs = gradient_descent(
        X_train, Y_train, initial_weights, alpha=0.0001, iterations=1000
    )

    # Score the held-out rows before printing anything.
    test_predictions = np.dot(X_test, learned_weights)
    test_rmse = rmse(Y_test, test_predictions)
    test_r2 = r2_score(Y_test, test_predictions)

    print("Final Weights:", learned_weights)
    print("Cost History (First 10):", costs[:10])
    print("RMSE on Test Set:", test_rmse)
    print("R-Squared on Test Set:", test_r2)


# Run the pipeline when this code is executed as the main module.
if __name__ == "__main__":
    main()


Final Weights: [0.0894932  0.89504864]
Cost History (First 10): [np.float64(17.813797177522098), np.float64(16.983149024878305), np.float64(16.925140245010397), np.float64(16.867870818076216), np.float64(16.811093513105355), np.float64(16.754804026075387), np.float64(16.69899816573971), np.float64(16.64367177688582), np.float64(16.588820740001896), np.float64(16.53444097097003)]
RMSE on Test Set: 4.792607360540954
R-Squared on Test Set: 0.908240340333986


1. Model Performance
The linear regression model was evaluated using RMSE and R² score.

The error values are low: the test-set RMSE is about 4.79.

The R² score is reasonably high: about 0.908 on the test set.

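Conclusion: The model does not appear to overfit or underfit, although only test-set metrics are reported here; comparing them with the training-set RMSE and R² would confirm this. Its performance is acceptable.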

2. Learning Rate Experiment
Different learning rates were tested.

High learning rate: Model becomes unstable and does not converge.

Very low learning rate: Learning is very slow.

Small learning rate (0.00001): Stable learning and best result. Note that the final run in `main()` above uses alpha = 0.0001.

Conclusion: A small learning rate gives the best and most stable performance.

Final Conclusion

The linear regression model works correctly when a suitable learning rate is used.