In [1]:
# GPA Prediction using ANN & Linear Regression for Assignment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam, RMSprop, Adagrad
import tensorflow as tf
import warnings
# Silence library warnings so cell outputs stay readable.
warnings.filterwarnings("ignore")
# Keep tf.function graph execution enabled. Forcing eager execution is a debugging
# aid: it runs every training/evaluation step op-by-op in Python, which makes the
# batch-size-1 runs in this notebook impractically slow. Each model is compiled
# with a freshly created optimizer, so there is no need for the eager workaround.
tf.config.run_functions_eagerly(False)

In [2]:
# 1. Load the dataset from its CSV file into a DataFrame
csv_path = '/content/Prodigy University Dataset.csv'
df = pd.read_csv(csv_path)

In [3]:
# 2. Separate the predictor columns (X) from the target column (y)
feature_columns = ['sat_sum', 'hs_gpa']
target_column = 'fy_gpa'
X = df[feature_columns]
y = df[target_column]

In [4]:
# 3. Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# 4. Standardise the features: the scaler is fitted on the training rows only,
#    then the same fitted scaler transforms both the training and the test rows
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# 5. Baseline: fit a linear regression on the scaled training data and
#    score it by mean absolute error on the held-out test rows
lr = LinearRegression().fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)
lr_mae = np.abs(lr_pred - y_test).mean()
print(f"\n Linear Regression MAE: {lr_mae:.4f}")



 Linear Regression MAE: 0.4856


In [7]:
# 6. ANN Model Function
def build_model(optimizer, input_dim=2, hidden_units=10):
    """Build and compile a one-hidden-layer regression network.

    Args:
        optimizer: Optimizer handed to ``model.compile``.
        input_dim: Number of input features. Defaults to 2, matching the
            two feature columns selected earlier in this notebook.
        hidden_units: Number of units in the ReLU hidden layer. Defaults to 10.

    Returns:
        The compiled ``Sequential`` model (MSE loss, MAE reported as a metric).
    """
    model = Sequential([
        Dense(hidden_units, input_dim=input_dim, activation='relu'),
        Dense(1),  # single output unit with no activation for the regression target
    ])
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model

In [8]:
# 7. Optimizer factories: each value is a zero-argument callable that returns a
#    brand-new optimizer, so no optimizer instance is shared between models
optimizers = {
    "SGD": SGD,
    "SGD with Momentum": lambda: SGD(momentum=0.9),
    "Adam": Adam,
    "RMSprop": RMSprop,
    "Adagrad": Adagrad,
}


In [9]:
# 8. Gradient-descent variants, expressed as the batch size passed to model.fit
gd_types = {
    # whole training set in a single batch
    "Batch GD": X_train.shape[0],
    # one sample per update
    "Stochastic GD": 1,
    # small batch of four samples per update
    "Mini-batch GD": 4,
}


In [None]:
# 9. Train and Collect Results
# Every (GD type, optimizer) pair trains its own freshly built model for 100 epochs;
# the test-set MAE of each run is appended to `results` as (gd_name, opt_name, mae).
results = []

for gd_name, batch_size in gd_types.items():
    for opt_name, opt_func in optimizers.items():
        print(f"\nTraining | GD: {gd_name} | Optimizer: {opt_name}")
        # Release Keras global state left over from the previous model so memory
        # does not keep growing across the 15 models built by this loop.
        tf.keras.backend.clear_session()
        # Reset the Python/NumPy/TensorFlow seeds before each run so every
        # configuration starts from the same random state: the comparison is then
        # between optimizers and batch sizes, not between lucky initialisations,
        # and re-running the cell reproduces the same numbers.
        tf.keras.utils.set_random_seed(42)
        optimizer_instance = opt_func()
        model = build_model(optimizer_instance)
        # The test split is passed as validation data for per-epoch monitoring only;
        # no callbacks are attached, so it does not influence training.
        history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=batch_size,
                            verbose=0, validation_data=(X_test_scaled, y_test))
        loss, mae = model.evaluate(X_test_scaled, y_test, verbose=0)
        results.append((gd_name, opt_name, mae))



Training | GD: Batch GD | Optimizer: SGD

Training | GD: Batch GD | Optimizer: SGD with Momentum

Training | GD: Batch GD | Optimizer: Adam

Training | GD: Batch GD | Optimizer: RMSprop

Training | GD: Batch GD | Optimizer: Adagrad

Training | GD: Stochastic GD | Optimizer: SGD

Training | GD: Stochastic GD | Optimizer: SGD with Momentum

Training | GD: Stochastic GD | Optimizer: Adam


In [None]:
# 10. Tabulate the collected (GD type, optimizer, MAE) tuples and print the table
result_columns = ["GD_Type", "Optimizer", "MAE"]
results_df = pd.DataFrame(results, columns=result_columns)
print("\nANN MAE Results:", results_df, sep="\n")


In [None]:
# 11. Plot ANN test MAE per optimizer (one line per GD type) against the
#     linear-regression baseline
fig, ax = plt.subplots(figsize=(12, 6))

for gd in results_df['GD_Type'].unique():
    rows = results_df.loc[results_df['GD_Type'] == gd]
    ax.plot(rows['Optimizer'], rows['MAE'], marker='o', label=gd)

# Dashed horizontal reference line at the linear-regression MAE
ax.axhline(lr_mae, color='red', linestyle='--', label=f"Linear Regression MAE: {lr_mae:.4f}")
ax.set_title("📈 MAE Comparison: ANN GD & Optimizers vs Linear Regression")
ax.set_xlabel("Optimizer")
ax.set_ylabel("MAE (Lower is Better)")
ax.tick_params(axis='x', labelrotation=45)
ax.legend()
fig.tight_layout()
ax.grid(True)
plt.show()
