<a href="https://colab.research.google.com/github/sabhinav3/Intro_to_ML/blob/main/assignment_5_2_c.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the data
data = pd.read_csv("/content/Housing.csv")



In [2]:
# Preprocess the data by dropping unnecessary columns
columns_to_drop = ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea", "furnishingstatus"]
data.drop(columns=columns_to_drop, inplace=True)

# Split the data into features and targets
X = data[['area', 'bedrooms', 'bathrooms', 'stories', 'parking']]
y = data['price'].values.reshape(-1, 1)  # Reshape if y is a 1D array


In [3]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)



In [5]:
# Initialize variables to store the best model and its performance
best_model = None
best_val_loss = float('inf')

# Loop through different learning rates
learning_rates = [0.1, 0.01, 0.001,0.0001]
for learning_rate in learning_rates:
    # Create and train the model
    model = LinearRegression()
      # Update the model using the entire dataset
    model.fit(X_train_scaled, y_train)
    # Training loop
    for epoch in range(5001):
        # Predict and evaluate the model
        y_train_pred = model.predict(X_train_scaled)
        y_val_pred = model.predict(X_val_scaled)

        # Calculate performance metrics every 500 epochs
        if epoch % 500 == 0:
            train_mse = mean_squared_error(y_train, y_train_pred)
            val_mse = mean_squared_error(y_val, y_val_pred)
            train_r2 = r2_score(y_train, y_train_pred)
            val_r2 = r2_score(y_val, y_val_pred)

            print(f"\nLearning Rate: {learning_rate}, Epoch: {epoch}")
            print(f"Training MSE: {train_mse:.2f}, Training R²: {train_r2:.4f}")
            print(f"Validation MSE: {val_mse:.2f}, Validation R²: {val_r2:.4f}")

            # Check if the current model has lower validation loss
            if val_mse < best_val_loss:
                best_val_loss = val_mse
                best_model = model



Learning Rate: 0.1, Epoch: 0
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 500
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 1000
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 1500
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 2000
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 2500
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0.1, Epoch: 3000
Training MSE: 1350008211326.58, Training R²: 0.5622
Validation MSE: 2292721545725.37, Validation R²: 0.5464

Learning Rate: 0

In [6]:
# information about the best model
if best_model:
    print("\nBest model setup:")
    print(f"Best Validation Loss: {best_val_loss:.4f}")
    print(f"Training Loss: {train_mse:.4f}")
    print(f"R² (R-squared) Validation: {val_r2:.4f} (higher is better)")
    # You can also print other information about the best model, e.g., learning rate, epoch, etc.
else:
    print("No results to report.")


Best model setup:
Best Validation Loss: 2292721545725.3667
Training Loss: 1350008211326.5803
R² (R-squared) Validation: 0.5464 (higher is better)
