In [1]:
#TSK1
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset
data = np.load('Data.npz')

# Extract features and target variable
X = data['x']
y = data['y']

# Preprocessing: Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Feature Engineering: Create polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# Concatenate original and polynomial features
X_final = np.concatenate((X_scaled, X_poly), axis=1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Regression model
random_forest = RandomForestRegressor()
random_forest.fit(X_train, y_train)

# Predict on the training set
y_pred_train = random_forest.predict(X_train)
mse_train = mean_squared_error(y_train, y_pred_train)
print("Random Forest Regression (Training Set): Mean Squared Error =", mse_train)




Random Forest Regression (Training Set): Mean Squared Error = 0.3072541789321716


In [2]:
#task 1c after applying lasso regression

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset
data = np.load('Data.npz')

# Extract features and target variable
X = data['x']
y = data['y']
# Feature Engineering: Create polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# Concatenate original and polynomial features
X_final = np.concatenate((X_scaled, X_poly), axis=1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Regression model
random_forest = RandomForestRegressor()
random_forest.fit(X_train, y_train)

# Predict on the training set
y_pred_train = random_forest.predict(X_train)
mse_train = mean_squared_error(y_train, y_pred_train)
print("Random Forest Regression (Training Set): Mean Squared Error =", mse_train)
# Preprocessing: Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define a pipeline for feature engineering and model training
pipeline = Pipeline([
    ('poly', PolynomialFeatures()),
    ('lasso', Lasso())
])

# Define hyperparameters grid for grid search
param_grid = {
    'poly__degree': [2, 3],
    'lasso__alpha': [0.0001, 0.001, 0.01, 0.1]
}

# Perform grid search
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best model
best_model = grid_search.best_estimator_

# Predict on the training set using the best model
y_pred_train_best = best_model.predict(X_train)
mse_train_best = mean_squared_error(y_train, y_pred_train_best)
print(" Mean Squared Error after lasso regresison as it is more sensitive increased mse =", mse_train_best)


Random Forest Regression (Training Set): Mean Squared Error = 0.301303066479766
 Mean Squared Error after lasso regresison as it is more sensitive increased mse = 1.9403378061739296


In [3]:
#task2
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Step 1: Data Preprocessing
X = data['x']  # Features
y = data['y']  # Target variable

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Model Training
decision_tree = DecisionTreeRegressor(random_state=42)
decision_tree.fit(X_train, y_train)



# Step 4: Tune Hyperparameters
param_grid = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(DecisionTreeRegressor(random_state=42), param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Step 5: Final Evaluation
best_decision_tree = grid_search.best_estimator_
best_decision_tree.fit(X, y)  # Retrain on entire dataset

# Evaluate on entire dataset
y_pred_full = best_decision_tree.predict(X)
mse_full = mean_squared_error(y, y_pred_full)
print("MSE on entire dataset:", mse_full)


Best Hyperparameters: {'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5}
MSE on entire dataset: 0.8124073578788146


In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Load the dataset
data = np.load("Data_bonus.npz")

# Extract features and targets
features = data['x']
targets = data['y']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Define a more complex neural network architecture
class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)  # Increase the number of neurons in the hidden layers
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Create an instance of the neural network
model = NeuralNet(input_size=X_train.shape[1])

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert data into PyTorch DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_pred.squeeze(), y_test_tensor)
    print(f"MSE IS {test_loss:.4f}")


Epoch 1/100, Loss: 0.7294
Epoch 2/100, Loss: 0.4125
Epoch 3/100, Loss: 0.3459
Epoch 4/100, Loss: 0.3056
Epoch 5/100, Loss: 0.2590
Epoch 6/100, Loss: 0.2198
Epoch 7/100, Loss: 0.1890
Epoch 8/100, Loss: 0.1497
Epoch 9/100, Loss: 0.1175
Epoch 10/100, Loss: 0.0961
Epoch 11/100, Loss: 0.0733
Epoch 12/100, Loss: 0.0577
Epoch 13/100, Loss: 0.0444
Epoch 14/100, Loss: 0.0432
Epoch 15/100, Loss: 0.0323
Epoch 16/100, Loss: 0.0265
Epoch 17/100, Loss: 0.0232
Epoch 18/100, Loss: 0.0216
Epoch 19/100, Loss: 0.0196
Epoch 20/100, Loss: 0.0190
Epoch 21/100, Loss: 0.0180
Epoch 22/100, Loss: 0.0149
Epoch 23/100, Loss: 0.0170
Epoch 24/100, Loss: 0.0170
Epoch 25/100, Loss: 0.0154
Epoch 26/100, Loss: 0.0142
Epoch 27/100, Loss: 0.0136
Epoch 28/100, Loss: 0.0147
Epoch 29/100, Loss: 0.0130
Epoch 30/100, Loss: 0.0138
Epoch 31/100, Loss: 0.0139
Epoch 32/100, Loss: 0.0136
Epoch 33/100, Loss: 0.0125
Epoch 34/100, Loss: 0.0122
Epoch 35/100, Loss: 0.0123
Epoch 36/100, Loss: 0.0143
Epoch 37/100, Loss: 0.0143
Epoch 38/1