In [None]:
# Mount Google Drive at /content/drive so the CSV files stored there can be read (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

In [None]:
# Read the two training CSV files from the mounted Drive folder.
# NOTE(review): the second filename is spelled 'backback' — confirm it matches the file name on Drive.
data1=pd.read_csv('/content/drive/MyDrive/backpack_train.csv')
data2=pd.read_csv('/content/drive/MyDrive/backback_train_extra.csv')

In [None]:
# Stack both training frames row-wise and renumber the index from 0.
data = pd.concat((data1, data2), axis=0, ignore_index=True)

In [None]:
# (rows, columns) of the combined frame before rows with missing values are dropped.
data.shape

(3994318, 11)

In [None]:
# Discard every row that has at least one missing value.
data = data.dropna(axis=0, how='any')

In [None]:
# (rows, columns) after dropping rows with missing values.
data.shape

(3288503, 11)

In [None]:
# Columns stored with object dtype — these are the ones that must be encoded numerically.
cat_cols = data.select_dtypes(include=['object']).columns
print(cat_cols)

Index(['Brand', 'Material', 'Size', 'Laptop Compartment', 'Waterproof',
       'Style', 'Color'],
      dtype='object')


In [None]:
# Integer-encode the multi-valued categorical columns. Each fitted encoder is
# kept in `label_encoders` so the same mapping can be inverted or reapplied
# (e.g. to a prediction set) later.
categorical_cols = ['Brand', 'Material', 'Style','Color']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    le.fit(data[col])
    data[col] = le.transform(data[col])
    label_encoders[col] = le

# Yes/No flags become 1/0; both flag columns share the same mapping.
yes_no_codes = {'Yes': 1, 'No': 0}
for flag_col in ('Laptop Compartment', 'Waterproof'):
    data[flag_col] = data[flag_col].map(yes_no_codes)

# Size is mapped to an explicit ordered code: Small < Medium < Large.
size_codes = {'Small': 0, 'Medium': 1, 'Large': 2}
data['Size'] = data['Size'].map(size_codes)

In [None]:
# Display the fitted encoders, keyed by the column each one was fitted on.
label_encoders

{'Brand': LabelEncoder(),
 'Material': LabelEncoder(),
 'Style': LabelEncoder(),
 'Color': LabelEncoder()}

In [None]:
# Remove the row identifier so it is not used as a feature.
# A non-mutating drop with errors='ignore' keeps this cell re-runnable: the
# original in-place drop raised KeyError on a second run because 'id' was
# already gone.
data = data.drop(columns='id', errors='ignore')
# The same column must be removed from any prediction frame before inference.

In [None]:
# Target: the last column of the frame.
y = data.iloc[:, -1]
# Features: every remaining column. An explicit copy makes `x` an independent
# frame, so the later column assignment during scaling does not operate on a
# slice of `data` (which triggers pandas' chained-assignment warning and leaves
# it ambiguous whether `data` is modified).
x = data.iloc[:, :-1].copy()

In [None]:
# Every feature column is numeric at this point, so all of them are listed for scaling.
numerical_cols = list(x.columns)
print(numerical_cols,len(numerical_cols))

['Brand', 'Material', 'Size', 'Compartments', 'Laptop Compartment', 'Waterproof', 'Style', 'Color', 'Weight Capacity (kg)'] 9


In [None]:
# Show the distinct values of each feature column before scaling.
for column_name, column_values in x.items():
    print(f"{column_name} : {column_values.unique()}")

Brand : [1 4 2 0 3]
Material : [1 0 2 3]
Size : [1 0 2]
Compartments : [ 7. 10.  2.  8.  1.  5.  3.  6.  4.  9.]
Laptop Compartment : [1 0]
Waterproof : [0 1]
Style : [2 1 0]
Color : [0 3 5 1 2 4]
Weight Capacity (kg) : [11.61172281 27.07853658 16.64375995 ...  9.54895871 12.79080004
 16.64173875]


In [None]:
# Rescale every feature column with min-max scaling; the fitted scaler is kept
# so the identical transform can be applied to other data later.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split performed further down — confirm that is intended.
scaler = MinMaxScaler()
x[numerical_cols] = scaler.fit_transform(x[numerical_cols])
# predict_data[numerical_cols]=scaler.transform(predict_data[numerical_cols])

In [None]:
# Show the distinct values of each feature column after scaling.
for column_name, column_values in x.items():
    print(f"{column_name} : {column_values.unique()}")

Brand : [0.25 1.   0.5  0.   0.75]
Material : [0.33333333 0.         0.66666667 1.        ]
Size : [0.5 0.  1. ]
Compartments : [0.66666667 1.         0.11111111 0.77777778 0.         0.44444444
 0.22222222 0.55555556 0.33333333 0.88888889]
Laptop Compartment : [1. 0.]
Waterproof : [0. 1.]
Style : [1.  0.5 0. ]
Color : [0.  0.6 1.  0.2 0.4 0.8]
Weight Capacity (kg) : [0.26446891 0.88314146 0.4657504  ... 0.18195835 0.311632   0.46566955]


In [None]:
# Convert the feature frame and the target series to float32 tensors.
x_torch = torch.tensor(x.to_numpy(), dtype=torch.float32)
y_torch = torch.tensor(y.to_numpy(), dtype=torch.float32)
# A prediction frame would be converted the same way before inference.

In [None]:
# Sanity check: feature tensor shape on the first line, target tensor shape on the second.
print(x_torch.shape, y_torch.shape, sep="\n")

torch.Size([3288503, 9])
torch.Size([3288503])


In [None]:
# Wrap the full feature/target tensors in a dataset.
# NOTE: batch_size and train_dataset are both reassigned in a later cell,
# where train_dataset is rebuilt from the training split only.
batch_size = 512  # Reduce further if memory issues persist
train_dataset = TensorDataset(x_torch, y_torch)

In [None]:
# Shuffled mini-batch loader over train_dataset.
# NOTE: train_loader is reassigned in a later cell once the train/validation split exists.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

In [None]:
class PricePredictionNN(nn.Module):
    """Fully connected regressor: input_size -> 18 -> 12 -> 12 -> 1.

    ReLU follows each hidden layer; the output layer is linear.
    Note: a class with the same name is defined again in a later cell and
    replaces this one once that cell runs.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 18)
        self.fc2 = nn.Linear(18, 12)
        self.fc3 = nn.Linear(12, 12)
        self.fc4 = nn.Linear(12, 1)  # Output layer

    def forward(self, x):
        """Return one raw (unactivated) prediction per input row."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return self.fc4(hidden)

In [None]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x_torch,
    y_torch,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build mini-batch loaders for the two splits: training batches are shuffled,
# validation batches keep their order.
batch_size = 512  # Reduce further if memory issues persist
loader_options = dict(batch_size=batch_size, pin_memory=True)

train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)

val_dataset = TensorDataset(x_val, y_val)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [None]:
class PricePredictionNN(nn.Module):
    """Feed-forward regressor: input_size -> 32 -> 16 -> 1.

    ReLU and dropout (p=0.2) follow each hidden layer. The output layer is
    linear so the network can produce any real value.

    Args:
        input_size: number of input features per sample.
    """

    def __init__(self, input_size):
        super(PricePredictionNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1) # Output layer

        self.dropout = nn.Dropout(0.2)  # Dropout layer with probability 0.2

    def forward(self, x):
        """Return a tensor of shape (batch, 1) holding the raw predictions."""
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = F.relu(self.fc2(x))
        x = self.dropout(x)  # Apply dropout
        # No activation on the output (regression task). The previous ReLU here
        # clamped predictions to >= 0 and, whenever the pre-activation went
        # negative, produced zero gradient for the whole network.
        x = self.fc3(x)
        return x

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: sqrt(mean((predictions - targets)^2) + eps).

    Args:
        eps: small constant added under the square root. The derivative of
            sqrt is infinite at 0, so without it a batch with zero error
            yields NaN gradients.
    """

    def __init__(self, eps=1e-8):
        super(RMSELoss, self).__init__()
        self.eps = eps

    def forward(self, predictions, targets):
        """Return the scalar RMSE between `predictions` and `targets` (same shape expected)."""
        mse = F.mse_loss(predictions, targets)
        return torch.sqrt(mse + self.eps)

In [None]:
# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Initialize model
input_size = x_torch.shape[1]
model = PricePredictionNN(input_size).to(device)

# Loss and optimizer
criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.002, weight_decay=1e-4)  # L2 regularization (weight decay)

# Training loop. Early stopping is currently disabled: its settings and the
# check at the end of the loop body are left commented out.
epochs = 500
# patience = 50  # Stop if no improvement for 50 epochs
# best_val_loss = float('inf')
# epochs_no_improve = 0

# Lists to store loss values for plotting
train_losses = []
val_losses = []

for epoch in range(epochs):
    # Switch back to training mode at the start of every epoch. The validation
    # phase below calls model.eval(); without this call the model would stay in
    # eval mode and dropout would be inactive from the second epoch onward.
    model.train()
    train_loss = 0.0

    # Training Phase
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)  # Move to the selected device

        optimizer.zero_grad()
        predictions = model(batch_X)
        # Flatten (batch, 1) predictions so they line up with the 1-D targets.
        loss = criterion(predictions.reshape(-1), batch_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    train_loss /= len(train_loader)  # Mean of the per-batch training losses

    # Validation Phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_X, val_y in val_loader:
            val_X, val_y = val_X.to(device), val_y.to(device)
            val_predictions = model(val_X)
            val_loss += criterion(val_predictions.reshape(-1), val_y).item()

    val_loss /= len(val_loader)  # Mean of the per-batch validation losses

    # Store losses for plotting
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Every 100 epochs, print the loss of the last training batch of that epoch
    if epoch % 100 == 0:
        print(f"Loss of last batch: {loss.item():.4f}, at Epoch {epoch}")

    # Early Stopping Check (disabled)
    # if val_loss < best_val_loss:
    #     best_val_loss = val_loss
    #     epochs_no_improve = 0
    # else:
    #     epochs_no_improve += 1

    # if epochs_no_improve >= patience:
    #     print(f"Early stopping at epoch {epoch}. Best validation loss: {best_val_loss:.4f}")
        # break  # Stop training

print("Training complete!")
print('train losses : ',train_losses)
print('validation losses : ',val_losses)

cpu
Loss of last batch: 39.6585, at Epoch 0
Loss of last batch: 37.7339, at Epoch 100
Loss of last batch: 38.6018, at Epoch 200
Loss of last batch: 39.1955, at Epoch 300
Loss of last batch: 37.7447, at Epoch 400


In [None]:
# Draw the per-epoch training and validation loss curves on one set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_losses, label="Training Loss")
ax.plot(val_losses, label="Validation Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss (RMSE)")
ax.set_title("Training & Validation Loss Over Epochs")
ax.legend()
ax.grid()
plt.show()


**Submission**

In [None]:
import numpy as np

# Singular values of the product A·Aᵀ for a small 2x2 example matrix.
A = np.array(((2, 4), (1, 4)))
gram = A @ A.T
singular_values = np.linalg.svd(gram, compute_uv=False)
print(singular_values)

[36.56239187  0.43760813]


In [None]:
import numpy as np

# Singular values of the 2x2 example matrix A itself (values only, no U/V factors).
A = np.array(((2, 4), (1, 4)))
singular_values = np.linalg.svd(A, compute_uv=False)
print(singular_values)

[6.04668437 0.66151956]
