Downloading Images and Data Files

In [None]:
!pip install kaggle
!echo '{"username":"malvikapatelll","key":"006cf0f3c6830fca23fe7e0a63cd0f4c"}' > kaggle.json
!mkdir -p /root/.kaggle
!mv kaggle.json /root/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c cs-480-2024-spring
!unzip *.zip

Imports

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import os
from PIL import Image
from scipy.stats import randint
from scipy.stats import zscore
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm
from transformers import ViTModel
from xgboost import XGBRegressor

Preprocessing

In [3]:
# Loading data from the csv files
train_df = pd.read_csv('/content/data/train.csv')
test_df = pd.read_csv('/content/data/test.csv')

# Aligning columns of both the training dataset and the test dataset:
# only feature columns present in both files (positions 1..163) are kept.
common_columns = train_df.columns[1:164].intersection(test_df.columns[1:164])

# Separating the features and targets (targets are every column from position 164 on)
X_ancillary = train_df[common_columns].values
y = train_df.iloc[:, 164:].values

# Removing outliers: a row is dropped when any feature lies `threshold` or more
# standard deviations away from that feature's mean.
z_scores = np.abs(zscore(X_ancillary))
# A zero-variance (or NaN-containing) column yields NaN z-scores, and NaN < threshold
# is False, which would silently discard every row. Such entries carry no outlier
# information, so they are treated as "no deviation".
z_scores = np.where(np.isnan(z_scores), 0.0, z_scores)
threshold = 3
mask = (z_scores < threshold).all(axis=1)
if not mask.any():
    raise ValueError("Outlier filtering removed every training row; check the feature columns.")

X_ancillary_filtered = X_ancillary[mask]
y_filtered = y[mask]
# Index is reset so that row labels equal row positions in the filtered arrays.
train_sample_filtered = train_df[mask].reset_index(drop=True)

# Standardizing the features to zero mean / unit variance.
# The scaler is fitted on the filtered training rows and reused for the test set.
scaler = StandardScaler()
X_ancillary_filtered = scaler.fit_transform(X_ancillary_filtered)
X_test_ancillary = scaler.transform(test_df[common_columns].values)

# Standardizing the targets; `target_scaler` is kept so predictions can be mapped
# back to the original units with inverse_transform.
target_scaler = StandardScaler()
y_scaled_filtered = target_scaler.fit_transform(y_filtered)

# Downsampling to 30% of the filtered rows. Because of the reset_index above,
# `train_sample.index` can be used as positional indices into the numpy arrays.
train_sample = train_sample_filtered.sample(frac=0.3, random_state=42)
X_train_anc = X_ancillary_filtered[train_sample.index]
y_train = y_scaled_filtered[train_sample.index]
image_ids_train = train_sample_filtered['id'].values[train_sample.index]

# Train-validation split (80/20); image ids, features and targets are split together
# so the three stay row-aligned.
X_train_img, X_val_img, X_train_anc, X_val_anc, y_train, y_val = train_test_split(
    image_ids_train, X_train_anc, y_train, test_size=0.2, random_state=42
)

In [None]:
# Dataset that pairs every image with its ancillary feature row (and targets, if any)
class CustomDataset(Dataset):
    """Map-style dataset yielding ``(image, ancillary[, target])`` samples.

    Args:
        image_ids: Indexable collection of image identifiers; the file for an id
            is ``<img_dir>/<id>.jpeg``.
        ancillary_features: Indexable collection of per-sample feature rows,
            aligned with ``image_ids``.
        targets: Optional indexable collection of per-sample targets. When it is
            ``None`` the samples contain no target element.
        img_dir: Directory holding the image files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_ids, ancillary_features, targets=None, img_dir=None, transform=None):
        self.image_ids, self.ancillary_features = image_ids, ancillary_features
        self.targets = targets
        self.img_dir, self.transform = img_dir, transform

    def __len__(self):
        """Number of samples, i.e. the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as float32 tensors next to the image."""
        file_name = f"{self.image_ids[idx]}.jpeg"
        picture = Image.open(os.path.join(self.img_dir, file_name)).convert("RGB")
        if self.transform:
            picture = self.transform(picture)

        features = torch.tensor(self.ancillary_features[idx], dtype=torch.float32)

        # Unlabelled data (e.g. the test set) yields a 2-tuple.
        if self.targets is None:
            return picture, features

        labels = torch.tensor(self.targets[idx], dtype=torch.float32)
        return picture, features, labels

# Data transformations
# Training pipeline: resize, light random augmentation, tensor conversion, normalization.
# NOTE(review): the mean/std below are the commonly used ImageNet statistics; the
# pretrained checkpoint's own preprocessing may use different values - confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: identical to the training one but without the random
# flip/rotation, so validation scores and test predictions are deterministic.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Image directories
train_img_dir = '/content/data/train_images'
test_img_dir = '/content/data/test_images'

# Datasets
train_dataset = CustomDataset(
    image_ids=X_train_img,
    ancillary_features=X_train_anc,
    targets=y_train,
    img_dir=train_img_dir,
    transform=transform
)

# Validation images live in the training image directory (they come from train.csv).
val_dataset = CustomDataset(
    image_ids=X_val_img,
    ancillary_features=X_val_anc,
    targets=y_val,
    img_dir=train_img_dir,
    transform=eval_transform
)

# The test set has no targets, so its samples are (image, ancillary) pairs.
test_dataset = CustomDataset(
    image_ids=test_df['id'].values,
    ancillary_features=X_test_ancillary,
    targets=None,
    img_dir=test_img_dir,
    transform=eval_transform
)

# Dataloaders - only the training loader shuffles; validation/test keep row order
# so predictions stay aligned with their ids.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=8, prefetch_factor=2)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=8, prefetch_factor=2)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=8, prefetch_factor=2)

In [None]:
# Regression model that fuses Vision Transformer image features with ancillary features
class ViTForRegression(nn.Module):
    """Pretrained ViT backbone plus a linear fusion head for multi-output regression.

    Args:
        img_feature_dim: Accepted for interface compatibility; the image feature
            width is taken from the backbone's config instead.
        anc_feature_dim: Number of ancillary features per sample.
        hidden_dim: Width of each of the two embeddings (image and ancillary).
        num_classes: Number of regression outputs.
    """

    def __init__(self, img_feature_dim, anc_feature_dim, hidden_dim, num_classes):
        super().__init__()
        # Pre-trained Vision Transformer backbone from Hugging Face
        backbone = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        self.vit = backbone
        self.img_embedding = nn.Linear(backbone.config.hidden_size, hidden_dim)
        self.anc_embedding = nn.Linear(anc_feature_dim, hidden_dim)
        # The head consumes the two embeddings concatenated side by side.
        self.fc = nn.Linear(2 * hidden_dim, num_classes)

    def forward(self, img, anc_features):
        """Return predictions of shape ``(batch, num_classes)``."""
        # Image branch: average the backbone's last hidden state over dim 1, then project.
        token_states = self.vit(pixel_values=img).last_hidden_state
        pooled = token_states.mean(dim=1)

        # Concatenate the image and ancillary embeddings and regress from the result.
        fused = torch.cat(
            [self.img_embedding(pooled), self.anc_embedding(anc_features)],
            dim=1,
        )
        return self.fc(fused)

# Seeding the global RNGs before any weights are created, so the randomly
# initialised layers and the shuffled batch order are repeatable between runs
# (as far as the hardware allows). 42 matches the random_state used elsewhere.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Initializing model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hidden_dim = 256
# One regression output per target column.
num_classes = y_train.shape[1]

# Arguments: image feature width, ancillary feature count, embedding width, output count.
model = ViTForRegression(768, X_train_anc.shape[1], hidden_dim, num_classes).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Gradscaler for Mixed Precision Training
gradscaler = GradScaler()

def train_model(model, dataloader, criterion, optimizer, device, gradscaler, accumulation_steps=2):
    """Run one training epoch with mixed precision and gradient accumulation.

    Args:
        model: Module called as ``model(images, anc_features)``.
        dataloader: Sized iterable of ``(images, anc_features, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.
        gradscaler: Gradient scaler used for the backward pass and optimizer step.
        accumulation_steps: Number of batches whose gradients are accumulated
            (summed) before each optimizer step.

    Returns:
        Mean per-batch loss over the epoch, or ``0.0`` for an empty dataloader.
    """
    model.train()
    running_loss = 0.0

    num_batches = len(dataloader)
    if num_batches == 0:
        # Nothing to train on; avoid dividing by zero below.
        return 0.0

    optimizer.zero_grad()

    for step, (images, anc_features, targets) in enumerate(tqdm(dataloader, desc="Training")):
        images, anc_features, targets = images.to(device), anc_features.to(device), targets.to(device)

        with autocast():
            outputs = model(images, anc_features)
            loss = criterion(outputs, targets)

        gradscaler.scale(loss).backward()

        batches_seen = step + 1
        # Step at the end of every accumulation window, and also after the final
        # batch: otherwise the gradients of a trailing partial window would never
        # be applied and would be wiped by the next epoch's zero_grad().
        if batches_seen % accumulation_steps == 0 or batches_seen == num_batches:
            gradscaler.step(optimizer)
            gradscaler.update()
            optimizer.zero_grad()

        running_loss += loss.item()

    return running_loss / num_batches

def evaluate_model(model, dataloader, criterion, device):
    """Return the mean per-batch loss of `model` over `dataloader` without updating weights.

    The model is switched to eval mode; `train_model` switches it back to train
    mode at the start of the next epoch. Returns 0.0 for an empty dataloader.
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for images, anc_features, targets in tqdm(dataloader, desc="Validating"):
            images, anc_features, targets = images.to(device), anc_features.to(device), targets.to(device)
            with autocast():
                loss = criterion(model(images, anc_features), targets)
            total_loss += loss.item()
    return total_loss / max(len(dataloader), 1)

# Training model
# The held-out split is scored after every epoch so over-/under-fitting is visible;
# without it the fixed epoch count is run with no validation signal at all.
num_train_epochs = 10
for epoch in range(num_train_epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer, device, gradscaler, accumulation_steps=4)
    print(f"Epoch {epoch+1}/{num_train_epochs}, Loss: {train_loss}")
    val_loss = evaluate_model(model, val_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_train_epochs}, Val Loss: {val_loss}")


In [None]:
# Predicting on the test set
def predict_model(model, dataloader, device):
    """Run `model` over every batch of `dataloader` and gather the outputs.

    Args:
        model: Module called as ``model(images, anc_features)``; it is put into
            eval mode before inference.
        dataloader: Iterable of ``(images, anc_features)`` batches.
        device: Device the batches are moved to.

    Returns:
        NumPy array with one row per sample, in dataloader order.
    """
    model.eval()
    collected_rows = []
    with torch.no_grad():
        for batch_images, batch_anc in tqdm(dataloader, desc="Predicting"):
            batch_out = model(batch_images.to(device), batch_anc.to(device))
            # Move to host memory and append the batch row by row.
            host_rows = batch_out.cpu().numpy()
            collected_rows.extend(host_rows)
    return np.array(collected_rows)

# RandomForestRegressor and XGBRegressor (baseline configurations)
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    max_features='sqrt',
    n_jobs=-1,
    random_state=42
)

xgb_regressor = XGBRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method='hist',
    n_jobs=-1,
    random_state=42,
    early_stopping_rounds=10,
)

# Training the regression models on the ancillary features only.
# The validation split drives XGBoost's early stopping.
rf_regressor.fit(X_train_anc, y_train)
xgb_regressor.fit(X_train_anc, y_train, eval_set=[(X_val_anc, y_val)], verbose=False)

# RandomForestRegressor Randomized Search
rf_param_dist = {
    'n_estimators': randint(100, 200),
    'max_depth': randint(10, 20),
    # 'auto' was removed from RandomForestRegressor in scikit-learn 1.3 and makes
    # every candidate that samples it fail; 1.0 (use all features) is its
    # documented equivalent for regressors.
    'max_features': [1.0, 'sqrt'],
    'min_samples_split': randint(2, 5),
    # NOTE(review): scipy's randint excludes the upper bound, so this always draws 1.
    'min_samples_leaf': randint(1, 2),
    'bootstrap': [True, False]
}

rf_random_search = RandomizedSearchCV(
    rf_regressor, param_distributions=rf_param_dist, n_iter=10, cv=2, random_state=42, n_jobs=-1, scoring='r2'
)
rf_random_search.fit(X_train_anc, y_train)

# XGBRegressor Randomized Search
xgb_param_dist = {
    'n_estimators': randint(100, 200),
    'max_depth': randint(4, 10),
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
}

# A fresh estimator (no early stopping) replaces the baseline one for the search.
xgb_regressor = XGBRegressor(objective='reg:squarederror', n_jobs=-1)

xgb_random_search = RandomizedSearchCV(
    xgb_regressor, param_distributions=xgb_param_dist, n_iter=10, cv=3, random_state=42, n_jobs=-1, scoring='r2'
)
# verbose=False: otherwise every boosting round of every CV fit logs its eval metric.
xgb_random_search.fit(X_train_anc, y_train, eval_set=[(X_val_anc, y_val)], verbose=False)

# Getting the best models
best_rf = rf_random_search.best_estimator_
best_xgb = xgb_random_search.best_estimator_

# Re-training the best models
best_rf.fit(X_train_anc, y_train)
best_xgb.fit(X_train_anc, y_train, eval_set=[(X_val_anc, y_val)], verbose=False)

# Combining predictions (all three models predict standardized targets)
cnn_predictions_scaled = predict_model(model, test_loader, device)
rf_predictions = best_rf.predict(X_test_ancillary)
xgb_predictions = best_xgb.predict(X_test_ancillary)

# A shape mismatch would otherwise broadcast silently or fail deep inside the blend.
assert cnn_predictions_scaled.shape == rf_predictions.shape == xgb_predictions.shape, (
    "model predictions must share one (n_samples, n_targets) shape before blending"
)

# Getting final predictions based on weightage: 50% ViT, 25% random forest, 25% XGBoost
y_test_pred = (0.5 * cnn_predictions_scaled + 0.25 * rf_predictions + 0.25 * xgb_predictions)
# Mapping the blended predictions back to the original target units
y_test_pred_unscaled = target_scaler.inverse_transform(y_test_pred)

In [None]:
# Building the submission table: the `id` column first, then one column per target
submission_targets = ['X4', 'X11', 'X18', 'X26', 'X50', 'X3112']
submission_df = (
    pd.DataFrame(y_test_pred_unscaled, columns=submission_targets)
    .assign(id=test_df['id'])
    .loc[:, ['id', *submission_targets]]
)

# Writing the file without the DataFrame index
submission_df.to_csv('CS480ProjectSubmission.csv', index=False)