# Final Model Submission 2: Deep Learning Ensemble

**Author:** Prajwal Kambale, Team Init to WinIt

## Approach Summary: The "Committee of Experts"
After initial experiments showed that simply combining text and image features into a single model did not yield significant improvements (indicating a feature redundancy problem), a more sophisticated ensembling strategy was adopted. 

This approach, which I call the "Committee of Experts," treats the text and image data as separate domains requiring specialized models:
1.  **The "Text Expert":** A LightGBM model trained on a rich set of engineered and TF-IDF features. It excels at understanding explicit details in the product description.
2.  **The "Image Expert":** A fine-tuned EfficientNet-B0 model. This deep learning model was trained in a prior step to specialize in predicting price (in log space, like the Text Expert) directly from visual cues in the product image.

This notebook documents the final step: training the Text Expert on the full dataset, loading the pre-trained Image Expert, and blending their predictions to generate the final submission file.

In [None]:


import pandas as pd
import numpy as np
import re
import joblib
from tqdm import tqdm
import os


from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
import lightgbm as lgb


import torch
import torch.nn as nn
import timm
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms


tqdm.pandas(desc="Applying Features")

In [None]:


# --- Text Expert: feature engineering + LightGBM on the full training set ---
print("--- Building the Standalone Text Expert ---")
df_train = pd.read_feather('dataset/train_processed.feather')

# Pack size parsed from the description, e.g. "pack of 6" -> 6. Rows with no
# recognisable quantity phrase default to 1. expand=False makes str.extract
# return a Series (there is a single capture group) rather than a one-column
# DataFrame.
df_train['item_quantity'] = (
    df_train['catalog_content']
    .str.lower()
    .str.extract(r'(?:pack of|count|pk|set of|pack)\s*:?\s*(\d+)', expand=False)
    .fillna(1)
    .astype(int)
)

# Collapse brands seen fewer than 10 times into a single 'Other' bucket so the
# one-hot encoding does not grow a column for every rare brand.
brand_counts = df_train['brand'].value_counts()
rare_brands = brand_counts[brand_counts < 10].index
df_train['brand'] = df_train['brand'].replace(rare_brands, 'Other')

# Three feature groups are concatenated: the raw quantity, a one-hot brand
# (brands unseen at fit time encode as all zeros), and uni/bi-gram TF-IDF over
# the catalog text. Every other column is dropped.
text_preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', ['item_quantity']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['brand']),
        ('tfidf', TfidfVectorizer(max_features=15000, stop_words='english', ngram_range=(1, 2)), 'catalog_content')
    ],
    remainder='drop'
)

text_model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=31, random_state=42, n_jobs=-1)

print("Training the Text Expert model on the full dataset...")
# fit_transform fits and transforms in a single pass; calling fit() and then
# transform() on the same frame would tokenise the whole corpus twice.
X_train_transformed = text_preprocessor.fit_transform(df_train)
# The target is the 'log_price' column (presumably log1p(price), since the
# predictions are inverted with np.expm1 later in the notebook).
text_model.fit(X_train_transformed, df_train['log_price'])
print("Text Expert is trained and ready.")

In [None]:


print("--- Starting Final Submission Generation ---")
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


class FineTuningVisionModel(nn.Module):
    """A timm backbone whose classifier head is reset to a single output.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name='efficientnet_b0', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Replace the classification head with a one-unit head.
        backbone.reset_classifier(num_classes=1)
        # The attribute must stay named `model`: other code reads `.model`,
        # and saved checkpoint keys are prefixed with the attribute name.
        self.model = backbone

    def forward(self, x):
        """Pass the batch ``x`` through the backbone and return its output."""
        output = self.model(x)
        return output

# Rebuild the architecture, then restore the fine-tuned weights from disk.
image_model = FineTuningVisionModel()
# map_location remaps tensors that were saved from a GPU onto the device
# selected for this run, so the checkpoint also loads on a CPU-only machine
# (without it, torch.load fails when CUDA is unavailable).
state_dict = torch.load('best_vision_model.pth', map_location=device)
image_model.load_state_dict(state_dict)
image_model.to(device)
# Inference only: eval() switches dropout / batch-norm layers to eval mode.
image_model.eval()
print("Image Expert loaded successfully.")


# Load the test rows and derive the same pack-size feature used in training.
df_test = pd.read_csv('dataset/test.csv')
lowered_content = df_test['catalog_content'].str.lower()
quantity_matches = lowered_content.str.extract(r'(?:pack of|count|pk|set of|pack)\s*:?\s*(\d+)')
# Rows without a quantity phrase come back as NaN and default to 1.
df_test['item_quantity'] = quantity_matches.fillna(1).astype(int)
def extract_brand_fast(text):
    """Return the leading "Item Name:" segment of ``text`` as the brand.

    The brand is whatever follows ``Item Name:`` at the very start of the
    text, up to the first comma (or the end of the string), with surrounding
    whitespace stripped. ``text`` is coerced with ``str()`` first, so
    non-string values such as ``None`` or NaN are accepted.

    Returns:
        The extracted segment, or ``"Unknown"`` when the text does not start
        with an ``Item Name:`` field that the pattern can match.
    """
    # `^` plus re.match both anchor at position 0; without re.MULTILINE the
    # `$` alternative only matches at the end of the whole string.
    found = re.match(r'^Item Name:\s*(.+?)(?:,|$)', str(text))
    if found is None:
        return "Unknown"
    return found.group(1).strip()
df_test['brand'] = df_test['catalog_content'].progress_apply(extract_brand_fast)
# Training collapsed rare brands into 'Other' before the one-hot encoder was
# fitted. Apply the same bucketing here: any test brand outside the training
# vocabulary (rare in training, or never seen) becomes 'Other'. Otherwise the
# encoder would emit an all-zero brand vector for it, a pattern the model
# never saw for such brands during training.
known_brands = df_train['brand'].unique()
df_test['brand'] = df_test['brand'].where(df_test['brand'].isin(known_brands), 'Other')
print("Test data prepared.")


print("\nGetting predictions from the Text Expert...")
# Reuse the already-fitted preprocessor; it must not be refit on test data.
X_test_text_transformed = text_preprocessor.transform(df_test)
log_preds_text = text_model.predict(X_test_text_transformed)
# The model was fitted on 'log_price'; expm1 maps predictions back to prices.
preds_text = np.expm1(log_preds_text)


print("Getting predictions from the Image Expert (this will take a while)...")
class ImagePredictionDataset(Dataset):
    """Inference-only dataset yielding one image per row of ``df``.

    Args:
        df: Frame with a ``sample_id`` column; each image is expected at
            ``<image_dir>/<sample_id>.jpg``.
        image_dir: Directory that holds the image files.
        transform: Optional callable applied to the PIL image before it is
            returned.
    """

    def __init__(self, df, image_dir, transform=None):
        self.df, self.image_dir, self.transform = df, image_dir, transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image for row ``idx``.

        No label is returned. A missing file is replaced by a black 224x224
        placeholder so that predictions stay aligned with the rows of ``df``.
        """
        row = self.df.iloc[idx]
        # Use the directory stored on the instance. The bare name `image_dir`
        # is not defined in this scope and would raise NameError (or silently
        # pick up an unrelated global).
        image_path = os.path.join(self.image_dir, f"{row['sample_id']}.jpg")
        try:
            image = Image.open(image_path).convert('RGB')
        except FileNotFoundError:
            image = Image.new('RGB', (224, 224), (0, 0, 0))
        if self.transform:
            image = self.transform(image)
        return image

# Build eval-mode preprocessing from the data config timm resolves for the
# loaded backbone.
data_config = timm.data.resolve_model_data_config(image_model.model)
image_transforms = timm.data.create_transform(is_training=False, **data_config)
test_image_dataset = ImagePredictionDataset(df_test, 'images/', transform=image_transforms)
# shuffle=False keeps the batch order aligned with the rows of df_test.
test_loader = DataLoader(
    dataset=test_image_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=0,
)

# Collect per-batch log-price predictions from the Image Expert.
all_image_preds = []
with torch.no_grad():  # inference only, no gradients needed
    loop = tqdm(test_loader, desc="Image Expert Predicting on Test Set")
    for images in loop:
        images = images.to(device)
        # Flatten to 1-D with reshape(-1). A bare squeeze() would also drop
        # the batch axis when the final batch holds a single image, giving a
        # 0-d array that np.concatenate rejects.
        log_preds = image_model(images).reshape(-1)
        all_image_preds.append(log_preds.cpu().numpy())

log_preds_image = np.concatenate(all_image_preds)
# Map the model outputs back to price space, mirroring the Text Expert.
preds_image = np.expm1(log_preds_image)


print("\nBlending predictions and saving submission file...")
# Fixed blend in price space: the Text Expert carries 90% of the weight.
blend_weight_text = 0.90
blend_weight_image = 0.10
weighted_text = preds_text * blend_weight_text
weighted_image = preds_image * blend_weight_image
raw_blend = weighted_text + weighted_image
# Floor negative blended prices at zero.
blended_preds = np.where(raw_blend < 0, 0, raw_blend)

# One row per test sample: its id and the blended price.
submission_df = df_test[['sample_id']].assign(price=blended_preds)
submission_df.to_csv('test_result2.csv', index=False)

print("\n--- DL ENSEMBLE SUBMISSION FILE CREATED SUCCESSFULLY! ---")
print("File 'test_result2.csv' is ready for submission.")
display(submission_df.head())