# Ensemble Model - Final

#  Load Pre-trained Models

## 1. Text Model (mBERT)

In [None]:
# -------------------------------------------------------------
# Text branch: multilingual BERT sequence classifier
# -------------------------------------------------------------
print("Loading mBERT model...")

# Tokenizer and a 2-label classification model are both built from the
# "bert-base-multilingual-cased" checkpoint.
mbert_tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
mbert_model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased", num_labels=2
)

# Replace the weights with the trained state dict stored at MBERT_MODEL_PATH.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
mbert_model.load_state_dict(
    torch.load(MBERT_MODEL_PATH, map_location=device)
)

# Move to the target device and switch to evaluation mode in one step
# (both calls return the module itself).
mbert_model = mbert_model.to(device).eval()

print("✓ mBERT model loaded successfully!")

## 2. Visual Model (CLIP)

In [None]:
# =============================
# LOAD CLIP MODEL
# =============================
# Progress message printed before the CLIP classifier is defined and loaded.
print("Loading CLIP model...")

# CLIP Classifier class (same as training)
class CLIPClassifier(nn.Module):
    """Image classifier: CLIP vision encoder + projection + small MLP head.

    Args:
        clip_model: Object exposing ``config.projection_dim``,
            ``vision_model`` and ``visual_projection`` (in this file a
            ``CLIPModel`` instance is passed in).
        num_classes: Number of output logits.
        dropout: Dropout probability used inside the head.
    """

    def __init__(self, clip_model, num_classes=2, dropout=0.3):
        super().__init__()
        self.clip = clip_model

        embed_dim = self.clip.config.projection_dim
        bottleneck_dim = embed_dim // 2

        # The position of each layer fixes its state-dict key
        # (classifier.0.* and classifier.3.*), so the order must not change.
        head_layers = [
            nn.Linear(embed_dim, bottleneck_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(bottleneck_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, pixel_values):
        """Return raw class logits for a batch of preprocessed images."""
        pooled = self.clip.vision_model(pixel_values=pixel_values).pooler_output
        projected = self.clip.visual_projection(pooled)
        return self.classifier(projected)

# CLIP backbone and its matching image preprocessor, both taken from the
# "openai/clip-vit-base-patch32" checkpoint.
clip_base = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Wrap the backbone with the 2-class head defined above.
clip_model = CLIPClassifier(clip_base, num_classes=2)

# Restore the trained weights stored at CLIP_MODEL_PATH.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
clip_model.load_state_dict(
    torch.load(CLIP_MODEL_PATH, map_location=device)
)

# Move to the target device and switch to evaluation mode
# (both calls return the module itself).
clip_model = clip_model.to(device).eval()

print("✓ CLIP model loaded successfully!")

#  Generate Predictions from Base Models

## Helper Functions

In [None]:
# =============================
# PREDICTION FUNCTIONS
# =============================

def get_text_predictions(texts, model, tokenizer, device, batch_size=16, max_length=None):
    """Get probability predictions from the text model (mBERT).

    Args:
        texts: Iterable of input strings. It is materialised as a list so
            that list-like containers (e.g. a pandas Series) are handed to
            the tokenizer as plain lists.
        model: Model called as ``model(input_ids=..., attention_mask=...)``
            whose output exposes ``.logits``. It is put into eval mode.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors.
        device: Device the tokenized tensors are moved to.
        batch_size: Number of texts scored per forward pass.
        max_length: Length every sequence is padded/truncated to. ``None``
            (default) falls back to the module-level ``MAX_LENGTH``.

    Returns:
        2-D NumPy array with one row per input text; each row is the
        softmax of that text's logits.

    Raises:
        ValueError: If ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # np.vstack([]) would raise the same exception type much later and
        # with a far less helpful message.
        raise ValueError("texts is empty; there is nothing to predict")
    if max_length is None:
        max_length = MAX_LENGTH

    model.eval()
    batch_probs = []

    with torch.no_grad():
        for start in tqdm(range(0, len(texts), batch_size), desc="Text predictions"):
            batch_texts = texts[start:start + batch_size]

            # Tokenize: every sequence is padded/truncated to max_length
            encoding = tokenizer(
                batch_texts,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )

            input_ids = encoding['input_ids'].to(device)
            attention_mask = encoding['attention_mask'].to(device)

            # Forward pass, then logits -> per-class probabilities
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.softmax(outputs.logits, dim=1)
            batch_probs.append(probs.cpu().numpy())

    return np.vstack(batch_probs)


def get_image_predictions(image_names, img_dir, model, processor, device, batch_size=16):
    """Get probability predictions from the visual model (CLIP).

    Images that cannot be opened are replaced by a 224x224 mid-gray RGB
    placeholder so the output stays aligned row-for-row with
    ``image_names``; a single warning summarising the affected files is
    emitted at the end.

    Args:
        image_names: Iterable of file names, resolved relative to ``img_dir``.
        img_dir: Directory containing the image files.
        model: Model called as ``model(pixel_values)`` and returning logits.
            It is put into eval mode.
        processor: Callable image processor returning ``pixel_values``.
        device: Device the pixel tensors are moved to.
        batch_size: Number of images scored per forward pass.

    Returns:
        2-D NumPy array with one row per image name; each row is the
        softmax of that image's logits.

    Raises:
        ValueError: If ``image_names`` is empty.
    """
    import warnings  # local import keeps this block self-contained

    image_names = list(image_names)
    if not image_names:
        # np.vstack([]) would raise the same exception type much later and
        # with a far less helpful message.
        raise ValueError("image_names is empty; there is nothing to predict")

    model.eval()
    batch_probs = []
    unreadable = []

    with torch.no_grad():
        for start in tqdm(range(0, len(image_names), batch_size), desc="Image predictions"):
            batch_images = []

            # Load images
            for img_name in image_names[start:start + batch_size]:
                img_path = os.path.join(img_dir, img_name)
                try:
                    # The context manager closes the file handle; convert()
                    # returns an independent in-memory image.
                    with Image.open(img_path) as raw:
                        img = raw.convert('RGB')
                except Exception:
                    # Best-effort fallback, but no longer swallowing
                    # KeyboardInterrupt / SystemExit like a bare except.
                    img = Image.new('RGB', (224, 224), (128, 128, 128))
                    unreadable.append(img_name)
                batch_images.append(img)

            # Process images
            inputs = processor(images=batch_images, return_tensors="pt")
            pixel_values = inputs['pixel_values'].to(device)

            # Forward pass, then logits -> per-class probabilities
            outputs = model(pixel_values)
            probs = torch.softmax(outputs, dim=1)
            batch_probs.append(probs.cpu().numpy())

    if unreadable:
        warnings.warn(
            f"{len(unreadable)} image(s) could not be read and were replaced "
            f"by a gray placeholder (first few: {unreadable[:5]})",
            stacklevel=2,
        )

    return np.vstack(batch_probs)

# Confirmation message printed once the prediction helpers have been defined.
print("Prediction functions defined!")

## Generate Validation Predictions

In [None]:
# -------------------------------------------------------------
# Validation set: base-model probabilities and binary targets
# -------------------------------------------------------------
print("Generating predictions on validation set...\n")

# Text branch: missing texts are scored as empty strings
val_texts = val_split['Processed_Text'].fillna('').tolist()
text_proba_val = get_text_predictions(
    val_texts, mbert_model, mbert_tokenizer, device
)

# Image branch: file names are resolved under TRAIN_IMG_DIR
val_images = val_split['Image_name'].tolist()
image_proba_val = get_image_predictions(
    val_images, TRAIN_IMG_DIR, clip_model, clip_processor, device
)

# Binary target: 1 where Label == 'Political', 0 otherwise
y_val = (val_split['Label'] == 'Political').astype(int).values

# One print call; sep="\n" yields the same output as separate prints
print(
    "\nValidation predictions generated!",
    f"Text probabilities shape: {text_proba_val.shape}",
    f"Image probabilities shape: {image_proba_val.shape}",
    f"Labels shape: {y_val.shape}",
    sep="\n",
)

## Analyze Base Model Performance

In [None]:
# -------------------------------------------------------------
# Accuracy and macro-F1 of each base model on the validation set
# -------------------------------------------------------------
# Hard labels: index of the highest probability in each row
text_preds_val = text_proba_val.argmax(axis=1)
image_preds_val = image_proba_val.argmax(axis=1)

# Text model metrics
text_acc = accuracy_score(y_val, text_preds_val)
text_f1 = f1_score(y_val, text_preds_val, average='macro')

# Image model metrics
image_acc = accuracy_score(y_val, image_preds_val)
image_f1 = f1_score(y_val, image_preds_val, average='macro')

# One print call; sep="\n" yields the same output as separate prints
print(
    "=" * 60,
    "BASE MODEL PERFORMANCE ON VALIDATION SET",
    "=" * 60,
    "\nText Model (mBERT):",
    f"  Accuracy: {text_acc:.4f}",
    f"  F1 Score: {text_f1:.4f}",
    "\nImage Model (CLIP):",
    f"  Accuracy: {image_acc:.4f}",
    f"  F1 Score: {image_f1:.4f}",
    "\n" + "=" * 60,
    sep="\n",
)

#  Engineer Meta-Features for Stacking

In [None]:
# =============================
# FEATURE ENGINEERING FOR META-MODEL
# =============================

def create_stacking_features(text_proba, image_proba):
    """
    Build the 14-column meta-feature matrix fed to the stacking model.

    Both inputs are 2-D arrays indexed as [sample, class], with column 0
    read as NonPolitical and column 1 as Political.

    Columns, in order:
    1-2:  text probabilities [NonPol, Pol]
    3-4:  image probabilities [NonPol, Pol]
    5:    text confidence (row maximum)
    6:    image confidence (row maximum)
    7:    agreement flag (1.0 when both argmax classes match, else 0.0)
    8:    disagreement magnitude |text Pol - image Pol|
    9:    product of the Political probabilities
    10:   product of the NonPolitical probabilities
    11:   mean of the Political probabilities
    12:   larger of the Political probabilities
    13:   smaller of the Political probabilities
    14:   signed difference (text Pol - image Pol)
    """
    # Per-class views of each model's output
    text_nonpol, text_pol = text_proba[:, 0], text_proba[:, 1]
    img_nonpol, img_pol = image_proba[:, 0], image_proba[:, 1]

    # Quantities reused by more than one column
    same_vote = np.argmax(text_proba, axis=1) == np.argmax(image_proba, axis=1)
    signed_gap = text_pol - img_pol

    columns = (
        # Base probabilities
        text_nonpol,
        text_pol,
        img_nonpol,
        img_pol,
        # Confidence
        np.max(text_proba, axis=1),
        np.max(image_proba, axis=1),
        # Agreement
        same_vote.astype(float),
        np.abs(signed_gap),
        # Interactions
        text_pol * img_pol,
        text_nonpol * img_nonpol,
        # Summary statistics of the Political probability
        (text_pol + img_pol) / 2,
        np.maximum(text_pol, img_pol),
        np.minimum(text_pol, img_pol),
        signed_gap,
    )
    return np.column_stack(columns)

# Meta-feature matrix for the validation set
X_val_stacking = create_stacking_features(text_proba_val, image_proba_val)

# One print call; sep="\n" yields the same output as separate prints
print(
    f"Stacking features shape: {X_val_stacking.shape}",
    f"Total features: {X_val_stacking.shape[1]}",
    "\nFeature breakdown:",
    "  - Base probabilities: 4",
    "  - Confidence features: 2",
    "  - Agreement features: 2",
    "  - Interaction features: 2",
    "  - Statistical features: 4",
    "  - Total: 14 features",
    sep="\n",
)

# Train Meta-Model (Logistic Regression)

In [None]:
# -------------------------------------------------------------
# Meta-model: logistic regression tuned by grid search
# -------------------------------------------------------------
print("Training Logistic Regression meta-model with Grid Search...\n")

# Hyper-parameter candidates: 5 values of C x 2 class weightings x 2 solvers
param_grid = {
    'C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'class_weight': ['balanced', None],
    'solver': ['lbfgs', 'liblinear'],
}

# Estimator cloned by the search for every candidate
meta_model_base = LogisticRegression(max_iter=1000, random_state=42)

# 5-fold cross-validated search scored by macro-F1, using all available cores
grid_search = GridSearchCV(
    estimator=meta_model_base,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_val_stacking, y_val)

# Keep the best candidate found by the search as the meta-model
meta_model = grid_search.best_estimator_

# One print call; sep="\n" yields the same output as separate prints
print(
    "\n" + "=" * 60,
    "GRID SEARCH RESULTS",
    "=" * 60,
    f"\nBest parameters: {grid_search.best_params_}",
    f"Best CV F1 Score: {grid_search.best_score_:.4f}",
    "\n" + "=" * 60,
    sep="\n",
)

## Analyze Meta-Model Performance

In [None]:
# =============================
# META-MODEL PERFORMANCE
# =============================
# The meta-model was fit on X_val_stacking / y_val, so the predictions below
# are made on its own training data. Accuracy and F1 computed from them are
# in-sample figures and therefore optimistic; the base-model scores they are
# compared with are out-of-sample. The cross-validated F1 from the grid
# search is reported alongside as the fairer number.
ensemble_preds_val = meta_model.predict(X_val_stacking)
ensemble_proba_val = meta_model.predict_proba(X_val_stacking)

ensemble_acc = accuracy_score(y_val, ensemble_preds_val)
ensemble_f1 = f1_score(y_val, ensemble_preds_val, average='macro')

print("=" * 60)
print("ENSEMBLE MODEL PERFORMANCE")
print("=" * 60)
print(f"\nEnsemble (Stacking):")
print(f"  Accuracy: {ensemble_acc:.4f}")
print(f"  F1 Score: {ensemble_f1:.4f}")
print(f"  Cross-validated F1 (grid search): {grid_search.best_score_:.4f}")
print("  NOTE: Accuracy/F1 above are in-sample (the meta-model was fit on this same validation set).")

print(f"\n{'='*60}")
print("PERFORMANCE COMPARISON")
print(f"{'='*60}")
print(f"\nText Model:     Acc={text_acc:.4f}, F1={text_f1:.4f}")
print(f"Image Model:    Acc={image_acc:.4f}, F1={image_f1:.4f}")
print(f"Ensemble:       Acc={ensemble_acc:.4f}, F1={ensemble_f1:.4f}")

print(f"\n{'='*60}")
print("IMPROVEMENTS")
print(f"{'='*60}")
print(f"Accuracy improvement over Text:  {(ensemble_acc - text_acc)*100:+.2f}%")
print(f"Accuracy improvement over Image: {(ensemble_acc - image_acc)*100:+.2f}%")
print(f"F1 improvement over Text:        {(ensemble_f1 - text_f1)*100:+.2f}%")
print(f"F1 improvement over Image:       {(ensemble_f1 - image_f1)*100:+.2f}%")
# Same comparison using the cross-validated ensemble F1 instead of the in-sample one
print(f"CV F1 improvement over Text:     {(grid_search.best_score_ - text_f1)*100:+.2f}%")
print(f"CV F1 improvement over Image:    {(grid_search.best_score_ - image_f1)*100:+.2f}%")
print(f"\n{'='*60}")