In [5]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder

# Read the question/answer table from disk
data = pd.read_csv('data2.csv')  # Path to your CSV file

# Image preprocessing: resize every picture to 224x224, then turn it into a tensor
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Tokenizer for the 'bert-base-multilingual-cased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

def load_image(image_id, image_folder='images'):
    """Load ``<image_folder>/<image_id>.png`` and return it preprocessed.

    Args:
        image_id: Identifier used as the file stem; ".png" is appended to it.
        image_folder: Directory that holds the image files.

    Returns:
        The result of applying the module-level ``transform`` to the image
        after converting it to RGB.

    Raises:
        FileNotFoundError: If the expected image file does not exist.
    """
    image_path = os.path.join(image_folder, f"{image_id}.png")  # Adjust extension if needed
    if not os.path.exists(image_path):
        print(f"FileNotFoundError: Image {image_path} not found.")
        raise FileNotFoundError(f"Image {image_path} not found.")
    # Open through a context manager so the file handle is released as soon as
    # the pixels have been converted, instead of whenever the Image object is
    # eventually garbage-collected.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert("RGB")
    return transform(rgb_image)

# Initialize lists to store images, questions, and labels
images = []
questions = []
labels = []

# Create label mapping.
# LabelEncoder stores its classes in sorted order, while Series.unique() keeps
# first-appearance order. Building label_map from the encoder's classes makes
# the integer ids used for training identical to the ids that
# label_encoder.inverse_transform() decodes at prediction time.
label_encoder = LabelEncoder()
label_encoder.fit(data['answer'])
unique_labels = label_encoder.classes_
label_map = {label: idx for idx, label in enumerate(unique_labels)}

# Load images, questions, and labels
for idx, row in data.iterrows():
    try:
        # Load and transform the image
        image_tensor = load_image(row['image_id'], image_folder='images')
    except FileNotFoundError as e:
        # Rows whose image is missing are skipped entirely
        print(e)
        continue

    # Concatenate questions
    combined_question = " ".join([row['englishquestion'], row['hausaquestion'], row['englishhausaquestion']])
    label_id = label_map[row['answer']]

    # Append only after every field of the row has been built, so the three
    # lists always stay index-aligned.
    images.append(image_tensor)
    questions.append(combined_question)
    labels.append(label_id)

if not images:
    # Fail with a readable message instead of an opaque tokenizer/stack error
    raise RuntimeError("No samples were loaded; check 'data2.csv' and the 'images' folder.")

# Tokenize questions
question_tokens = tokenizer(questions, padding=True, truncation=True, return_tensors="pt")
print("Questions tokenized shape:", question_tokens['input_ids'].shape)

# Convert labels to a tensor
label_tensor = torch.tensor(labels)
print("Labels tensor shape:", label_tensor.shape)

# Stack images into a single tensor
image_stack = torch.stack(images)
print("Images stacked shape:", image_stack.shape)

print("Data preparation successful.")


Questions tokenized shape: torch.Size([500, 75])
Labels tensor shape: torch.Size([500])
Images stacked shape: torch.Size([500, 3, 224, 224])
Data preparation successful.


In [6]:
class CustomDataset(Dataset):
    """Index-aligned collection of tokenized questions, images and labels.

    Each item is the tuple ``(input_ids, attention_mask, image, label)`` taken
    at the same position from the four underlying containers.
    """

    def __init__(self, question_tokens, images, labels):
        """Store the tokenizer output fields together with images and labels.

        Args:
            question_tokens: Mapping providing 'input_ids' and 'attention_mask'.
            images: Indexable container of images.
            labels: Indexable container of labels; its length defines the dataset size.
        """
        ids = question_tokens['input_ids']
        mask = question_tokens['attention_mask']
        self.input_ids, self.attention_mask = ids, mask
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        sample_count = len(self.labels)
        return sample_count

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, image, label)`` for position ``idx``."""
        sample = (
            self.input_ids[idx],
            self.attention_mask[idx],
            self.images[idx],
            self.labels[idx],
        )
        return sample

# Create dataset
dataset = CustomDataset(question_tokens, image_stack, label_tensor)

# Split dataset into train and eval sets (80% / remainder).
# A seeded generator keeps the split identical across kernel restarts, so
# evaluation numbers from different runs refer to the same held-out samples.
train_size = int(0.8 * len(dataset))
eval_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, eval_dataset = random_split(
    dataset, [train_size, eval_size], generator=split_generator
)

# Create dataloaders: training batches are reshuffled, evaluation order is fixed
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
eval_dataloader = DataLoader(eval_dataset, batch_size=8, shuffle=False)


In [8]:
import torch.nn as nn
import torch.nn.functional as F

class BertCNNModel(nn.Module):
    """Classifier that fuses BERT text features with single-layer CNN image features.

    The output layer has one unit per entry of the module-level ``label_map``.
    ``fc1`` expects 16 * 112 * 112 flattened image features plus 768 text
    features, which matches 224x224 inputs after the conv + 2x2 pooling stage.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # initialisation and state_dict keys are unaffected.
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(in_features=16 * 112 * 112 + 768, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=len(label_map))

    def forward(self, input_ids, attention_mask, images):
        """Return the class scores for a batch of (question, image) pairs."""
        # Text branch: pooled BERT output
        text_features = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output

        # Image branch: conv -> ReLU -> max-pool, then flatten per sample
        image_features = self.pool(F.relu(self.conv1(images)))
        batch_size = image_features.size(0)
        image_features = image_features.view(batch_size, -1)

        # Fuse both branches and classify
        fused = torch.cat((image_features, text_features), dim=1)
        hidden = F.relu(self.fc1(fused))
        return self.fc2(hidden)

# Pick the GPU when CUDA is available, otherwise the CPU, and build the model there
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = BertCNNModel()
model = model.to(device)


KeyboardInterrupt: 

In [None]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from sklearn.metrics import accuracy_score

# Cross-entropy loss over the answer classes; Adam updates every model parameter
loss_fn = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Training loop
for epoch in range(1):  # Adjust number of epochs as needed
    # ---- Training pass ----
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        # Move every tensor of the batch onto the target device
        input_ids, attention_mask, images, labels = (tensor.to(device) for tensor in batch)

        # Forward pass and loss for this batch
        outputs = model(input_ids, attention_mask, images)
        loss = loss_fn(outputs, labels)
        total_loss += loss.item()

        # Clear old gradients, back-propagate, then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss}")

    # ---- Evaluation pass on the held-out split ----
    model.eval()
    eval_preds, eval_labels = [], []
    with torch.no_grad():
        for batch in eval_dataloader:
            input_ids, attention_mask, images, labels = (tensor.to(device) for tensor in batch)
            outputs = model(input_ids, attention_mask, images)
            # The index of the highest score is the predicted class
            _, preds = torch.max(outputs, dim=1)
            eval_preds.extend(preds.cpu().numpy())
            eval_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(eval_labels, eval_preds)
    print(f"Epoch {epoch+1}, Evaluation Accuracy: {accuracy}")


In [None]:
def predict(question1, question2, question3, image):
    """Predict the answer for three question strings and one image.

    Args:
        question1: First question text.
        question2: Second question text.
        question3: Third question text.
        image: PIL image to classify together with the questions.

    Returns:
        The answer label whose id in ``label_map`` equals the highest-scoring
        model output.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        raise ValueError("An image is required to make a prediction.")

    model.eval()
    with torch.no_grad():
        # Combine questions the same way the training data was built
        combined_question = " ".join([question1, question2, question3])

        # Preprocess input
        question_tokens = tokenizer(combined_question, padding=True, truncation=True, return_tensors="pt").to(device)
        # Convert to RGB as load_image() does, so the conv layer always
        # receives three channels.
        image_tensor = transform(image.convert("RGB")).unsqueeze(0).to(device)

        # Make prediction
        outputs = model(question_tokens['input_ids'], question_tokens['attention_mask'], image_tensor)
        _, preds = torch.max(outputs, dim=1)

    # Training labels were encoded with label_map, so decode with its inverse.
    # label_encoder sorts its classes and may number them differently.
    index_to_label = {idx: label for label, idx in label_map.items()}
    return index_to_label[int(preds.item())]

# Gradio interface
# `gr` is used below but is not imported in any visible cell; without this
# import the cell fails with NameError.
import gradio as gr

# Three free-text question boxes plus one image upload, passed to predict() in this order
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the first question here..."),
        gr.Textbox(lines=2, placeholder="Enter the second question here..."),
        gr.Textbox(lines=2, placeholder="Enter the third question here..."),
        gr.Image(type="pil")
    ],
    outputs="text"
)

# NOTE(review): share=True presumably requests a public share link - confirm this exposure is intended
iface.launch(share=True)


In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Image preprocessing: resize to 224x224, then convert to a tensor
resize_step = transforms.Resize((224, 224))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# Tokenizer for the 'bert-base-multilingual-cased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

def load_image(image_id, image_folder='images'):
    """Load ``<image_folder>/<image_id>.png`` and return it preprocessed.

    Args:
        image_id: Identifier used as the file stem; ".png" is appended to it.
        image_folder: Directory that holds the image files.

    Returns:
        The result of applying the module-level ``transform`` to the image
        after converting it to RGB.

    Raises:
        FileNotFoundError: If the expected image file does not exist.
    """
    image_path = os.path.join(image_folder, f"{image_id}.png")  # Adjust extension if needed
    if not os.path.exists(image_path):
        print(f"FileNotFoundError: Image {image_path} not found.")
        raise FileNotFoundError(f"Image {image_path} not found.")
    # Open through a context manager so the file handle is released as soon as
    # the pixels have been converted, instead of whenever the Image object is
    # eventually garbage-collected.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert("RGB")
    return transform(rgb_image)

class CustomDataset(Dataset):
    """Dataset yielding ``(input_ids, attention_mask, image, label)`` tuples."""

    def __init__(self, question_tokens, images, labels):
        """Keep references to the tokenizer fields, the images and the labels.

        Args:
            question_tokens: Mapping providing 'input_ids' and 'attention_mask'.
            images: Indexable container of images.
            labels: Indexable container of labels; its length is the dataset size.
        """
        self.input_ids = question_tokens['input_ids']
        self.attention_mask = question_tokens['attention_mask']
        self.images, self.labels = images, labels

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Fetch the four fields stored at position ``idx``."""
        fields = (self.input_ids, self.attention_mask, self.images, self.labels)
        return tuple(field[idx] for field in fields)

class BertCNNModel(nn.Module):
    """Text + image classifier: pooled BERT output concatenated with CNN features.

    The final layer emits ``len(label_map)`` scores, using the module-level
    ``label_map``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so seeded initialisation stays the same
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Input width: flattened conv/pool features plus the 768 text features
        fused_width = 16 * 112 * 112 + 768
        self.fc1 = nn.Linear(fused_width, 512)
        self.fc2 = nn.Linear(512, len(label_map))

    def forward(self, input_ids, attention_mask, images):
        """Compute class scores from token ids, attention mask and images."""
        pooled_text = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output

        pooled_image = self.pool(F.relu(self.conv1(images)))
        flat_image = pooled_image.view(pooled_image.size(0), -1)

        joint = torch.cat((flat_image, pooled_text), dim=1)
        joint = F.relu(self.fc1(joint))
        scores = self.fc2(joint)
        return scores

# Load the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if 'model' in globals():
    # Reuse the network that already exists in this session. Constructing a
    # new BertCNNModel() here would replace it with freshly initialised
    # conv/fc layers, and no weights are loaded afterwards, so the metrics
    # below would describe an untrained classifier.
    model = model.to(device)
else:
    print("Warning: no existing `model` found in this session; evaluating a freshly initialised BertCNNModel.")
    model = BertCNNModel().to(device)

# Load the test data
test_data = pd.read_csv('testdata.csv')  # Path to your test CSV file

# Initialize lists to store images, questions, and labels for the test set
test_images = []
test_questions = []
test_labels = []

# Load test images, questions, and labels
for idx, row in test_data.iterrows():
    try:
        # Load and transform the image
        image_tensor = load_image(row['image_id'], image_folder='test_images')
    except FileNotFoundError as e:
        # Rows whose image is missing are skipped entirely
        print(e)
        continue

    # Concatenate questions
    combined_question = " ".join([row['englishquestion'], row['hausaquestion'], row['englishhausaquestion']])
    label_id = label_map[row['answer']]

    # Append only after every field of the row has been built, so the three
    # lists always stay index-aligned.
    test_images.append(image_tensor)
    test_questions.append(combined_question)
    test_labels.append(label_id)

if not test_images:
    # Fail with a readable message instead of an opaque tokenizer/stack error
    raise RuntimeError("No test samples were loaded; check 'testdata.csv' and the 'test_images' folder.")

# Tokenize test questions
test_question_tokens = tokenizer(test_questions, padding=True, truncation=True, return_tensors="pt")
print("Test questions tokenized shape:", test_question_tokens['input_ids'].shape)

# Convert test labels to a tensor
test_label_tensor = torch.tensor(test_labels)
print("Test labels tensor shape:", test_label_tensor.shape)

# Stack test images into a single tensor
test_image_stack = torch.stack(test_images)
print("Test images stacked shape:", test_image_stack.shape)

print("Test data preparation successful.")

# Create test dataset and dataloader
test_dataset = CustomDataset(test_question_tokens, test_image_stack, test_label_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Evaluation helper
def evaluate_model(model, dataloader):
    """Run ``model`` over ``dataloader`` and print test metrics.

    Prints accuracy plus support-weighted precision, recall and F1. Batches
    are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network called as ``model(input_ids, attention_mask, images)``.
        dataloader: Iterable of ``(input_ids, attention_mask, images, labels)`` batches.
    """
    model.eval()
    eval_preds, eval_labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, images, labels = (tensor.to(device) for tensor in batch)
            scores = model(input_ids, attention_mask, images)
            # Index of the highest score per sample is the predicted class
            predicted = torch.max(scores, dim=1)[1]
            eval_preds.extend(predicted.cpu().numpy())
            eval_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(eval_labels, eval_preds)
    weighted_scores = precision_recall_fscore_support(eval_labels, eval_preds, average='weighted')
    precision, recall, f1 = weighted_scores[0], weighted_scores[1], weighted_scores[2]

    print(f"Test Accuracy: {accuracy}")
    print(f"Test Precision: {precision}")
    print(f"Test Recall: {recall}")
    print(f"Test F1 Score: {f1}")

# Run evaluation: prints accuracy, precision, recall and F1 for the test dataloader
evaluate_model(model, test_dataloader)


In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Image preprocessing pipeline: 224x224 resize followed by tensor conversion
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Tokenizer for the 'bert-base-multilingual-cased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

def load_image(image_id, image_folder='images'):
    """Load ``<image_folder>/<image_id>.png`` and return it preprocessed.

    Args:
        image_id: Identifier used as the file stem; ".png" is appended to it.
        image_folder: Directory that holds the image files.

    Returns:
        The result of applying the module-level ``transform`` to the image
        after converting it to RGB.

    Raises:
        FileNotFoundError: If the expected image file does not exist.
    """
    image_path = os.path.join(image_folder, f"{image_id}.png")  # Adjust extension if needed
    if not os.path.exists(image_path):
        print(f"FileNotFoundError: Image {image_path} not found.")
        raise FileNotFoundError(f"Image {image_path} not found.")
    # Open through a context manager so the file handle is released as soon as
    # the pixels have been converted, instead of whenever the Image object is
    # eventually garbage-collected.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert("RGB")
    return transform(rgb_image)

class CustomDataset(Dataset):
    """Wraps tokenized questions, images and labels as one indexable dataset."""

    def __init__(self, question_tokens, images, labels):
        """Store the inputs without copying them.

        Args:
            question_tokens: Mapping providing 'input_ids' and 'attention_mask'.
            images: Indexable container of images.
            labels: Indexable container of labels; its length is the dataset size.
        """
        token_ids = question_tokens['input_ids']
        token_mask = question_tokens['attention_mask']
        self.input_ids = token_ids
        self.attention_mask = token_mask
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return how many labels (and therefore samples) are stored."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, image, label)`` at ``idx``."""
        token_ids = self.input_ids[idx]
        token_mask = self.attention_mask[idx]
        image = self.images[idx]
        label = self.labels[idx]
        return token_ids, token_mask, image, label

class BertCNNModel(nn.Module):
    """Joint question/image classifier built from BERT and one conv layer.

    Produces ``len(label_map)`` output scores, where ``label_map`` is the
    module-level label dictionary.
    """

    def __init__(self):
        super().__init__()
        # Same construction order as before, so seeded initialisation is unchanged
        self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(in_features=16 * 112 * 112 + 768, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=len(label_map))

    def forward(self, input_ids, attention_mask, images):
        """Return class scores for a batch of tokenized questions and images."""
        # Image branch first: conv -> ReLU -> pool -> flatten per sample
        visual = F.relu(self.conv1(images))
        visual = self.pool(visual)
        visual = visual.view(visual.size(0), -1)

        # Text branch: pooled BERT representation
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        textual = encoded.pooler_output

        # Image features come first in the concatenation, then text features
        merged = torch.cat((visual, textual), dim=1)
        return self.fc2(F.relu(self.fc1(merged)))

# Load the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if 'model' in globals():
    # Reuse the network that already exists in this session. Constructing a
    # new BertCNNModel() here would replace it with freshly initialised
    # conv/fc layers, and no weights are loaded afterwards, so the metrics
    # below would describe an untrained classifier.
    model = model.to(device)
else:
    print("Warning: no existing `model` found in this session; evaluating a freshly initialised BertCNNModel.")
    model = BertCNNModel().to(device)

# Load the test data
# NOTE(review): absolute path; the earlier evaluation cell reads 'testdata.csv' - confirm which file is intended
test_data = pd.read_csv('/mnt/data/test_data.csv')  # Path to your test CSV file

# Initialize lists to store images, questions, and labels for the test set
test_images = []
test_questions = []
test_labels = []

# Load test images, questions, and labels
for idx, row in test_data.iterrows():
    try:
        # Load and transform the image
        image_tensor = load_image(row['image_id'], image_folder='test_images')
    except FileNotFoundError as e:
        # Rows whose image is missing are skipped entirely
        print(e)
        continue

    # Concatenate questions
    combined_question = " ".join([row['englishquestion'], row['hausaquestion'], row['englishhausaquestion']])
    label_id = label_map[row['answer']]

    # Append only after every field of the row has been built, so the three
    # lists always stay index-aligned.
    test_images.append(image_tensor)
    test_questions.append(combined_question)
    test_labels.append(label_id)

if not test_images:
    # Fail with a readable message instead of an opaque tokenizer/stack error
    raise RuntimeError("No test samples were loaded; check '/mnt/data/test_data.csv' and the 'test_images' folder.")

# Tokenize test questions
test_question_tokens = tokenizer(test_questions, padding=True, truncation=True, return_tensors="pt")
print("Test questions tokenized shape:", test_question_tokens['input_ids'].shape)

# Convert test labels to a tensor
test_label_tensor = torch.tensor(test_labels)
print("Test labels tensor shape:", test_label_tensor.shape)

# Stack test images into a single tensor
test_image_stack = torch.stack(test_images)
print("Test images stacked shape:", test_image_stack.shape)

print("Test data preparation successful.")

# Create test dataset and dataloader
test_dataset = CustomDataset(test_question_tokens, test_image_stack, test_label_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Evaluation function
def evaluate_model(model, dataloader):
    """Evaluate ``model`` on ``dataloader``; print metrics and plot a confusion matrix.

    Prints accuracy plus support-weighted precision, recall and F1, then the
    confusion matrix, and finally shows it as a heatmap. Batches are moved to
    the module-level ``device``; class names and ids come from the
    module-level ``label_map``.

    Args:
        model: Network called as ``model(input_ids, attention_mask, images)``.
        dataloader: Iterable of ``(input_ids, attention_mask, images, labels)`` batches.
    """
    model.eval()
    eval_preds = []
    eval_labels = []
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, images, labels = [x.to(device) for x in batch]
            outputs = model(input_ids, attention_mask, images)
            # Index of the highest score per sample is the predicted class
            _, preds = torch.max(outputs, dim=1)
            eval_preds.extend(preds.cpu().numpy())
            eval_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(eval_labels, eval_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(eval_labels, eval_preds, average='weighted')

    print(f"Test Accuracy: {accuracy}")
    print(f"Test Precision: {precision}")
    print(f"Test Recall: {recall}")
    print(f"Test F1 Score: {f1}")

    # Calculate confusion matrix.
    # Pass the full list of class ids explicitly: without `labels`, the matrix
    # only covers classes that occur in the evaluated data, so its size and
    # row order may not match the tick labels taken from label_map.
    class_names = list(label_map.keys())
    class_ids = list(label_map.values())
    cm = confusion_matrix(eval_labels, eval_preds, labels=class_ids)
    print("Confusion Matrix:\n", cm)

    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

# Run evaluation: prints the test metrics and shows the confusion-matrix heatmap
evaluate_model(model, test_dataloader)
