<a href="https://colab.research.google.com/github/lokeshcv/ComputerVisionDeepLearning/blob/main/WNS_Triange_Hackquest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision matplotlib scikit-learn



In [18]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import models, transforms, datasets
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [10]:
class CustomDataset(Dataset):
    def __init__(self, data_folder, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.data_folder = data_folder
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_folder, self.data.iloc[idx, 1])
        image = Image.open(img_name).convert('RGB')

        if self.transform:
            image = self.transform(image)

        label = torch.tensor(self.data.iloc[idx, 2], dtype=torch.long)
        return image, label


In [None]:
# /content/drive/MyDrive/Data/WNS/test.zip'
# /content/drive/MyDrive/Data/WNS/train.zip

In [8]:
import zipfile

def unzip_file(zip_path, extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# Example usage:
zip_file_path = '/content/drive/MyDrive/Data/WNS/train.zip'
extracted_path = '/content/drive/MyDrive/Data/WNS/train'

unzip_file(zip_file_path, extracted_path)

In [11]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


In [25]:
train_csv_path = '/content/drive/MyDrive/Data/WNS/train/train.csv'
train_df = pd.read_csv(train_csv_path)

# Display the first few rows of the DataFrame to understand its structure
print(train_df.head())

   image_id filename  label
0         1    1.jpg      0
1         2    2.jpg      0
2         3    3.jpg      0
3         4    4.jpg      0
4         5    5.jpg      0


In [30]:
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)
train_data.to_csv("/content/drive/MyDrive/Data/WNS/train/train_data.csv",index=False)
val_data.to_csv("/content/drive/MyDrive/Data/WNS/train/validation.csv",index=False)

In [31]:
train_dataset = CustomDataset(data_folder='/content/drive/MyDrive/Data/WNS/train/images',
                              csv_file="/content/drive/MyDrive/Data/WNS/train/train_data.csv", transform=transform)
val_dataset = CustomDataset(data_folder='/content/drive/MyDrive/Data/WNS/train/images',
                            csv_file="/content/drive/MyDrive/Data/WNS/train/validation.csv", transform=transform)

In [32]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [34]:
class FraudClassifier(nn.Module):
    def __init__(self, num_classes=2):
        super(FraudClassifier, self).__init__()
        resnet = models.resnet18(pretrained=True)
        resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
        self.resnet = resnet

    def forward(self, x):
        return self.resnet(x)

model = FraudClassifier()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)




In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = []
        val_labels = []
        for val_images, val_labels_batch in val_loader:
            val_images, val_labels_batch = val_images.to(device), val_labels_batch.to(device)
            val_outputs_batch = model(val_images)
            val_outputs.extend(val_outputs_batch.cpu().numpy())
            val_labels.extend(val_labels_batch.cpu().numpy())

    val_outputs = torch.tensor(val_outputs)
    val_labels = torch.tensor(val_labels)

    # Evaluation Metrics
    accuracy = accuracy_score(val_labels, torch.argmax(val_outputs, dim=1))
    precision = precision_score(val_labels, torch.argmax(val_outputs, dim=1))
    recall = recall_score(val_labels, torch.argmax(val_outputs, dim=1))
    f1 = f1_score(val_labels, torch.argmax(val_outputs, dim=1))

    print(f"Epoch {epoch + 1}/{num_epochs}: "
          f"Loss: {loss.item():.4f}, "
          f"Accuracy: {accuracy:.4f}, "
          f"Precision: {precision:.4f}, "
          f"Recall: {recall:.4f}, "
          f"F1: {f1:.4f}")


  val_outputs = torch.tensor(val_outputs)


Epoch 1/10: Loss: 0.1149, Accuracy: 0.9449, Precision: 0.3333, Recall: 0.0114, F1: 0.0220


In [None]:
# Save the trained model
torch.save(model.state_dict(), '/content/drive/MyDrive/Data/WNS/fraud_classifier.pth')

# Load the saved model
model.load_state_dict(torch.load('/content/drive/MyDrive/Data/WNS/fraud_classifier.pth'))


In [None]:
# Assuming test.csv is in the same directory as your code
test_csv_path = '/content/drive/MyDrive/Data/WNS/test/test.csv'
test_df = pd.read_csv(test_csv_path)

# Display the first few rows of the DataFrame to understand its structure
print(test_df.head())


In [None]:
test_dataset = CustomDataset(data_folder='/content/drive/MyDrive/Data/WNS/test/images',
                             csv_file='/content/drive/MyDrive/Data/WNS/test/test.csv', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
test_outputs = []

with torch.no_grad():
    for test_images, _ in test_loader:
        test_images = test_images.to(device)
        test_outputs_batch = model(test_images)
        test_outputs.extend(test_outputs_batch.cpu().numpy())

test_outputs = torch.tensor(test_outputs)
predictions = torch.argmax(test_outputs, dim=1)

# Save predictions to a submission file
submission_df = pd.DataFrame({'image_id': test_df['image_id'], 'label': predictions})
submission_df.to_csv('submission.csv', index=False)
