In [None]:
import os
import zipfile

# Path to the zip file and the directory to extract
zip_path = '/content/drive/MyDrive/data/train.zip'
extract_path = '/content/train'

# Ensure the zip file exists
if not os.path.exists(zip_path):
    raise FileNotFoundError(f"Zip file '{zip_path}' does not exist. Please check the path.")

# Extract the zip file if the directory does not exist
if not os.path.exists(extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# List the contents of the extracted directory to debug
for root, dirs, files in os.walk(extract_path):
    level = root.replace(extract_path, '').count(os.sep)
    indent = ' ' * 4 * (level)
    print(f'{indent}{os.path.basename(root)}/')
    subindent = ' ' * 4 * (level + 1)
    for f in files:
        print(f'{subindent}{f}')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                ._non_COVID (11918).png
                ._Normal-3591.png
                ._Normal-1550.png
                ._Normal-9723.png
                ._Normal-6912.png
                ._Normal-1515.png
                ._Normal-4083.png
                ._Normal-9551.png
                ._Normal-5987.png
                ._Normal-1136.png
                ._Normal-409.png
                ._Normal-6541.png
                ._Normal (9739).png
                ._Normal-2713.png
                ._Normal-9283.png
                ._Normal-3477.png
                ._Normal-5188.png
                ._Normal-7750.png
                ._Normal-5456.png
                ._Normal-4022.png
                ._Normal-2348.png
                ._Normal-6900.png
                ._Normal-4317.png
                ._Normal-5810.png
                ._Normal-3937.png
                ._non_COVID (3269).png
                ._07cdb450-541e-4810-90

In [12]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from google.colab import drive
import zipfile
from PIL import Image

# Mount Google Drive
drive.mount('/content/drive', force_remount=True)

path = '/content/drive/MyDrive/data/train.zip'
extract_path = '/content/drive/MyDrive/data/train'

# Ensure the zip file exists
if not os.path.exists(path):
    raise FileNotFoundError(f"Zip file '{path}' does not exist. Please check the path.")

# Extract the zip file if the directory does not exist
if not os.path.exists(extract_path):
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# Ensure subdirectories exist
classes = ['COVID', 'Normal', 'Pneumonia']
for cls in classes:
    if not os.path.exists(os.path.join(extract_path, 'train', cls)):
        raise FileNotFoundError(f"Subdirectory '{os.path.join(extract_path, 'train', cls)}' does not exist. Please check the path.")

# Custom Dataset class
class CustomImageDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load image paths and labels
image_paths = []
labels = []

for idx, cls in enumerate(classes):
    cls_dir = os.path.join(extract_path, 'train', cls)
    cls_paths = [os.path.join(cls_dir, img) for img in os.listdir(cls_dir) if img.endswith('.jpg')]
    image_paths.extend(cls_paths)
    labels.extend([idx] * len(cls_paths))

# Split the data into training and testing sets
train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Define transforms
transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create datasets and dataloaders
train_dataset = CustomImageDataset(train_paths, train_labels, transform=transform)
test_dataset = CustomImageDataset(test_paths, test_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(128 * 18 * 18, 512)  # Adjust size based on the input image dimensions
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 3)  # Output layer for 3 classes

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 18 * 18)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Initialize the model, loss function, and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 25

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

# Evaluating the model
model.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        y_pred.extend(preds.tolist())
        y_true.extend(labels.tolist())

accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {accuracy}')

# Save the model
torch.save(model.state_dict(), '/content/drive/MyDrive/covid_detector_model_classification.pth')


Mounted at /content/drive
Epoch 1/25, Loss: 13.512894309484041
Epoch 2/25, Loss: 3.400822098438556
Epoch 3/25, Loss: 1.5995298394790063
Epoch 4/25, Loss: 1.8771762962524707
Epoch 5/25, Loss: 0.8753030586701173
Epoch 6/25, Loss: 0.3483987868978427
Epoch 7/25, Loss: 0.28763052133413464
Epoch 8/25, Loss: 0.3652550566654939
Epoch 9/25, Loss: 0.24057227888932595
Epoch 10/25, Loss: 0.16819527086157066
Epoch 11/25, Loss: 0.1448767580665075
Epoch 12/25, Loss: 0.12271648530776684
Epoch 13/25, Loss: 0.09723229448382671
Epoch 14/25, Loss: 0.0919546062270036
Epoch 15/25, Loss: 0.09696607592587288
Epoch 16/25, Loss: 0.08933584188791709
Epoch 17/25, Loss: 0.061149072188597456
Epoch 18/25, Loss: 0.06776239063877326
Epoch 19/25, Loss: 0.053233283500258737
Epoch 20/25, Loss: 0.052291215039216556
Epoch 21/25, Loss: 0.049033572968955226
Epoch 22/25, Loss: 0.06622668040486482
Epoch 23/25, Loss: 0.08196991116095048
Epoch 24/25, Loss: 0.0754284645215823
Epoch 25/25, Loss: 0.12844446419666594
Test Accuracy: 