In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [11]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 25)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 16 * 16)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [12]:
class MalimgDataset(Dataset):
    def __init__(self, root_dir):
        self.data = []
        self.labels = []
        self.encoder = LabelEncoder()

        classes = os.listdir(root_dir)
        all_labels = []
        for label in classes:
            folder = os.path.join(root_dir, label)
            for file in os.listdir(folder):
                img_path = os.path.join(folder, file)
                img = Image.open(img_path).convert('L').resize((64, 64))
                self.data.append(np.array(img))
                all_labels.append(label)

        self.labels = self.encoder.fit_transform(all_labels)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(0) / 255.0
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return image, label

In [13]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle (2).json


{'kaggle (2).json': b'{"username":"maryamalikhasi","key":"cf7eff181666333f412dd804331c7c4f"}'}

In [14]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [15]:
import kagglehub
path = kagglehub.dataset_download("manmandes/malimg")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/malimg


In [17]:
batch_size = 32
learning_rate = 0.001

path = os.path.join(path, 'malimg_dataset', 'train')
dataset = MalimgDataset(path)
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [18]:
for epoch in range(15):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}")

Epoch 1, Loss: 150.1159
Epoch 2, Loss: 28.3028
Epoch 3, Loss: 18.7760
Epoch 4, Loss: 14.6986
Epoch 5, Loss: 13.5837
Epoch 6, Loss: 12.3926
Epoch 7, Loss: 11.4317
Epoch 8, Loss: 10.0829
Epoch 9, Loss: 9.6953
Epoch 10, Loss: 11.1084
Epoch 11, Loss: 8.8447
Epoch 12, Loss: 9.8680
Epoch 13, Loss: 7.5294
Epoch 14, Loss: 8.0117
Epoch 15, Loss: 8.1843


In [19]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 97.18%
