In [64]:
# pip install torch torchvision

In [65]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models

In [74]:
import torch.nn.functional as F

class NewsCNN(nn.Module):
    """Small three-stage convolutional image classifier.

    Each stage applies a 3x3 convolution (padding 1), ReLU, and a 2x2 max-pool
    that halves height and width. The final feature map is averaged down to
    1x1 per channel, so the fully connected head does not depend on the input
    resolution (inputs must be at least 8x8 to survive the three poolings).

    Args:
        num_classes: Number of output logits.
        in_channels: Number of channels in the input images (default 3).
        hidden_dim: Width of the hidden fully connected layer (default 128).
            Exposed so checkpoints trained with a different head width can be
            loaded into a matching model.

    Layer attribute names (conv1..conv3, fc1, fc2) define the state_dict keys
    and must stay stable for saved checkpoints to load.
    """

    def __init__(self, num_classes, in_channels=3, hidden_dim=128):
        super().__init__()

        # Feature extractor: channel width doubles at every stage.
        self.conv1 = nn.Conv2d(in_channels, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        # Shared parameter-free 2x2 down-sampling used after every conv stage.
        self.pool = nn.MaxPool2d(2, 2)

        # Adaptive pooling makes the output a fixed (128, 1, 1) per sample.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Classifier head.
        self.fc1 = nn.Linear(128, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Map a batch of images (batch, in_channels, H, W) to raw logits.

        Returns a tensor of shape (batch, num_classes). No softmax is applied.
        """
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        x = self.adaptive_pool(x)  # (batch, 128, 1, 1)
        x = x.flatten(1)           # (batch, 128)

        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [75]:
# Reproducibility: seed PyTorch's global RNG so RNG-driven steps (weight
# initialisation, DataLoader shuffling) repeat across "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# Device: use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Preprocessing (From PDF Step 1)
# Per-channel mean / std handed to Normalize (the commonly used ImageNet values).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Resize every image to 224x224, convert it to a tensor, then normalise it.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
])

In [76]:
# Training split: images read from dataset/train with the shared `transform`.
train_dataset = torchvision.datasets.ImageFolder(
    "dataset/train",
    transform=transform,
    # Keep only files whose lower-cased name ends in a known image suffix.
    is_valid_file=lambda path: path.lower().endswith(
        ('.png', '.jpg', '.jpeg', '.webp')
    ),
)

In [77]:
# Test split: images read from dataset/test with the shared `transform`.
test_dataset = torchvision.datasets.ImageFolder(
    "dataset/test",
    transform=transform,
    # Keep only files whose lower-cased name ends in a known image suffix.
    is_valid_file=lambda path: path.lower().endswith(
        ('.png', '.jpg', '.jpeg', '.webp')
    ),
)

In [78]:
# Batch both splits in groups of 16; only the training loader shuffles.
batch_size = 16
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [79]:
# Inspect the classifier head stored in a previously saved checkpoint.
# The checkpoint is loaded explicitly here so the cell works on a fresh kernel
# instead of depending on a leftover `checkpoint` variable.
def extract_state_dict(checkpoint):
    """Return the parameter mapping from a loaded checkpoint.

    Accepts either a dict holding the weights under 'model_state_dict' or a
    bare state_dict, and returns the state_dict in both cases.
    """
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        return checkpoint['model_state_dict']
    return checkpoint


try:
    # map_location="cpu" lets a checkpoint saved on a GPU load on any machine.
    checkpoint = torch.load("news_model.pth", map_location="cpu")
except FileNotFoundError:
    checkpoint = None
    print("news_model.pth not found - run the training cells first.")
else:
    print(extract_state_dict(checkpoint)['fc2.weight'].shape)


torch.Size([3, 256])


In [81]:
# Build the classifier. An earlier draft used torchvision's resnet50; the
# active model is the from-scratch NewsCNN (no pretrained weights are loaded).
# The output size follows the number of classes found in the training set.
num_classes = len(train_dataset.classes)
model = NewsCNN(num_classes=num_classes)
model = model.to(device)


In [56]:
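# Disabled: final-layer replacement (Softmax Classification Layer - Equation 4 in PDF) for a model exposing a `.fc` head.
# The active NewsCNN already sizes its last layer (fc2) from num_classes, so this cell is intentionally a no-op.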
# num_classes = 2  # sports and disaster
# model.fc = nn.Linear(model.fc.in_features, num_classes)

# model = model.to(device)

In [82]:
# Optimisation setup: cross-entropy on the model's logits, minimised with Adam.
# NOTE(review): the recorded run's loss stays near 0.69 for all 15 epochs, so
# the learning rate / epoch count may be worth revisiting - TODO confirm.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Number of full passes over train_loader performed by the training loop.
epochs = 15

In [83]:
# Train for `epochs` passes over train_loader and report the mean per-batch loss.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients from the previous step before the new backward pass.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Save model. The weights are wrapped in a dict together with the class names
# so "news_model.pth" always has one layout: readers index
# checkpoint['model_state_dict'] rather than receiving a bare state_dict.
torch.save({
    'model_state_dict': model.state_dict(),
    'class_names': train_dataset.classes,
}, "news_model.pth")
print("Training complete. Model saved.")

Epoch [1/15], Loss: 0.6940
Epoch [2/15], Loss: 0.6935
Epoch [3/15], Loss: 0.6929
Epoch [4/15], Loss: 0.6924
Epoch [5/15], Loss: 0.6921
Epoch [6/15], Loss: 0.6917
Epoch [7/15], Loss: 0.6915
Epoch [8/15], Loss: 0.6912
Epoch [9/15], Loss: 0.6910
Epoch [10/15], Loss: 0.6907
Epoch [11/15], Loss: 0.6904
Epoch [12/15], Loss: 0.6902
Epoch [13/15], Loss: 0.6898
Epoch [14/15], Loss: 0.6896
Epoch [15/15], Loss: 0.6892
Training complete. Model saved.


In [84]:
# Show the class labels exposed by the training dataset.
class_names = train_dataset.classes
print(class_names)


['disaster', 'politics']


In [85]:
from collections import Counter

# Tally how many training samples carry each target value (class-balance check).
label_counts = Counter(train_dataset.targets)
print(label_counts)


Counter({0: 8, 1: 8})


In [86]:
# Persist the trained weights together with the class names, so a consumer can
# map predicted indices back to labels without rebuilding the dataset.
checkpoint_payload = {
    'model_state_dict': model.state_dict(),
    'class_names': train_dataset.classes,
}
torch.save(checkpoint_payload, "news_model.pth")
