In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader, Dataset
import numpy as np
import os
from glob import glob

#Loading the files
class CloudDataset(Dataset):
    def __init__(self, data_dir):
        self.data_files = glob(os.path.join(data_dir, '*.npy'))

    def __len__(self):
        return len(self.data_files)

    def __getitem__(self, idx):
        img = np.load(self.data_files[idx])

        #Checks if image is of proper size
        if img.shape != (128, 128):
            return None

        #Replace values greater than 10 with 0
        img[img > 10] = 0

        #Find the dominant cloud type (ignoring 0)
        filtered_img = img[img != 0]
        label = np.bincount(filtered_img.flatten()).argmax() if len(filtered_img) > 0 else 0

        # Normalize and add channel dimension
        img = img / 10.0
        img = torch.tensor(img, dtype=torch.float32).unsqueeze(0)  # Shape [1, 128, 128]
        
        return img, label



# Initialize dataset and dataloaders
data_dir = './data/raw_data/cloudcast/CloudCastSmall/TrainCloud'
dataset = CloudDataset(data_dir)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


In [2]:
import torch.nn as nn
import torch
from torch.utils.data import DataLoader

class CloudClassifierCNN(nn.Module):
    def __init__(self):
        super(CloudClassifierCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 11)  # 11 classes for cloud types 0–10

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 32 * 32)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Assuming `CloudDataset` is your dataset class
train_data_dir = './data/raw_data/cloudcast/CloudCastSmall/TrainCloud'
train_dataset = CloudDataset(train_data_dir)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)


NameError: name 'collate_fn' is not defined

In [4]:
import torch.optim as optim

model = CloudClassifierCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [6]:
import torch
from tqdm import tqdm

import torch
from torch.utils.data import DataLoader, default_collate  # Import default_collate
from tqdm import tqdm

def collate_fn(batch):
    # Filter out any None values from the batch
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:
        return None  # Skip this batch by returning None
    return default_collate(batch)  # Use default_collate for valid batches

def train_model(model, dataloader, criterion, optimizer, num_epochs=3, save_path="best_model.pth"):
    best_loss = float("inf")  # Initialize best loss as infinity
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Wrap the dataloader with tqdm to show a progress bar
        for batch in tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False):
            if batch is None:  # Skip None batches
                continue
            
            inputs, labels = batch  # Unpack inputs and labels
            optimizer.zero_grad()  # Reset gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Calculate loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights
            running_loss += loss.item()

        # Calculate average loss for the epoch
        avg_loss = running_loss / len(dataloader)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}')

        # Check if this is the best model so far, and save it
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), save_path)  # Save the model
            print(f"New best model saved at epoch {epoch+1} with loss {best_loss:.4f}")


# Train the model and save the best one
train_model(model, train_loader, criterion, optimizer, num_epochs=3, save_path="best_model.pth")

# Load the test data
test_data_dir = './data/raw_data/cloudcast/CloudCastSmall/TestCloud'
test_dataset = CloudDataset(test_data_dir)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Load the best model
best_model = CloudClassifierCNN()  # Ensure this matches the architecture of the model you saved
best_model.load_state_dict(torch.load("best_model.pth"))

# Evaluation function
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    # Use tqdm to wrap the evaluation dataloader
    with torch.no_grad():  # Disable gradient computation for evaluation
        for inputs, labels in tqdm(dataloader, desc='Evaluating', leave=False):
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

# Evaluate the model on the test data
print("Evaluating on Test Data:")
test_accuracy = evaluate_model(best_model, test_loader)


                                                                                

KeyboardInterrupt: 

In [125]:
from tqdm import tqdm

def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    # Use tqdm to wrap the evaluation dataloader
    with torch.no_grad():  # Disable gradient computation for evaluation
        for inputs, labels in tqdm(dataloader, desc='Evaluating', leave=False):
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy


In [48]:
best_model.load_state_dict(torch.load("best_model.pth", weights_only=True))

<All keys matched successfully>

In [124]:

#Loads the best model
best_model = CloudClassifierCNN()  # Ensure this matches the architecture of the model you saved
best_model.load_state_dict(torch.load("best_model.pth"))

#Evaluates the best model with progress bar
evaluate_model(best_model, train_loader)

  best_model.load_state_dict(torch.load("best_model.pth"))


Accuracy: 95.29532967032966%


In [58]:
#Refers to the test dataset
test_data_dir = './data/raw_data/cloudcast/CloudCastSmall/TestCloud'
test_dataset = CloudDataset(test_data_dir)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)


In [60]:
#Loads the best model
best_model = CloudClassifierCNN()
best_model.load_state_dict(torch.load("best_model.pth"))

#Evalutes best midel
print("Evaluating on Test Data:")
test_accuracy = evaluate_model(best_model, test_loader)


  best_model.load_state_dict(torch.load("best_model.pth"))


Evaluating on Test Data:


                                                                                

Accuracy: 57.14%


