# Imports

In [132]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F


# Data Loading

In [133]:
# from torch.utils.data import Dataset

# class imageDataset(Dataset):
#     # Subclass Dataset to build a custom dataset that does not have to load everything until it is needed
    
#     # All necessary methods to implement; __init__ sets the dataset up with its data
#     def __init__(self, folder_path):
#         self.data = #Stuff from folder path

#     def __len__(self):
#         return len(self.data)
    
#     def __getitem__(self, idx):
#         #Gets the item at idx in form (data, target label) or (X, Y)

In [134]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Every image is resized to 256x256 and converted to a float tensor in [0, 1].
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

data_dir = "data/cloud_data"
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Seed the split so train/test/val membership is the same on every run; otherwise a model
# trained in one session can be "tested" on images it was trained on in another.
SPLIT_SEED = 42
train_data, test_data, val_data = random_split(
    dataset,
    [0.75, .15, .10],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Evaluation loaders do not need shuffling; a fixed order keeps metrics repeatable.
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Human readable labels.
# NOTE(review): ImageFolder assigns class indices from the sorted folder names (see
# dataset.classes / dataset.class_to_idx). This hand-written list is not in alphabetical
# order, so confirm it matches dataset.classes before indexing it with predictions.
human_labels = ["Altocumulus", "Altostratus", "Cumulonimbus","Cirrocumulus", "Cirrus", "Cirrostratus", "Contrail", "Cumulus",
                 "Nimbus", "Stratocumulus", "Stratus" ]

In [135]:
# Sanity check: show which class the loaded dataset object is.
print(type(dataset))

<class 'torchvision.datasets.folder.ImageFolder'>


In [136]:
# Preview the first image of one training batch.
# (The loop previously iterated `dataloader`, a name that is never defined in this notebook.)
preview_images, _ = next(iter(train_loader))

# ToTensor() produced values in [0, 1]; ToPILImage converts the (C, H, W) tensor back to an image.
img = transforms.ToPILImage()(preview_images[0])

img.show()


# Basic CNN (no transfer learning)

In [None]:
class convNet(nn.Module):
    """Small CNN baseline: three conv -> ReLU -> max-pool stages followed by three linear layers.

    Args:
        num_classes: Number of output logits. Defaults to 11.
        input_size: (height, width) of the input images. Used only to size the first
            fully connected layer. Defaults to (256, 256), which gives 26912 features.

    The forward pass returns raw, unnormalised class scores (logits) of shape
    (batch, num_classes).
    """

    def __init__(self, num_classes=11, input_size=(256, 256)):
        super().__init__()
        # nn.Conv2d -> in_channels, out_channels, kernel_size (stride defaults to 1, padding to 0).
        # A 2x2 max-pool with stride 2 halves height and width; it is applied after each of
        # the three conv layers used in forward().
        self.all_pooling = nn.MaxPool2d(2, 2)

        # Spatial size for a 256x256 input:
        #   conv1 (3x3): 256 -> 254, pool -> 127
        self.conv1 = nn.Conv2d(3, 6, 3)

        #   conv2 (3x3): 127 -> 125, pool -> 62
        self.conv2 = nn.Conv2d(6, 16, 3)

        #   conv3 (5x5): 62 -> 58, pool -> 29
        self.conv3 = nn.Conv2d(16, 32, 5)

        # conv4 is not in conv_layers, so forward() never applies it. It is kept so the
        # module's parameter names and initialisation order stay unchanged.
        self.conv4 = nn.Conv2d(32, 64, 5)

        # Plain Python lists used only for iteration; the layers are registered as
        # submodules through the attribute assignments above.
        self.conv_layers = [self.conv1, self.conv2, self.conv3]

        self.flatten = nn.Flatten()

        # Derive the flattened feature count from the conv stack instead of hard-coding it
        # (32 * 29 * 29 = 26912 for the default 256x256 input).
        flattened_size = self._infer_flattened_size(input_size)
        self.fc1 = nn.Linear(flattened_size, 120)
        self.fc2 = nn.Linear(120, 180)
        self.fcs = [self.fc1, self.fc2]
        self.final_fc = nn.Linear(180, num_classes)

    def _features(self, x):
        """Apply conv -> ReLU -> max-pool for every layer in conv_layers."""
        for conv_layer in self.conv_layers:
            x = self.all_pooling(F.relu(conv_layer(x)))
        return x

    def _infer_flattened_size(self, input_size):
        """Return the per-sample feature count after the conv stack for a 3-channel input."""
        height, width = input_size
        with torch.no_grad():
            probe = self._features(torch.zeros(1, 3, height, width))
        return probe[0].numel()

    def forward(self, x):
        x = self._features(x)
        x = self.flatten(x)

        for fc in self.fcs:
            x = F.relu(fc(x))
        x = self.final_fc(x)

        # Return raw logits: the nn.CrossEntropyLoss used for training in this notebook
        # expects unnormalised scores, so no softmax belongs here. Apply softmax to the
        # output only when probabilities are needed for display.
        return x

In [148]:
# Creating a new model gives it brand-new parameters: any optimizer that was built from a
# previous `model` object must be re-created afterwards, or training will not update this one.
model = convNet()

# Smoke test: push a single batch through the network to confirm the layer shapes line up.
# Call the module itself rather than .forward(), and skip autograd since nothing is trained here.
with torch.no_grad():
    smoke_images, _ = next(iter(train_loader))
    model(smoke_images)


In [139]:
import torch.optim as optim

# CrossEntropyLoss takes the raw class scores returned by the model; no softmax is needed first.
criterion = nn.CrossEntropyLoss() #Multi class loss
# The optimizer holds the parameters of the `model` object that exists when this cell runs.
# If `model` is re-created later, re-run this cell, otherwise the new model is never updated.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)



In [140]:
def eval_model(model, test_loader):
    """Return the classification accuracy of `model` over every sample in `test_loader`.

    Args:
        model: An nn.Module mapping a batch of inputs to per-class scores of shape
            (batch, num_classes).
        test_loader: Iterable yielding (inputs, labels) batches.

    Returns:
        A 0-dim float tensor: correct predictions divided by the total number of samples.
        Counting samples (rather than averaging per-batch accuracies) keeps a short final
        batch from being over-weighted.

    Raises:
        ZeroDivisionError: If `test_loader` yields no samples.
    """
    was_training = model.training
    model.eval()  # use inference behaviour for layers such as dropout / batch norm

    correct = 0
    seen = 0
    try:
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for inputs, labels in test_loader:
                predictions = torch.argmax(model(inputs), dim=1)
                correct += (predictions == labels).sum().item()
                seen += len(labels)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return torch.tensor(correct / seen)

In [149]:
batches_to_print = 20

# Guard against hidden notebook state: if `model` was re-created after the optimizer cell
# ran, the optimizer still holds the old model's parameters, optimizer.step() never touches
# this model, and the loss stays at chance level. Fail loudly instead of training nothing.
tracked_param_ids = {id(p) for group in optimizer.param_groups for p in group["params"]}
if not all(id(p) in tracked_param_ids for p in model.parameters()):
    raise RuntimeError(
        "optimizer is not tracking the current model's parameters; "
        "re-run the optimizer cell after re-creating the model"
    )

print("starting training")
for epoch in range(50):

    running_loss = 0
    running_acc = 0

    # Training mode (matters once layers such as batch norm or dropout are added).
    model.train()

    for i, data in enumerate(train_loader):
        inputs, labels = data

        optimizer.zero_grad()  # clear gradients left over from the previous step

        outputs = model(inputs)  # forward pass -> raw class scores

        # Fraction of this batch whose highest-scoring class matches the label.
        acc = torch.sum(torch.argmax(outputs, dim = 1) == labels) / len(labels)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_acc += acc.item()
        running_loss += loss.item()

        if i % batches_to_print == batches_to_print - 1:    # print every 20 batches
            print(f'Epoch {epoch + 1} batcj {i + 1} loss: {running_loss / batches_to_print:.3f} acc {running_acc / batches_to_print:.3f}')
            running_acc = 0
            running_loss = 0

print('Finished Training')

starting training
Epoch 1 batcj 20 loss: 2.396 acc 0.087
Epoch 1 batcj 40 loss: 2.397 acc 0.086
Epoch 1 batcj 60 loss: 2.396 acc 0.110
Epoch 2 batcj 20 loss: 2.395 acc 0.095
Epoch 2 batcj 40 loss: 2.397 acc 0.098
Epoch 2 batcj 60 loss: 2.398 acc 0.089
Epoch 3 batcj 20 loss: 2.396 acc 0.100
Epoch 3 batcj 40 loss: 2.395 acc 0.095
Epoch 3 batcj 60 loss: 2.398 acc 0.088
Epoch 4 batcj 20 loss: 2.395 acc 0.098
Epoch 4 batcj 40 loss: 2.396 acc 0.091
Epoch 4 batcj 60 loss: 2.398 acc 0.095
Epoch 5 batcj 20 loss: 2.395 acc 0.103
Epoch 5 batcj 40 loss: 2.396 acc 0.095
Epoch 5 batcj 60 loss: 2.398 acc 0.087
Epoch 6 batcj 20 loss: 2.396 acc 0.080
Epoch 6 batcj 40 loss: 2.396 acc 0.102
Epoch 6 batcj 60 loss: 2.397 acc 0.105
Epoch 7 batcj 20 loss: 2.398 acc 0.100
Epoch 7 batcj 40 loss: 2.395 acc 0.080
Epoch 7 batcj 60 loss: 2.396 acc 0.106
Epoch 8 batcj 20 loss: 2.397 acc 0.092
Epoch 8 batcj 40 loss: 2.397 acc 0.094
Epoch 8 batcj 60 loss: 2.396 acc 0.097
Epoch 9 batcj 20 loss: 2.398 acc 0.097
Epoch 9

KeyboardInterrupt: 

In [None]:
# NOTE(review): ~93% test accuracy is surprisingly high for a quickly assembled baseline, and it
# does not agree with the chance-level training log recorded above, so this output may come
# from an earlier session. TODO: re-run from a fresh kernel and confirm before trusting it.
print(eval_model(model, test_loader))

tensor(0.9291)


In [None]:
# Saves the whole model object (not just its state_dict) to clouds_1.pt.
# NOTE(review): loading this file later presumably needs the convNet class definition to be available.
torch.save(model, "clouds_1.pt")

# Resnet