<a href="https://colab.research.google.com/github/qn19325/individualProject/blob/main/cnn_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [43]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
import pandas as pd
import numpy as np

In [44]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [45]:
class ImageDataLoader(Dataset):
    def __init__(self, dir_=None):
        self.data_df = pd.read_csv('gdrive/MyDrive/data.csv')
        self.dataset_len = len(self.data_df) # read the number of len of your csv files
    def __getitem__(self, idx):
        # load the next image
        f_name_t = self.data_df['Filename'][idx]
        f_name_tp1 = self.data_df['Filename'][idx+1]
        label = self.data_df['Label'][idx]
        label = label.astype(np.float32) 
        label = np.true_divide(label, 10)
        img_t = torchvision.io.read_image('gdrive/MyDrive/trainingData/{}'.format(f_name_t))
        img_tp1 = torchvision.io.read_image('gdrive/MyDrive/trainingData/{}'.format(f_name_tp1))
        img_t = img_t.float().div_(255.0)
        img_tp1 = img_tp1.float().div_(255.0)
        return img_t, img_tp1, label
    def __len__(self):
        return self.dataset_len - 1

In [46]:
dataset = ImageDataLoader()
dataloader = DataLoader(dataset, shuffle=False, batch_size=8)

In [47]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 8, 1, 1),
            nn.ReLU(),
            nn.Conv2d(8, 16, 1, 1),
            nn.ReLU(),
            nn.Flatten()
        )
        self.fc1 = nn.Linear(65536, 128)
    def forward(self, x):
        state = self.cnn(x)
        state = self.fc1(state)
        # print('size of the state after CNN ',state.size())
        return state

In [48]:
encoder = Encoder().to(device)

In [79]:
batch_size = 8
num_epochs = 1000
learning_rate = 0.0009
input_size = 128
output_size = 1
sequence_length = 2
hidden_size = 128
num_layers = 1

In [80]:
class RNN(nn.Module):
    def __init__(self, batch_size, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.rnn = nn.RNN(self.input_size, self.hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.output_size)
    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device))
    def forward(self, x):
        self.batch_size = x.size(0)
        self.hidden = self.init_hidden()
        out, self.hidden = self.rnn(x, self.hidden)
        out = self.fc(out)
        return out

In [81]:
model = RNN(batch_size=batch_size, input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size).to(device)

In [82]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [83]:
n_total_steps = len(dataloader)
for epoch in range(num_epochs):
    runningLoss = 0
    for i, (image1, image2, label) in enumerate(dataloader):
        output1 = encoder(image1.to(device))
        output2 = encoder(image2.to(device))
        # print('o1 shape:', output1.size())
        batch_size1 = len(output1)
        batch_size2 = len(output2)
        output1 = output1.reshape(batch_size1,1,-1)
        output2 = output2.reshape(batch_size2,1,-1)
        # print('o1 after shape:', output1.size())
        seq = torch.cat((output1, output2.detach()), dim=1)
        # print('seq size: ', seq.size())
        label = (label.float())
        # Forward pass
        outputs = model(seq.to(device))
        loss = criterion(outputs[:,-1].squeeze(), label.to(device))
        # print(outputs[:,-1].squeeze())
        # print(label)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        runningLoss += loss
    if epoch%10 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Running Loss: {runningLoss:.4f}, Average Loss: {runningLoss/len(dataloader.dataset):.4f}')
    runningLoss = 0

Epoch [1/1000], Running Loss: 110.9203, Average Loss: 0.4419
Epoch [11/1000], Running Loss: 361.1469, Average Loss: 1.4388
Epoch [21/1000], Running Loss: 362.1001, Average Loss: 1.4426
Epoch [31/1000], Running Loss: 362.3694, Average Loss: 1.4437
Epoch [41/1000], Running Loss: 361.2331, Average Loss: 1.4392
Epoch [51/1000], Running Loss: 361.0255, Average Loss: 1.4383
Epoch [61/1000], Running Loss: 360.3768, Average Loss: 1.4358
Epoch [71/1000], Running Loss: 360.4241, Average Loss: 1.4360
Epoch [81/1000], Running Loss: 359.8125, Average Loss: 1.4335
Epoch [91/1000], Running Loss: 359.8152, Average Loss: 1.4335
Epoch [101/1000], Running Loss: 359.1110, Average Loss: 1.4307
Epoch [111/1000], Running Loss: 358.8795, Average Loss: 1.4298
Epoch [121/1000], Running Loss: 366.8887, Average Loss: 1.4617
Epoch [131/1000], Running Loss: 357.3786, Average Loss: 1.4238
Epoch [141/1000], Running Loss: 358.8650, Average Loss: 1.4297
Epoch [151/1000], Running Loss: 357.6178, Average Loss: 1.4248
Epo

KeyboardInterrupt: ignored