In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, utils
import torch.nn as nn
import torch.optim as optim
import os
from PIL import Image
import matplotlib.pyplot as plt
from img2vec_pytorch import Img2Vec
import timm
from tqdm.notebook import tqdm
%matplotlib inline

In [2]:
class AgarIoImagesLoader(DataLoader):
    """Unfinished placeholder for an Agar.io image loader.

    NOTE(review): none of the visible cells reference this class; the training
    cells build a plain ``DataLoader`` instead. Finish or delete this stub.
    """
    def __init__(self, history_len =5):
        """Stub constructor; ``history_len`` is accepted but never used.

        NOTE(review): ``DataLoader.__init__`` is never called and ``self.images``
        is read without ever being assigned in this class, so instantiating it
        is expected to raise ``AttributeError``.
        """
        self.images

In [3]:
# Number of consecutive frames (the current frame plus the ones preceding it)
# that make up a single sample.
history_len = 10

In [4]:
class RNNRegression(nn.Module):
    """Single-layer RNN followed by a linear head applied to the last time step.

    Args:
        input_size: Feature dimension of each sequence element.
        hidden_size: Width of the RNN hidden state.
        output_size: Number of regression targets produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True -> inputs are laid out as (batch, time, feature).
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the sequence through the RNN and regress from its final step.

        Args:
            x: Tensor of shape (batch, time, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        sequence_states, _final_hidden = self.rnn(x)
        # Only the hidden state of the last time step feeds the linear head.
        last_state = sequence_states[:, -1, :]
        return self.fc(last_state)
    
class Embeddings(torch.nn.Module):
    """Image encoder that maps a batch of images to fixed-size feature vectors.

    Args:
        num_encoder_features: Size of the vector produced per image; handed to
            timm as ``num_classes`` so the backbone's head has this width.
    """

    def __init__(self, num_encoder_features = 512):
        super().__init__()
        # The second positional parameter of ``timm.create_model`` is
        # ``pretrained``, so the feature count has to be passed by keyword.
        # timm registers the small MobileNetV3 under width-suffixed names;
        # ``mobilenetv3_small_100`` is the 1.0x-width variant.
        self.encoder = timm.create_model('mobilenetv3_small_100', num_classes=num_encoder_features)

    def forward(self, x):
        """Return the encoder output for ``x``.

        ``x`` is assumed to be an image batch of shape (batch, 3, H, W) --
        TODO confirm against the caller.
        """
        return self.encoder(x)
    
class HistoryEmbedding(torch.nn.Module):
    """Encode each frame of a history window and regress 3 values from the sequence.

    Args:
        num_encoder_features: Size of the per-frame embedding produced by the
            ResNet-18 encoder.
        history_len: Number of frames per sample that ``forward`` consumes.
    """

    def __init__(self, num_encoder_features = 512, history_len = 10):
        super().__init__()
        self.history_len = history_len
        self.num_encoder_features = num_encoder_features
        self.encoder = timm.create_model('resnet18', num_classes=num_encoder_features)
        # NOTE(review): the RNN hidden size is tied to history_len here. Kept
        # as-is for backward compatibility; confirm the coupling is intended.
        self.regressor = RNNRegression(num_encoder_features, self.history_len, 3)

    def forward(self, x):
        """Embed every frame, then regress from the resulting sequence.

        Args:
            x: Tensor indexed as (batch, time, channel, height, width) holding
                at least ``history_len`` time steps.

        Returns:
            The regressor output for the embedded sequence.
        """
        per_frame = [self.encoder(x[:, step]) for step in range(self.history_len)]
        # Stack on dim=1 so the result is (batch, time, features). Stacking on
        # dim 0 and reshaping with .view() would only reinterpret memory and
        # interleave frames that belong to different samples.
        sequence = torch.stack(per_frame, dim=1)
        return self.regressor(sequence)

In [5]:
class CustomImageDataset(Dataset):
    """Sliding-window dataset over recorded episodes.

    Each item is the ``history_len`` most recent frames of one episode, ending
    at the indexed frame, together with the action stored for that last frame.
    Positions that fall before the start of an episode are filled with black
    frames.

    Expected layout under ``data_dir``::

        episode_<k>/image/<i>.jpg
        episode_<k>/arr/<i>.npy    # object array; element 0 is a dict with key 'action'

    Args:
        data_dir: Root directory holding the ``episode_<k>`` folders.
        history_len: Number of frames per sample (must be >= 1).
        transform: Optional callable applied to every frame (an H x W x 3 uint8
            array). When omitted, frames are returned as uint8 tensors.
        num_images_per_folder: Number of frames in each episode folder.
        num_folders: Number of episode folders.

    Raises:
        ValueError: If ``history_len`` is smaller than 1.
    """

    # Shape of the black frame used to pad the start of an episode.
    _PAD_FRAME_SHAPE = (224, 224, 3)

    def __init__(self, data_dir,history_len, transform=None,
                 num_images_per_folder=1000, num_folders=10):
        if history_len < 1:
            raise ValueError(f'history_len must be >= 1, got {history_len}')
        self.num_images_per_folder = num_images_per_folder
        self.num_folders = num_folders
        self.history_len = history_len
        self.dataset_length = self.num_images_per_folder * self.num_folders
        self.data_dir = data_dir
        self.transform = transform

    def __len__(self):
        """Return the total number of frames across all episodes."""
        return self.dataset_length

    def __getitem__(self, idx):
        """Return ``(images, action)`` for the window ending at frame ``idx``.

        Returns:
            images: The ``history_len`` frames stacked along a new first axis.
            action: float32 tensor with the action recorded for the last frame
                (presumably 3 values, matching the padding/model width --
                verify against the recorded data).

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < self.dataset_length:
            raise IndexError(f'index {idx} out of range for dataset of length {self.dataset_length}')

        folder_idx, img_in_folder_idx = divmod(idx, self.num_images_per_folder)
        episode_dir = os.path.join(self.data_dir, f'episode_{folder_idx}')

        images = []
        for i in range(img_in_folder_idx - self.history_len + 1, img_in_folder_idx + 1):
            if i < 0:
                # Window reaches before the episode start: pad with a black frame.
                frame = np.zeros(self._PAD_FRAME_SHAPE, dtype=np.uint8)
            else:
                # The context manager closes the file handle; converting to RGB
                # keeps the channel count identical to the padding frames.
                with Image.open(os.path.join(episode_dir, 'image', f'{i}.jpg')) as img:
                    frame = np.array(img.convert('RGB'))
            if self.transform is not None:
                images.append(self.transform(frame))
            else:
                images.append(torch.from_numpy(frame))
        images = torch.stack(images)

        # Only the action of the last frame is returned, so only that file is
        # read. The last frame index is never negative, so the file is always a
        # real recording rather than padding.
        # SECURITY: allow_pickle=True unpickles arbitrary objects -- only load
        # episode files from a trusted source.
        record = np.load(os.path.join(episode_dir, 'arr', f'{img_in_folder_idx}.npy'),
                         allow_pickle=True)
        action = torch.as_tensor(np.asarray(record[0]['action'], dtype=np.float32))
        return images, action

In [6]:
def plot_sample(data):
    """Show every frame of one sample side by side.

    Args:
        data: An ``(images, ...)`` pair whose first element is indexed as
            (time, channel, height, width).
    """
    frames = data[0]
    # Take the frame count from the sample itself rather than from a notebook
    # global, so the function works for any history length.
    num_frames = len(frames)
    # squeeze=False keeps `axs` two-dimensional even for a single frame.
    _fig, axs = plt.subplots(1, num_frames, figsize=(20, 20), squeeze=False)
    for ax, frame in zip(axs[0], frames):
        # imshow expects channels last.
        # NOTE(review): if the frames were normalised, values outside [0, 1]
        # are clipped by imshow -- un-normalise first for faithful colours.
        ax.imshow(frame.permute(1, 2, 0))

In [7]:
# Per-frame preprocessing: convert to a tensor, resize to 224x224, then
# normalise each channel (the mean/std look like the standard ImageNet
# statistics -- confirm they match the encoder's expectations).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
dataset = CustomImageDataset(
    'data',
    history_len=history_len,
    transform=transform,
)

In [8]:
# Shuffled batches of 16 samples, loaded with 8 worker processes.
data_loader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    num_workers=8,
)

In [9]:
# Build the model with the same history length the dataset was created with,
# so the two cannot silently drift apart when `history_len` is changed.
model = HistoryEmbedding(history_len=history_len)

In [10]:
# Report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model to its device *before* building the optimizer, as the PyTorch
# optimizer docs recommend, and rebind the correctly spelled name.
model = model.to(device)

loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # You can use other optimizers like SGD if desired
# Number of epochs
num_epochs = 50

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    total_loss = 0.0

    for batch_data, batch_labels in tqdm(data_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
        # Transfer data and labels to the GPU if one is available
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()  # Zero the gradients

        predictions = model(batch_data)  # Get predictions from the model

        loss = loss_function(predictions, batch_labels)  # Compute the loss
        loss.backward()  # Backpropagate the loss
        optimizer.step()  # Update the model parameters

        total_loss += loss.item()  # Accumulate loss for monitoring

    # Print the average per-batch loss for this epoch
    avg_loss = total_loss / len(data_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

  0%|          | 0/625 [00:00<?, ?it/s]