<a href="https://colab.research.google.com/github/siondaniel/Benetech/blob/main/Benetech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy

# function to count number of parameters
def get_n_params(model):
    np=0
    for p in list(model.parameters()):
        np += p.nelement()
    return np

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
!git clone https://github.com/siondaniel/Benetech.git

fatal: destination path 'Benetech' already exists and is not an empty directory.


In [39]:
import os

# Define paths to your image and json files
images_dir = "Benetech/train/images"
json_dir = "Benetech/train/annotations"

# Define the list that will hold your data
data = []

# Loop through all the files in the images directory
i = 0
for filename in os.listdir(images_dir):
    if i == 1000: break
    if filename.endswith(".jpg"):
        # Load the image using PIL
        img = Image.open(os.path.join(images_dir, filename))

        # Resize the image to a common size and convert to NumPy array
        img = numpy.array(transforms.functional.resize(img, (256, 256)))
        
        # Load the corresponding json file
        json_filename = filename.split(".")[0] + ".json"
        with open(os.path.join(json_dir, json_filename), "r") as f:
            json_data = json.load(f)
        
        # Ensure all JSON data has the same structure
        for key in ["source", "chart-type", "plot-bb", "text"]:
            if key not in json_data:
                json_data[key] = None
        for text_item in json_data["text"]:
            for key in ["id", "polygon", "text", "role"]:
                if key not in text_item:
                    text_item[key] = None
        
        # Append the image and json data to your list
        data.append({
            "image": img,
            "json": json_data
        })
    i+=1

# Print the length of your data
print("Number of data samples:", len(data))

Number of data samples: 1000


In [40]:
from sklearn.model_selection import train_test_split

# Split the data into training and validation sets
train_data = data[:800]
val_data = data[800:]

# Convert the data into PyTorch tensors
train_images = torch.stack([torch.Tensor(d["image"]) for d in train_data])
train_jsons = [d["json"] for d in train_data]
val_images = torch.stack([torch.Tensor(d["image"]) for d in val_data])
val_jsons = [d["json"] for d in val_data]

# Print the length of your data
print("Number of training samples:", len(train_data))
print("Number of validation samples:", len(val_data))

Number of training samples: 800
Number of validation samples: 200


In [61]:
from torch.utils.data import DataLoader, Dataset

class MyDataset(Dataset):
    def __init__(self, images, jsons):
        if len(images) != len(jsons):
            raise ValueError("The number of images and JSONs must be the same.")
        self.images = images
        self.jsons = jsons
        
    def __len__(self):
        return len(self.images)
        
    def __getitem__(self, index):
        return self.images[index], self.jsons[index]

def my_collate_fn(batch):
    images, jsons = zip(*batch)
    images = torch.stack(images)
    jsons = [j for j in jsons]
    return images, jsons

train_data = MyDataset(train_images, train_jsons)
val_data = MyDataset(val_images, val_jsons)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, collate_fn=my_collate_fn)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=32, shuffle=False, collate_fn=my_collate_fn)

In [62]:
for batch_idx, (images, jsons) in enumerate(train_loader):
    print(f"Batch {batch_idx}:")
    for i in range(len(images)):
        print(f"Image {i} size: {images[i].size()}")
        print(f"Json {i} size: {len(jsons[i])}")

Batch 0:
Image 0 size: torch.Size([256, 256, 3])
Json 0 size: 6
Image 1 size: torch.Size([256, 256, 3])
Json 1 size: 6
Image 2 size: torch.Size([256, 256, 3])
Json 2 size: 6
Image 3 size: torch.Size([256, 256, 3])
Json 3 size: 6
Image 4 size: torch.Size([256, 256, 3])
Json 4 size: 6
Image 5 size: torch.Size([256, 256, 3])
Json 5 size: 6
Image 6 size: torch.Size([256, 256, 3])
Json 6 size: 6
Image 7 size: torch.Size([256, 256, 3])
Json 7 size: 6
Image 8 size: torch.Size([256, 256, 3])
Json 8 size: 6
Image 9 size: torch.Size([256, 256, 3])
Json 9 size: 6
Image 10 size: torch.Size([256, 256, 3])
Json 10 size: 6
Image 11 size: torch.Size([256, 256, 3])
Json 11 size: 6
Image 12 size: torch.Size([256, 256, 3])
Json 12 size: 6
Image 13 size: torch.Size([256, 256, 3])
Json 13 size: 6
Image 14 size: torch.Size([256, 256, 3])
Json 14 size: 6
Image 15 size: torch.Size([256, 256, 3])
Json 15 size: 6
Image 16 size: torch.Size([256, 256, 3])
Json 16 size: 6
Image 17 size: torch.Size([256, 256, 3])
J

In [63]:
class CNN(nn.Module):
    def __init__(self, input_size, n_feature, output_size):
        super(CNN, self).__init__()
        self.n_feature = n_feature
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=n_feature, kernel_size=5)
        self.conv2 = nn.Conv2d(n_feature, n_feature, kernel_size=5)
        self.fc1 = nn.Linear(n_feature*4*4, 50)
        self.fc2 = nn.Linear(50, 10)
        
    def forward(self, x, verbose=False):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = x.view(-1, self.n_feature*4*4)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [64]:
model = CNN(input_size=(3, 224, 224), n_feature=32, output_size=1)

In [65]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [43]:
for (images, jsons) in train_loader:
  print(len(images), len(jsons))

RuntimeError: ignored

In [66]:
# NEED TO MAKE JSONS EQUAL SIZE AT BEGINNING

num_epochs = 10

for epoch in range(num_epochs):
    for i, (images, jsons) in enumerate(train_loader):
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, jsons)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Print statistics
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

RuntimeError: ignored