# 1. Define Some Libraries

In [1]:
import torch
import torchvision
import torch.utils.data
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

In [2]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import numpy as np
import math, random
import pandas as pd
import pathlib
import glob
import cv2 as cv
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import copy
import time 
from collections import defaultdict
from tqdm import tqdm
import os
import json

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed = 1999
seed_everything(seed)

In [4]:
class CFG:
    isOneHot = False
    label_map = {'dot': 0, 'horizontal_bar' : 1, 'vertical_bar': 2, 'line': 3, 'scatter': 4}
    num_classes = 5
    batchSize = 32
    fold_train = 0

# 2. Meta Data

In [15]:
df = pd.read_csv('./df_train.csv')
df

Unnamed: 0,filename,chart_type
0,/kaggle/input/benetech-making-graphs-accessibl...,line
1,/kaggle/input/benetech-making-graphs-accessibl...,line
2,/kaggle/input/benetech-making-graphs-accessibl...,line
3,/kaggle/input/benetech-making-graphs-accessibl...,line
4,/kaggle/input/benetech-making-graphs-accessibl...,dot
...,...,...
60573,/kaggle/input/benetech-making-graphs-accessibl...,line
60574,/kaggle/input/benetech-making-graphs-accessibl...,line
60575,/kaggle/input/benetech-making-graphs-accessibl...,line
60576,/kaggle/input/benetech-making-graphs-accessibl...,line


In [6]:
df.chart_type.value_counts()

line              24942
vertical_bar      19189
scatter           11243
dot                5131
horizontal_bar       73
Name: chart_type, dtype: int64

In [7]:
files_and_labels = {}
for dirname, _, filenames in os.walk('./test/images'):
    for filename in filenames:
        files_and_labels[os.path.join(dirname, filename)] = 'line'
df_test = pd.DataFrame(files_and_labels, index = [0]).T.reset_index().rename(columns = {'index':'filename', 0 :'chart_type'})
df_test

Unnamed: 0,filename,chart_type
0,./test/images/00dcf883a459.jpg,line
1,./test/images/007a18eb4e09.jpg,line
2,./test/images/000b92c3b098.jpg,line
3,./test/images/00f5404753cf.jpg,line
4,./test/images/01b45b831589.jpg,line


# 3. Define Dataset 

In [8]:
transforms_train = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
transforms_test = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [9]:
class ImageCharts(torch.utils.data.Dataset):
    def __init__(self, df, transforms  = None): ### phase = train/test
        self.df = df
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):
        name_img = self.df.iloc[idx]['filename']
        img = cv.imread(name_img)
        img = cv.cvtColor(img,cv.COLOR_BGR2RGB)
        img = cv.resize(img,(500,300))
        img = img.astype(np.float32)/255.0

        label = np.array(self.df.iloc[idx]['label'])

        if(self.transforms is not None):
            img = self.transforms(img)
        return img, label

In [10]:
def cv_split(Xtrain, ytrain, n_folds, seed):
    kfold = StratifiedKFold(n_splits = n_folds, shuffle = True, random_state = seed)
    for num, (train_index, val_index) in enumerate(kfold.split(Xtrain, ytrain)):
        Xtrain.loc[val_index, 'fold'] = int(num)
    Xtrain['fold'] = Xtrain['fold'].astype(int)
    return Xtrain

meta_df = cv_split(df,df['chart_type'], 5, 42)
meta_df['label'] = meta_df['chart_type'].apply(lambda x: CFG.label_map[x])
meta_df.head(2)

Unnamed: 0,filename,chart_type,fold,label
0,/kaggle/input/benetech-making-graphs-accessibl...,line,4,3
1,/kaggle/input/benetech-making-graphs-accessibl...,line,3,3


In [11]:
train_ds = ImageCharts(meta_df[meta_df.fold != CFG.fold_train].reset_index(), transforms=transforms_train)
val_ds = ImageCharts(meta_df[meta_df.fold == CFG.fold_train].reset_index(), transforms=transforms_train)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size= CFG.batchSize, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size= CFG.batchSize, shuffle=True)

In [12]:
a,b = next(iter(train_dl))
print(a.shape)
print(b.shape)
print('chart_type:',list(CFG.label_map)[b[0].detach().numpy()])
plt.imshow(a[0].permute(1,2,0))

[ WARN:0@354.625] global loadsave.cpp:244 findDecoder imread_('/kaggle/input/benetech-making-graphs-accessible/train/images/3959732c290a.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.7.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


# 4. Define Model - ResNet50

In [13]:
model = torchvision.models.resnet50(pretrained=False)


num_features = model.fc.in_features
model.fc = nn.Linear(num_features, CFG.num_classes)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
myModel = model.to(device)
next(myModel.parameters()).device

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


device(type='cuda', index=0)

# 5. Training

In [14]:
def training(model, train_dl, val_dl, num_epochs):
  # Loss Function, Optimizer and Scheduler
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),lr=0.0001)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

  # Repeat for each epoch
  best_acc = -1
  for epoch in range(num_epochs):
    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0

    # Repeat for each batch in the training set
    for i, data in enumerate(train_dl):
        # Get the input features and target labels, and put them on the GPU
        inputs, labels = data[0].to(device), data[1].to(device)

        # Normalize the inputs
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s

        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Keep stats for Loss and Accuracy
        running_loss += loss.item()

        # Get the predicted class with the highest score
        _, prediction = torch.max(outputs,1)
        # Count of predictions that matched the target label
        correct_prediction += (prediction == labels).sum().item()
        total_prediction += prediction.shape[0]

        if (i + 1) % 50 == 0:    # print every 10 mini-batches
           print('Epoch [{}/{}], Step [{}/{}], Loss : {:.4f}'
            .format(epoch + 1, num_epochs, i + 1, len(train_dl), running_loss/(i + 1)))
    
    # Print stats at the end of the epoch
    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction/total_prediction
    print(f'Epoch: {epoch + 1}, Loss: {avg_loss:.4f}, Accuracy: {acc:.4f}')

    gt = []
    pred = []

    with torch.no_grad():
        correct = 0
        total = 0
        val_loss = 0 
        for idx, data_ in enumerate(val_dl):
            inputs, labels = data_[0].to(device), data_[1].to(device)

            # Normalize the inputs
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Keep stats for Loss and Accuracy
            val_loss += loss.item()

            # Get the predicted class with the highest score
            _, prediction = torch.max(outputs,1)
            predi = torch.softmax(outputs, dim = -1)
            gt.append(labels)
            pred.append(predi[:,1])
            # Count of predictions that matched the target label
            correct += (prediction == labels).sum().item()
            total += prediction.shape[0]
        print('Accuracy of the network val: {:.4f} %'.format(100 * correct / total))

        final_score = 100 * correct / total


        if (best_acc < final_score):
            best_acc = final_score
            print("Saving best model!")
            torch.save(model.state_dict(), f'Benetech _ResNet50_fold0.pth')

  print('Finished Training')
  
num_epochs = 10   # Just for demo, adjust this higher.
training(myModel, train_dl, val_dl, num_epochs)

Epoch [1/10], Step [50/1515], Loss : 1.0709
Epoch [1/10], Step [100/1515], Loss : 0.8168
Epoch [1/10], Step [150/1515], Loss : 0.6644
Epoch [1/10], Step [200/1515], Loss : 0.5792
Epoch [1/10], Step [250/1515], Loss : 0.5098
Epoch [1/10], Step [300/1515], Loss : 0.4659
Epoch [1/10], Step [350/1515], Loss : 0.4302
Epoch [1/10], Step [400/1515], Loss : 0.3976
Epoch [1/10], Step [450/1515], Loss : 0.3678
Epoch [1/10], Step [500/1515], Loss : 0.3444
Epoch [1/10], Step [550/1515], Loss : 0.3236
Epoch [1/10], Step [600/1515], Loss : 0.3068
Epoch [1/10], Step [650/1515], Loss : 0.2913
Epoch [1/10], Step [700/1515], Loss : 0.2781
Epoch [1/10], Step [750/1515], Loss : 0.2710
Epoch [1/10], Step [800/1515], Loss : 0.2585
Epoch [1/10], Step [850/1515], Loss : 0.2476
Epoch [1/10], Step [900/1515], Loss : 0.2377
Epoch [1/10], Step [950/1515], Loss : 0.2290
Epoch [1/10], Step [1000/1515], Loss : 0.2205
Epoch [1/10], Step [1050/1515], Loss : 0.2123
Epoch [1/10], Step [1100/1515], Loss : 0.2056
Epoch [1