In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Directory holding the train / test / val TFRecord shard folders.
_TFRECORD_DIR = "/kaggle/input/tpu-getting-started/tfrecords-jpeg-192x192"

# Despite the "*_imgs" names these are shard file names, not individual images.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep
# the order in which records are later loaded identical from run to run.
train_imgs = sorted(os.listdir(_TFRECORD_DIR + "/train"))
test_imgs = sorted(os.listdir(_TFRECORD_DIR + "/test"))
val_imgs = sorted(os.listdir(_TFRECORD_DIR + "/val"))

In [None]:
# Install the TPU client together with pinned torch 1.11.0 and the torch_xla 1.11 wheel.
# NOTE(review): the wheel file name is tagged cp37, so this presumably needs a
# Python 3.7 kernel — confirm against the runtime before upgrading the image.
!pip install cloud-tpu-client==0.10 torch==1.11.0 https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-1.11-cp37-cp37m-linux_x86_64.whl

In [None]:
import pandas as pd
from PIL import Image
import tensorflow as tf
import io

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset , DataLoader
from torchvision import transforms
from torch.optim import SGD

import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp

In [None]:
# Schema of an unlabeled (test) record: a string id and the encoded image bytes.
test_feature_description = {
    'id': tf.io.FixedLenFeature([], tf.string),
    'image': tf.io.FixedLenFeature([], tf.string),
}

# Labeled (train / val) records carry an integer 'class' in addition to the
# same 'id' and 'image' fields; key order is 'class', 'id', 'image'.
train_feature_description = {
    'class': tf.io.FixedLenFeature([], tf.int64),
    **test_feature_description,
}

In [None]:
def _parse_image_function(example_proto):
    """Parse one serialized labeled record using `train_feature_description`.

    Returns whatever `tf.io.parse_single_example` produces for that schema
    (the 'class', 'id' and 'image' features).
    """
    parsed = tf.io.parse_single_example(example_proto, train_feature_description)
    return parsed
def second_parse_image_function(example_proto):
    """Parse one serialized unlabeled record using `test_feature_description`.

    Returns whatever `tf.io.parse_single_example` produces for that schema
    (the 'id' and 'image' features only).
    """
    parsed = tf.io.parse_single_example(example_proto, test_feature_description)
    return parsed

In [None]:
# Wrap every shard file name in its own TFRecordDataset, one list per split.
train_lis = [
    tf.data.TFRecordDataset("/kaggle/input/tpu-getting-started/tfrecords-jpeg-192x192/train/" + shard)
    for shard in train_imgs
]

val_lis = [
    tf.data.TFRecordDataset("/kaggle/input/tpu-getting-started/tfrecords-jpeg-192x192/val/" + shard)
    for shard in val_imgs
]

test_lis = [
    tf.data.TFRecordDataset("/kaggle/input/tpu-getting-started/tfrecords-jpeg-192x192/test/" + shard)
    for shard in test_imgs
]

In [None]:
def _read_records(datasets, parse_fn, has_labels):
    """Materialise every record of the given TFRecord datasets into Python lists.

    Args:
        datasets: iterable of `tf.data.TFRecordDataset` objects (one per shard).
        parse_fn: function mapping a serialized example to a feature dict.
        has_labels: when True the parsed records are expected to contain a
            'class' feature, which is collected as well.

    Returns:
        A tuple `(ids, labels, images)` of equally ordered lists. `ids` holds
        decoded strings, `images` the raw encoded image bytes, and `labels`
        the class values (left empty when `has_labels` is False).
    """
    ids, labels, images = [], [], []
    for dataset in datasets:
        for record in dataset.map(parse_fn):
            if has_labels:
                labels.append(record["class"].numpy())
            # Decode the bytes properly instead of slicing the repr of the bytes
            # object, which breaks on quotes or non-ASCII characters.
            ids.append(record["id"].numpy().decode("utf-8"))
            images.append(record["image"].numpy())
    return ids, labels, images


train_ids, train_classes, train_images = _read_records(
    train_lis, _parse_image_function, True
)
val_ids, val_classes, val_images = _read_records(
    val_lis, _parse_image_function, True
)
# Test records carry no 'class' feature, so the label list comes back empty.
test_ids, _test_labels, test_images = _read_records(
    test_lis, second_parse_image_function, False
)

In [None]:
class CustDat(Dataset):
    """Dataset over in-memory encoded images.

    Each item is decoded with PIL from the stored bytes and passed through
    `transform`. In "test" mode the second element of the returned pair is the
    sample id; in any other mode it is the class label.
    """

    def __init__(self , images , classes , ids , transform , mode):
        # images: sequence of encoded image bytes
        # classes / ids: per-sample labels / identifiers (either may be None
        #   when the corresponding mode never reads it)
        self.images , self.classes , self.ids = images , classes , ids
        self.transform = transform
        self.mode = mode

    def __len__(self):
        """Number of stored images."""
        return len(self.images)

    def __getitem__(self , idx):
        """Return `(transformed_image, id)` in test mode, else `(transformed_image, label)`."""
        raw_bytes = self.images[idx]
        sample = self.transform(Image.open(io.BytesIO(raw_bytes)))
        target = self.ids[idx] if self.mode == "test" else self.classes[idx]
        return (sample , target)

In [None]:
# Preprocessing applied to every decoded image, in order.
# The 120x120 target size must stay in sync with Net.fc1, which expects
# 32*20*20 inputs (120 -> /3 -> /2 = 20 per side after the two poolings).
_preprocess_steps = [
    transforms.Resize((120 , 120)) ,
    transforms.ToTensor() ,
    # Mean 0 / std 1 per channel.
    transforms.Normalize((0 , 0 , 0) , (1 , 1 , 1)) ,
]
transform = transforms.Compose(_preprocess_steps)

In [None]:
# Labeled splits do not need ids; the test split has no labels.
train_cust = CustDat(
    images = train_images , classes = train_classes , ids = None ,
    transform = transform , mode = "train" ,
)
val_cust = CustDat(
    images = val_images , classes = val_classes , ids = None ,
    transform = transform , mode = "val" ,
)
test_cust = CustDat(
    images = test_images , classes = None , ids = test_ids ,
    transform = transform , mode = "test" ,
)

In [None]:
class Net(nn.Module):
  """Small CNN: two conv + max-pool stages followed by two linear layers.

  The first linear layer takes 32*20*20 features, so inputs must be
  3 x 120 x 120: the size-preserving 3x3 convolutions are followed by
  pooling with kernel 3 (120 -> 40) and kernel 2 (40 -> 20).

  Args:
    n_classes: number of output logits.
  """

  def __init__(self , n_classes):
    super().__init__()
    self.conv1 = nn.Conv2d(3 , 16 , kernel_size = 3 , stride = 1 , padding = 1)
    self.conv2 = nn.Conv2d(16 , 32 , kernel_size = 3 , stride = 1 , padding = 1)
    self.fc1 = nn.Linear(32 * 20 * 20 , 1028)
    self.fc2 = nn.Linear(1028 , n_classes)

  def forward(self , x):
    """Return raw (un-normalised) class scores for a batch of images."""
    features = F.max_pool2d(F.relu(self.conv1(x)) , 3)
    features = F.max_pool2d(F.relu(self.conv2(features)) , 2)
    hidden = F.relu(self.fc1(torch.flatten(features , 1)))
    return self.fc2(hidden)

In [None]:
# Not referenced by the other visible cells; kept so the name stays available.
SERIAL_EXEC = xmp.MpSerialExecutor()

# Size the output layer by the number of classes. `len(train_classes)` is the
# number of training *samples* (one label per image), which made the final
# layer far larger than needed. Labels are used directly as class indices by
# the loss, so the layer needs max(label) + 1 outputs across both labeled splits.
NUM_CLASSES = int(max(max(train_classes), max(val_classes))) + 1
WRAPPED_MODEL = xmp.MpModelWrapper(Net(NUM_CLASSES))

In [None]:
def run(rank):
  """Train and validate the wrapped model on one XLA device, then predict the test set.

  Meant to be the per-process target of `xmp.spawn`. Each process builds its
  own samplers and loaders over the module-level `train_cust`, `val_cust` and
  `test_cust` datasets, trains for 10 epochs with SGD and prints metrics after
  every epoch. The printed accuracy and (summed) loss are computed from the
  batches this process saw; no reduction across processes is performed here.
  Only the master ordinal runs inference on the test set and writes
  `fin_sub.csv` (two unnamed columns: sample id, predicted class).

  Args:
    rank: process index passed in by `xmp.spawn`. It is not used directly;
      the replica index is read from `xm.get_ordinal()`.
  """
  # Take the replica count from the runtime rather than hard-coding 8, so the
  # data sharding stays correct if the number of spawned processes changes.
  world_size = xm.xrt_world_size()
  ordinal = xm.get_ordinal()

  train_sampler = torch.utils.data.distributed.DistributedSampler(
      train_cust,
      num_replicas = world_size,
      rank = ordinal,
      shuffle = True,
      seed = 0
  )
  val_sampler = torch.utils.data.distributed.DistributedSampler(
      val_cust,
      num_replicas = world_size,
      rank = ordinal,
      shuffle = True,
      seed = 0
  )
  train_loader = DataLoader(
      train_cust,
      batch_size = 16,
      sampler = train_sampler,
      num_workers = 1,
      drop_last = False
  )
  val_loader = DataLoader(
      val_cust,
      batch_size = 16,
      sampler = val_sampler,
      num_workers = 1,
      drop_last = False
  )
  # No sampler: the test set is only iterated by the master process, in order.
  test_loader = DataLoader(
      test_cust,
      batch_size = 16,
      num_workers = 1,
      drop_last = False
  )
  device = xm.xla_device()
  model = WRAPPED_MODEL.to(device)
  # Base learning rate scaled by the number of replicas.
  lr = 0.01 * world_size
  optimizer = SGD(model.parameters(), lr = lr)
  loss_fn = nn.CrossEntropyLoss()

  num_epochs = 10

  for epoch in range(num_epochs):
    # DistributedSampler derives its permutation from seed + epoch; without
    # this call every epoch would replay exactly the same sample order.
    train_sampler.set_epoch(epoch)

    #training
    para_loader = pl.ParallelLoader(train_loader, [device])
    train_loss = []
    train_corr = 0
    train_sam = 0
    model.train()
    for data, label in para_loader.per_device_loader(device):
      optimizer.zero_grad()
      output = model(data)
      loss = loss_fn(output, label)
      #accuracy
      _, pred = torch.max(output, 1)
      train_corr += (pred == label).sum()
      train_sam += label.shape[0]
      loss.backward()
      train_loss.append(loss.item())
      xm.optimizer_step(optimizer)

    #evaluation
    model.eval()
    val_loss = []
    val_corr = 0
    val_sam = 0
    with torch.no_grad():
      para_loader = pl.ParallelLoader(val_loader, [device])
      for data, label in para_loader.per_device_loader(device):
        output = model(data)
        loss = loss_fn(output, label)
        #accuracy
        _, pred = torch.max(output, 1)
        val_corr += (pred == label).sum()
        val_sam += label.shape[0]
        val_loss.append(loss.item())

    t_ac = 100.0 * train_corr / train_sam
    v_ac = 100.0 * val_corr / val_sam
    # Reported losses are sums over batches, not means.
    t_lo = torch.sum(torch.Tensor(train_loss))
    v_lo = torch.sum(torch.Tensor(val_loss))

    print("epoch is ",epoch," train accu ",t_ac," train loss ",t_lo," val accu ",v_ac," val loss ",v_lo)

  if xm.is_master_ordinal():
    dic = {}
    model.eval()
    with torch.no_grad():
      para_loader = pl.ParallelLoader(test_loader, [device])
      for data, ids in para_loader.per_device_loader(device):
        output = model(data)
        _, pred = torch.max(output, 1)
        # One device-to-host copy per batch instead of one per sample.
        for sample_id, predicted in zip(ids, pred.cpu().tolist()):
          dic[sample_id] = predicted
    # Columns are left unnamed on purpose: later cells read them back as "0" and "1".
    df = pd.DataFrame(dic.items())
    df.to_csv("fin_sub.csv", index = False)

In [None]:
# Launch `run` in 8 forked processes.
# NOTE(review): with the "fork" start method the children presumably inherit the
# datasets and WRAPPED_MODEL already built in this kernel — confirm.
_N_PROCS = 8
xmp.spawn(run, nprocs = _N_PROCS, start_method = "fork")

In [None]:
# Show the working directory contents (fin_sub.csv is expected here after training).
os.listdir()

In [None]:
# Load the raw predictions written by run(). The columns come back named "0"
# and "1" because the frame was saved without explicit column names.
dff = pd.read_csv('fin_sub.csv')
dff.head()

In [None]:
# Build the submission frame: column "0" holds the sample ids, "1" the predicted labels.
_column_map = {"id": "0", "label": "1"}
fin = pd.DataFrame({new_name: dff[old_name].values for new_name, old_name in _column_map.items()})

In [None]:
# Write the final submission file without the DataFrame index column.
fin.to_csv('submission.csv' , index = False)

In [None]:
# Check that submission.csv now appears in the working directory.
os.listdir()

In [None]:
# Working directory contents before the intermediate file is removed.
os.listdir()

In [None]:
# Delete the intermediate predictions file. A missing file is tolerated so the
# cell can be re-run without raising FileNotFoundError.
try:
    os.remove("fin_sub.csv")
except FileNotFoundError:
    pass

In [None]:
# Confirm fin_sub.csv is gone and submission.csv remains.
os.listdir()