# Example template for PyTorch
* Note that the data import shown here (PyTorch `Dataset` and `DataLoader` classes) is only one option; you may load the data another way
* Output predictions must be formatted as described later in this notebook

In [1]:
import os

import numpy as np
import pandas as pd
import torch as pt
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from torchvision.io import read_image

#paths for train annotations
#/kaggle/input/containerid/train_annotations.csv

#paths for train images
#/kaggle/input/containerid/train/train/

#path for testing images
#/kaggle/input/containerid/test/test

In [2]:
# Use the pytorch dataset class to structure the data
# find more here: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

class LimboDataset(Dataset):
    """Map-style dataset pairing each training image with its label.

    The annotations CSV is read positionally: column 0 holds the label and
    column 1 holds the image file name, which is resolved relative to
    ``img_dir``.

    Args:
        annotations_file: Path to the CSV file with labels and image file names.
        img_dir: Directory containing the image files named in the CSV.
        transform: Optional callable applied to each loaded image.
        target_transform: Optional callable applied to each label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotations file."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx: Integer position of the sample; a 0-dim tensor or NumPy
                integer is accepted and converted to ``int``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the result of
            ``read_image`` (after ``transform``, if given) and ``label`` is
            the value from column 0 (after ``target_transform``, if given).
        """
        # Samplers may hand over a tensor / NumPy scalar; .iloc needs a plain int.
        idx = int(idx)
        # Column 1 is the file name, column 0 is the label.
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 1])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


class LimboDatasetTest(Dataset):
    """Map-style dataset over the unlabeled test images in a directory.

    Each item is ``(image, ID)`` where ``ID`` is the image's file name, so
    predictions can be matched back to files when building a submission.

    Args:
        img_dir: Directory containing the test image files.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order and also lists
        # sub-directories; keep regular files only and sort them so the
        # index -> file mapping is reproducible across runs and machines.
        self.files = sorted(
            name
            for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
        )

    def __len__(self):
        """Return the number of image files found in ``img_dir``."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load the test image at position ``idx``.

        Args:
            idx: Integer position in the sorted file list; a 0-dim tensor or
                NumPy integer is accepted and converted to ``int``.

        Returns:
            Tuple ``(image, ID)`` where ``image`` is the result of
            ``read_image`` (after ``transform``, if given) and ``ID`` is the
            file name.
        """
        # Samplers may hand over a tensor / NumPy scalar; list indexing needs an int.
        idx = int(idx)
        ID = self.files[idx]
        image = read_image(os.path.join(self.img_dir, ID))
        if self.transform:
            image = self.transform(image)
        return image, ID

In [3]:
# Labelled training images: annotations CSV plus the image folder it refers to
train_dataset = LimboDataset(
    img_dir='/kaggle/input/containerid/train/train/',
    annotations_file='/kaggle/input/containerid/train_annotations.csv',
)
print('Training dataset size: ' + str(len(train_dataset)))

# Unlabelled test images that predictions are generated for
test_dataset = LimboDatasetTest(
    img_dir='/kaggle/input/containerid/test/test/',
)
print('Test dataset size: ' + str(len(test_dataset)))

Training dataset size: 3100
Test dataset size: 1000


# Here's where you do some ML magic

# Submission steps

### Create dummy arrays for imageID and label
* In practice, your labels will be generated by evaluating on the test set

In [4]:
# Placeholder image IDs: image0.png ... image999.png
# ("<U12" is exactly wide enough for the longest name, 'image999.png')
imageID = np.array(['image' + str(n) + '.png' for n in range(1000)], dtype="<U12")

# Peek at the last few IDs
print(imageID[-5:])

['image995.png' 'image996.png' 'image997.png' 'image998.png'
 'image999.png']


In [5]:
# Placeholder predictions: 1000 random binary labels (upper bound 2 is exclusive, so values are 0 or 1)
imageLabel = np.random.randint(0, 2, size=1000)
print(imageLabel[:5])

[0 1 1 0 1]



# Prepare Submission File

We make submissions in CSV files. A submission has two columns: an ID column and a prediction column. The ID column comes from the test data (for this competition it is 'ImageID', the image file name). The prediction column uses the name of the target field, which for this competition is 'Label'.

We will create a DataFrame with this data, and then use the dataframe's to_csv method to write our submission file. Explicitly include the argument index=False to prevent pandas from adding another column in our csv file.


In [6]:
# Assemble the submission table: one row per test image, prediction and image ID side by side
my_submission = pd.DataFrame(
    data={'Label': imageLabel, 'ImageID': imageID},
)
# Any filename works; index=False stops pandas from writing the row index as an extra column
my_submission.to_csv('submission.csv', index=False)

# Submission
Submit by pressing the Submit button in the right-side panel under Competitions.

# Sharing notebook

To share your notebook, go to File, then Share, and make it public.