In [4]:
# Downloading dataset from kaggle to google drive
# mounting gdrive, and downloading data; will take a bit to execute

from google.colab import drive
import os
import json
import zipfile
import pandas as pd
import shutil
from tqdm import tqdm

drive.mount('/content/drive', force_remount=True)

# !pip install kaggle
# !mkdir ~/.kaggle
# !touch ~/.kaggle/kaggle.json


# with open('/root/.kaggle/kaggle.json', 'w') as file:
#     json.dump({"username":"kylemontgomery","key":"93286c4a7534e7a9055eabbc78034464"}, file)

# !chmod 600 ~/.kaggle/kaggle.json

# !kaggle competitions download -c applications-of-deep-learning-wustl-fall-2023 -p "/content/drive/My Drive/Colab Notebooks" --force

os.chdir("/content/drive/My Drive/Colab Notebooks")

# !unzip -o "applications-of-deep-learning-wustl-fall-2023.zip"

# !rm -f "applications-of-deep-learning-wustl-fall-2023.zip"

# def move(split):
#   data = pd.read_csv(f"{split}.csv")
#   for f in tqdm(data["file"]):
#     shutil.move(f, f"{split}")

# !mkdir train
# move('train')

# !mkdir test
# move('test')


Mounted at /content/drive


In [5]:
# create a dataset - define properties and provide method for loading the image and extracting the pixel values
from torch.utils.data import Dataset
from PIL import Image
import os
import torch

class GlassesDataset(Dataset):
    def __init__(self, dataframe, root_dir, is_test = False, transform=None):
        self.data = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, str(self.data.iloc[idx, 1]))
        image = Image.open(img_name)
        sample = {"pixel_values": image, "labels": self.data.iloc[idx, 2] if not self.is_test else -1}
        if self.transform:
            sample["pixel_values"] = self.transform(sample["pixel_values"])
        return sample

In [15]:
# Transforming the images - resize them to the size the model expects and normalize the rbg values
from torchvision import transforms

transforms = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [7]:
# train validation split
import pandas as pd

df = pd.read_csv('train.csv')
df = df.sample(frac=1).reset_index(drop=True)

TRAIN_PCT = 0.9
TRAIN_CUT = int(len(df) * TRAIN_PCT)

df_train = df[0:TRAIN_CUT]
train_dataset = GlassesDataset(df_train, "train", transform=transforms)

df_validate = df[TRAIN_CUT:]
val_dataset = GlassesDataset(df_validate, "train", transform=transforms)

In [8]:
# defines how to batch images during training and evaluation
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [6]:
!pip install accelerate transformers[torch] datasets

Collecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, 

In [9]:
# Loading a default accuracy metric to monitor accuracy during training
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

  metric = load_metric("accuracy")


In [12]:
# Setting up the training
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./resnet-50",
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    evaluation_strategy="steps",
    logging_strategy="steps",
    save_strategy="steps",
    logging_steps=50,
    save_steps=50,
    eval_steps=50,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    remove_unused_columns=False,
    load_best_model_at_end=True,
)

In [11]:
# Load the model to the GPU
from transformers import ResNetForImageClassification, AutoImageProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50",
                                                    num_labels=2,
                                                    id2label={"0": "no_glasses", "1": "glasses"},
                                                    label2id={"no_glasses": "0", "glasses": "1"},
                                                    ignore_mismatched_sizes=True,
                                                    ).to(device)

config.json:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/103M [00:00<?, ?B/s]

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Defining the trainer
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

In [14]:
# Training
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)

Step,Training Loss,Validation Loss,Accuracy
50,0.3573,0.010278,0.9988
100,0.012,0.005044,0.9992
150,0.0058,0.005955,0.9984
200,0.0046,0.002861,0.9996
250,0.0033,0.002491,0.9996
300,0.0033,0.002086,0.9996
350,0.0014,0.001914,1.0
400,0.0014,0.002242,0.9992
450,0.0011,0.00176,0.9992
500,0.0013,0.001692,0.9996


***** train metrics *****
  epoch                    =          3.0
  total_flos               = 1334888884GF
  train_loss               =       0.0372
  train_runtime            =   0:17:20.06
  train_samples_per_second =       64.897
  train_steps_per_second   =        0.508


In [16]:
# Run inference on test set

from torch.utils.data import DataLoader

df_test = pd.read_csv('test.csv')
test_dataset = GlassesDataset(df_test, "test", transform=transforms, is_test=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model = ResNetForImageClassification.from_pretrained("./resnet-50/checkpoint-350",  # pick the best and change the path accordingly
                                                    num_labels=2,
                                                    id2label={"0": "no_glasses", "1": "glasses"},
                                                    label2id={"no_glasses": "0", "glasses": "1"},
                                                    ignore_mismatched_sizes=True,
                                                    ).to(device)
preds = []
model.eval()

with torch.no_grad():
  for inputs in tqdm(test_dataloader):
    outputs = model(inputs["pixel_values"].to(device))
    p = torch.nn.functional.softmax(outputs["logits"], dim=1)[0][1].item()
    preds.append(p)

submission_df = pd.DataFrame({
    'id': df_test.id,
    'glasses': preds
})

submission_df.head()

100%|██████████| 6865/6865 [02:21<00:00, 48.39it/s]


Unnamed: 0,id,glasses
0,25000,0.999438
1,25001,0.000171
2,25002,0.00031
3,25003,0.999094
4,25004,0.999014


In [17]:
# export submission
submission_df.to_csv('/content/submission.csv', index=False) # download from files over on the left