In [3]:
import torchvision.models as models
import boto3
import sagemaker
import pandas as pd 
import torch 
import re
import glob
import os
from torch.utils.data import Dataset
from torchvision.io import read_image
import numpy as np
from PIL import Image
import torchvision.transforms as transforms

In [4]:
# ResNet-18 with pretrained weights; on first use the checkpoint is downloaded
# into the local torch hub cache (see the download log below).
model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [5]:
# SageMaker session; used below to look up the default S3 bucket.
session = sagemaker.Session()

In [6]:
# Name of the default bucket associated with this SageMaker session.
bucket_name = session.default_bucket()

In [7]:
# boto3 exposes S3 through two handles: a low-level client and a resource object.
# The resource is the one used below to build the bucket handle.
s3_client = boto3.client('s3')
s3_resource = boto3.resource('s3')

In [8]:
# Bucket handle used below to enumerate the uploaded dataset objects.
bucket_obj = s3_resource.Bucket(bucket_name)

In [15]:
def annotation_maker(image_dir: str, output_filename: str) -> None:
    """Write a header-less ``<relative path>,<class id>`` CSV for one image split.

    Every entry matching ``<image_dir>/<class folder>/<file>`` becomes one row.
    The relative path is ``<class folder>/<file>`` and the class id is the first
    run of digits in the class folder's name (``001.Affenpinscher`` -> ``1``),
    taken as-is without re-basing.

    Args:
        image_dir: Directory that holds one sub-folder per class.
        output_filename: Name of the CSV file created inside ``image_dir``.

    Raises:
        IndexError: If a class folder name contains no digits.
    """
    # Sort so the CSV row order is reproducible; glob order is arbitrary.
    files = sorted(glob.glob(f'{image_dir}/*/*'))

    short_files = []
    classes = []
    for file in files:
        class_dir = os.path.basename(os.path.dirname(file))
        # Parse the id from the class folder only: digits elsewhere in the path
        # (for example a parent directory named "data2") must not become the label.
        classes.append(int(re.findall(r'\d+', class_dir)[0]))
        short_files.append(os.path.join(class_dir, os.path.basename(file)))

    (pd.DataFrame({'files': short_files,
                   'classes': classes})
     .to_csv(os.path.join(image_dir, output_filename), index=False, encoding='utf-8', header=False))

    return None
    
    

In [16]:
# Build one annotation.csv per split folder (e.g. dogImages/train).
# Entries that are not directories are skipped so a stray file sitting next to
# the split folders cannot make the CSV write fail.
for folder in sorted(glob.glob('dogImages/*')):
    if os.path.isdir(folder):
        annotation_maker(folder, 'annotation.csv')

In [17]:
# Single S3 prefix shared by all three lists.
# NOTE(review): the double slash is kept exactly as written; confirm it matches the real object keys.
train_prefix = 'dog-breed-data//train'

# List the bucket once and derive everything from that one snapshot. Listing
# three times costs three S3 round trips, and the lists could end up with
# different lengths if objects change between calls.
keys = [item.key for item in bucket_obj.objects.filter(Prefix=train_prefix)]
# Class id = first run of digits in the key.
# (presumably keys mirror the local "<split>/<NNN.Breed>/<file>" layout - verify)
classes = [int(re.findall(r'\d+', key)[0]) for key in keys]
# Class name = text between the first '.' in the key and the next '/'.
class_names = [key.split('.')[1].split('/')[0] for key in keys]

In [18]:
# Lookup table: S3 object key -> numeric class id.
image_classes = {key: class_id for key, class_id in zip(keys, classes)}

In [5]:


class dogDataset(Dataset):
    """Image dataset driven by a header-less ``<relative path>,<label>`` CSV.

    Args:
        annotations_file: Path to the CSV; column 0 is the image path relative
            to ``img_dir`` and column 1 is the label.
        img_dir: Root directory the relative image paths are joined onto.
        transform: Optional callable applied to the loaded PIL image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        # The annotation files are written with header=False, so pandas must not
        # consume the first sample as column names (the default would silently
        # drop one image per split).
        self.img_labels = pd.read_csv(annotations_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)`` after the optional transforms."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # Close the file handle deterministically, and force three channels so
        # grayscale / RGBA / CMYK files still fit a 3-channel normalisation.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [6]:



# Normalisation constants documented for torchvision's pretrained ResNets:
# https://pytorch.org/vision/stable/models/generated/torchvision.models.resnet50.html#torchvision.models.resnet50
# The std has to be the ImageNet one. The earlier [0.2023, 0.1994, 0.2010]
# values are the CIFAR-10 statistics and match neither the ImageNet mean nor the
# pretrained weights.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# NOTE(review): both pipelines crop to 256x256 without resizing first, so large
# photos contribute only a patch - confirm this is intended.
transform_train = transforms.Compose(
        [
            # Random 256x256 crop (padding images that are too small) plus a
            # random horizontal flip as light augmentation.
            transforms.RandomCrop(256, pad_if_needed=True),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

transform_test_valid = transforms.Compose(
        [
            # Deterministic centre crop for evaluation.
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])


In [7]:
# One dataset per split. Each annotation file stores paths relative to its own
# split folder, so annotations_file and img_dir must point at the same split.
train_dataset = dogDataset(annotations_file='./dogImages/train/annotation.csv',
                           img_dir='./dogImages/train/',
                           transform=transform_train)
valid_dataset = dogDataset(annotations_file='./dogImages/valid/annotation.csv',
                           img_dir='./dogImages/valid/',
                           transform=transform_test_valid)
# Fixed: the test split previously read its images from ./dogImages/valid/.
test_dataset = dogDataset(annotations_file='./dogImages/test/annotation.csv',
                          img_dir='./dogImages/test/',
                          transform=transform_test_valid)

In [8]:
# Samples per batch, shared by all three loaders.
BATCH_SIZE = 30

# Only the training loader is shuffled. Validation and test keep file order so
# that evaluation batches are reproducible from run to run.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
# Smoke-test a forward pass over the validation set.
# Each loader item is an (images, labels) pair; only the image batch goes
# through the network. Iterating over the pair itself would also feed the label
# tensor to the model.
model.eval()  # use BatchNorm running statistics instead of updating them
with torch.no_grad():  # no gradients needed for inference
    for images, _labels in valid_loader:
        model(images)

In [138]:
# Read one training image straight into a tensor (no PIL, no transforms) so its
# raw value range can be inspected in the next cell.
tensor = read_image('./dogImages/train/001.Affenpinscher/Affenpinscher_00001.jpg')

In [139]:
# Largest raw pixel value; the output below shows a uint8 tensor peaking at 255,
# i.e. read_image did not rescale the image to [0, 1].
tensor.max()

tensor(255, dtype=torch.uint8)

In [1]:
# A DataLoader is an iterable, not a sequence, so it cannot be indexed with [].
# Pull one batch through an iterator and show only the shapes instead of
# dumping the tensors.
first_images, first_labels = next(iter(valid_loader))
first_images.shape, first_labels.shape

NameError: name 'valid_loader' is not defined

In [9]:
# Image tensor of the first validation batch. Pull just that one batch;
# list(valid_loader) would load and transform every validation image first.
example = next(iter(valid_loader))[0]

[2022-07-23 05:23:40.797 1-8-1-cpu-py36-ml-t3-medium-05a4a7868130c7575335c53b16c7:903 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2022-07-23 05:23:41.066 1-8-1-cpu-py36-ml-t3-medium-05a4a7868130c7575335c53b16c7:903 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


In [11]:
# Inference on the example batch: switch to eval mode so BatchNorm uses (and
# does not overwrite) its pretrained running statistics, and skip autograd.
model.eval()
with torch.no_grad():
    example_response = model(example)

In [12]:
# Expect (batch, n_outputs). The output below shows 30 x 1000, i.e. one row per
# image in the batch and the network's stock 1000-way head.
# NOTE(review): the head presumably has to be replaced to match the number of dog breeds - confirm.
example_response.shape

torch.Size([30, 1000])

In [1]:
# NOTE(review): looks like a scratch cell used to check the kernel responds;
# candidate for removal from the finished notebook.
print('duck')

duck
