In [None]:
import urllib.request
import tarfile
import os
import splitfolders
import pandas as pd
import cv2
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

In [None]:
# Download the Stanford Dogs image archive and unpack it into data/raw.
# The whole step is skipped when data/raw already exists.
os.makedirs('./data', exist_ok=True)
if not os.path.exists('data/raw'):
    url = 'http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar'
    urllib.request.urlretrieve(url,filename='data/dogs.tar')
    # Open the tar file
    with tarfile.open('data/dogs.tar', 'r') as tar:
        # The archive comes from the network, so extract it with the 'data'
        # filter where the running Python provides it: that filter refuses
        # absolute paths, links and members that would land outside data/raw.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(path='data/raw', filter='data')
        else:
            # Older interpreters have no extraction filters; keep plain extraction.
            tar.extractall(path='data/raw')

In [None]:
# Move every extracted .jpg into data/raw/images_folder/<breed>/, where <breed>
# is the source directory name without its leading synset id
# (e.g. 'n02085620-Chihuahua' -> 'Chihuahua').
raw_root = os.path.join(os.getcwd(), 'data', 'raw')
images_root = os.path.join(raw_root, 'images_folder')

for subdir, dirs, files in os.walk(raw_root):
    # Never descend into the destination tree: on a re-run its files are
    # already in place and its directory names carry no synset prefix.
    dirs[:] = [d for d in dirs if os.path.join(subdir, d) != images_root]

    # Split on the first hyphen only, so breeds that contain hyphens
    # themselves (e.g. 'Shih-Tzu') keep their full name.
    _, hyphen, folder = os.path.basename(subdir).partition("-")
    if not hyphen:
        # Not a '<synset>-<breed>' directory, so there is nothing to move from here.
        continue

    for file in files:
        if not file.endswith(".jpg"):
            continue

        target_dir = os.path.join(images_root, folder)
        os.makedirs(target_dir, exist_ok=True)

        # os.path handles separators, so this works on every platform
        os.rename(os.path.join(subdir, file), os.path.join(target_dir, file))


In [None]:
# Split each breed folder 80/20 into data/processed/train and data/processed/val.
# A two-value ratio produces only train and val; the earlier three-value ratio
# with 0.0 in the middle left val empty, and val is the split the later cells read.
splitfolders.ratio('data/raw/images_folder', output='data/processed', seed=1337, ratio=(.8, .2))

In [None]:
def create_image_dict(img, breed):
    """
    Flatten a 2-D image into a single record keyed by pixel position.

    Inputs:
        img: sequence of rows (e.g. a 2-D array); read row by row, left to right
        breed: label stored under the 'Breed' key

    Return:
        image_dict(dict): 'pixel_0', 'pixel_1', ... in row-major order,
        followed by 'Breed'
    """
    # Walk the rows by index and stream their values in reading order
    flat_pixels = (value for row_idx in range(len(img)) for value in img[row_idx])
    image_dict = {f'pixel_{position}': value for position, value in enumerate(flat_pixels)}
    image_dict['Breed'] = breed
    return image_dict

In [None]:
def ordinal_encode(X,cols):
    """
    Ordinal-encode the given columns of a dataframe.

    The encoder is fitted on the values it is given here. The selected
    columns of X are overwritten with their codes, so the dataframe that was
    passed in is modified. Once fitted, the encoder maps any category it has
    not seen to -1.

    Inputs:
        X(pd.DataFrame): dataframe holding the columns to encode
        cols(list): names of the columns to ordinal encode

    Return:
        X(pd.DataFrame): the same dataframe with `cols` replaced by codes
        enc: the fitted ordinal encoder, reusable to transform other data
    """
    encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    # Learn the categories and encode them in one pass
    encoded_values = encoder.fit_transform(X[cols])
    X[cols] = encoded_values
    return X, encoder

In [None]:
# Build one flattened grayscale record per validation image
img_array = []
for subdir, dirs, files in os.walk(os.path.join(os.getcwd(), 'data', 'processed', 'val')):
    # The breed label is taken from the name of the directory holding the image.
    # os.path is used so the lookup does not depend on the platform's separator.
    breed = os.path.basename(os.path.normpath(subdir))

    for file in files:
        if not file.endswith(".jpg"):
            continue

        filepath = os.path.join(subdir, file)

        # Read the image as a single-channel array
        img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising, which would otherwise crash cv2.resize below.
            print(f"Skipping unreadable image: {filepath}")
            continue

        img = cv2.resize(img, (225, 225))
        img_array.append(create_image_dict(img, breed))

# One row per image: a column per pixel plus the 'Breed' label
df = pd.DataFrame(img_array)

In [None]:
# Replace the 'Breed' labels with ordinal codes; `enc` is the fitted encoder
df, enc = ordinal_encode(df, ['Breed'])

In [None]:
# Separate the pixel features from the encoded breed label
X = df.drop(columns='Breed')
y= df['Breed']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize with statistics learned from the training rows only, then apply
# the same transform to the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the logistic regression model on the standardized features
model = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')
model.fit(X_train_scaled, y_train)

# Evaluate on test rows that went through the same scaling as the training rows
accuracy = model.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.2f}")


In [None]:
import numpy as np
import pandas as pd
import torch
from torchvision import datasets, transforms
import torchvision
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
# Select the compute device: CUDA when torch reports it as available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the saved model (presumably a ResNet-50, going by the filename) onto `device`
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which may reject a
# fully pickled model object - confirm against the installed version.
resnet50 = torch.load('models/resnet50.pt',map_location=device)

In [None]:
# Root directory expected to contain 'train' and 'val' image folders
# NOTE(review): the split cell earlier in this notebook writes to 'data/processed',
# not 'output' - confirm this path is the intended one.
data_dir = 'output'

# Per-channel mean / std used to normalize both splits
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop resized to 224 x 224, random horizontal flip, normalization
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Validation pipeline: deterministic resize to 256, center crop to 224 x 224, normalization
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

data_transforms = {'train': train_transform, 'val': val_transform}

# Datasets: one sub-directory per class under each split directory
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train'])
val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val'])

# DataLoaders: only the training split is shuffled
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Look-ups keyed by split name
dataloaders = dict(train=train_loader, val=val_loader)
dataset_sizes = {
    split: len(dataset)
    for split, dataset in (('train', train_dataset), ('val', val_dataset))
}

# Class names in label-index order, taken from the training dataset
class_names = train_dataset.classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Show the class names taken from the training dataset
print(class_names)

In [None]:
# Display a batch of predictions
def visualize_results(model,dataloader,device):
    """
    Plot the first batch from `dataloader` with predicted and true labels.

    Each subplot title reads "predicted (true)", green when the two match and
    red otherwise. Label indices are turned into names through the
    notebook-level `class_names` list. Images are un-normalized with the same
    per-channel mean / std that the notebook's transforms apply.

    Inputs:
        model: callable network returning one row of class scores per image
        dataloader: iterable of (images, labels) batches; images are assumed
            to be channel-first (N, C, H, W) tensors
        device(torch.device): device on which the forward pass runs

    Return:
        None
    """
    model = model.to(device) # Send model to GPU if available
    model.eval()
    with torch.no_grad():
        # Take the batch from the loader that was passed in, not from a global
        images, labels = next(iter(dataloader))
        _, preds = torch.max(model(images.to(device)), 1)

    # Bring everything back to CPU numpy. reshape(-1) keeps the arrays 1-D
    # even when the batch holds a single image.
    preds = preds.cpu().numpy().reshape(-1)
    labels = labels.cpu().numpy().reshape(-1)
    images = images.cpu().numpy()

    # Two rows when there is more than one image; round the column count up
    # so an odd batch size still gets a slot for every image.
    n_images = len(preds)
    n_rows = 2 if n_images > 1 else 1
    n_cols = -(-n_images // n_rows)

    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(15, 10))
    for idx in range(n_images):
        ax = fig.add_subplot(n_rows, n_cols, idx+1, xticks=[], yticks=[])
        # Channel-first -> channel-last for imshow, then undo the normalization
        image = images[idx].transpose((1, 2, 0))
        image = np.clip(std * image + mean, 0, 1)
        ax.imshow(image)
        ax.set_title("{} ({})".format(class_names[preds[idx]], class_names[labels[idx]]),
                    color=("green" if preds[idx]==labels[idx] else "red"))
    return

# Plot one validation batch with the loaded model's predictions
visualize_results(resnet50,val_loader,device)