In [1]:
import os 
import torch 
import torch.nn 
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F 
import torchvision.utils as utils
import cv2 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
from PIL import Image
import argparse

In [2]:
main_df = pd.read_csv("archive/instruments.csv")

In [3]:
df1 = main_df[main_df['data set'] == 'test']
test_df = df1[df1['labels'] != 'piano']
df2 = main_df[main_df['data set'] == 'train']
train_df = df2[df2['labels'] != 'piano']

In [4]:
test_labels = test_df['labels']
train_labels = train_df['labels']

In [5]:
train_df.head()

Unnamed: 0,filepaths,labels,data set
0,train/acordian/001.jpg,acordian,train
1,train/acordian/002.jpg,acordian,train
2,train/acordian/003.jpg,acordian,train
3,train/acordian/004.jpg,acordian,train
4,train/acordian/005.jpg,acordian,train


In [6]:
test_df.head()

Unnamed: 0,filepaths,labels,data set
4793,test/acordian/1.jpg,acordian,test
4794,test/acordian/2.jpg,acordian,test
4795,test/acordian/3.jpg,acordian,test
4796,test/acordian/4.jpg,acordian,test
4797,test/acordian/5.jpg,acordian,test


In [7]:
transform = transforms.Compose([        # Defining a variable transforms
transforms.Resize(256),                # Resize the image to 256×256 pixels
transforms.CenterCrop(224),            # Crop the image to 224×224 pixels about the center
transforms.ToTensor(),                 # Convert the image to PyTorch Tensor data type
transforms.Normalize(                  # Normalize the image
mean=[0.485, 0.456, 0.406],            # Mean and std of image as also used when training the network
std=[0.229, 0.224, 0.225]      
)])

In [8]:
train_imgs = []
test_imgs = []

In [9]:
for filepath in train_df['filepaths']:
    
    filepath = "archive/" + str(filepath)
    img = Image.open(filepath)
    transformed_img = transform(img)
    batch_img = torch.unsqueeze(transformed_img, 0)
    train_imgs.append(batch_img)
    

In [10]:
for filepath in test_df['filepaths']:
    filepath = "archive/" + str(filepath)
    img = Image.open(filepath)
    transformed_img = transform(img)
    batch_img = torch.unsqueeze(transformed_img, 0)
    test_imgs.append(batch_img)
    

In [11]:
print(train_imgs[0].shape)
print(test_imgs[0].shape)

torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])


In [12]:
alexnet = models.alexnet(pretrained = True)
alexnet.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [13]:
train_features = []
test_features = []

In [14]:
for img in train_imgs:
    with torch.no_grad():
        feature = alexnet(img).detach().numpy()
    train_features.append(feature)

In [15]:
for img in test_imgs:
    with torch.no_grad():
        feature = alexnet(img).detach().numpy()
    test_features.append(feature)

In [16]:
train_features[0].shape

(1, 1000)

In [17]:
print(len(train_features))
print(len(train_labels))

4674
4674


In [18]:
print(len(test_features))
print(len(test_labels))

145
145


In [19]:
for i in range(len(train_features)):
    train_features[i] = train_features[i].T.reshape((1000,))

In [20]:
train_features1 = np.array(train_features)
train_features1.shape

(4674, 1000)

In [21]:
for i in range(len(test_features)):
    test_features[i] = test_features[i].T.reshape((1000,))

## Model Prediction

In [22]:
from sklearn import svm
model = svm.SVC(C=0.0005,kernel='linear', class_weight='balanced', gamma='scale')
model.fit(train_features,train_labels)

SVC(C=0.0005, class_weight='balanced', kernel='linear')

In [23]:
acc = model.score(test_features,test_labels)
print(acc)

0.9862068965517241


In [24]:
from sklearn.linear_model import LogisticRegression
model_lr = LogisticRegression(max_iter=1000)
model_lr.fit(train_features,train_labels)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(max_iter=1000)

In [26]:
acc1 = model_lr.score(test_features,test_labels)
print(acc1)

0.9793103448275862
