In [None]:
import os
import json
from PIL import Image
import cv2
from skimage import feature
from skimage.io import imshow, imread

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn

from tqdm import tqdm


In [None]:
# Collect the full path of every file found under the image dataset directory.
imagedata = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input/amazonimage')
    for filename in filenames
]

# Load the quantity table (looked up later by image number as a string key).
# The context manager closes the file handle, which was previously left open.
with open('/kaggle/input/updated-quantities/quantities.json') as f:
    quant_dict = json.load(f)

In [None]:
def to_int(x):
    """Convert a quantity-table key to an integer."""
    return int(x)


# Integer form of every key in the quantity table, in ascending order.
sorted_ind = map(to_int, quant_dict)
indices = sorted(sorted_ind)

In [None]:
import pickle
import bz2

images = []
labels = []
# Tally of kept images per quantity; only quantities 1..12 are tracked.
count_quantities = {quantity: 0 for quantity in range(1, 13)}

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for i, img in enumerate(tqdm(imagedata)):
    # Only every 30th file is processed.
    if i % 30 != 0:
        continue
    try:
        # The file stem is the image number; int() -> str() drops leading zeros
        # before the lookup. Non-numeric stems are skipped instead of crashing.
        num = int(os.path.basename(img).split('.')[0])
        label = quant_dict[str(num)]
        # Skip quantities that are not tracked (anything outside 1..12).
        if label not in count_quantities:
            continue
        with Image.open(img) as raw:
            img1 = raw.convert("RGB").resize((100, 100), Image.BILINEAR)
        # NOTE(review): this JPEG round trip is lossy and writes a scratch file
        # to the working directory; it is kept so the grayscale values match
        # the existing pipeline. Consider an in-memory conversion.
        img1.save("Image1.jpg")
        image = imread("Image1.jpg", as_gray=True)
    except (KeyError, TypeError, ValueError, OSError):
        # Missing label, malformed file name or unreadable image: skip the file.
        continue

    # Record the sample only after it loaded successfully, so the tally always
    # agrees with `images` and `labels`.
    images.append(image)
    labels.append(label)
    count_quantities[label] += 1

In [None]:
# Display the per-quantity tally accumulated while loading the images.
print(count_quantities)

In [None]:
# One flattened (1-D) copy of every loaded image, used as the feature vector.
newimages = [picture.flatten() for picture in images]

## Model Training

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Logistic Regression Classifier

# A fixed seed makes the train/test partition, and every metric derived from
# it, reproducible across kernel restarts.
im_train, im_test, lab_train, lab_test = train_test_split(newimages, labels, random_state=0)

print("Split. Done")

# NOTE(review): newer scikit-learn releases deprecate `multi_class`; confirm
# against the installed version before upgrading.
clf = LogisticRegression(random_state=0, multi_class='ovr').fit(im_train, lab_train)

print("Fit. Done.")

predictions = clf.predict(im_test)

print('Predict. Done')

In [None]:
def loosen_error(predictions, real_values, thresh = 0):
    """Compute a loosened and a strict accuracy for count predictions.

    An exact match earns full credit (1) in both scores. A prediction that
    differs from the true value by at most 1 earns partial credit in the
    loosened score only, and only when at least one of the two values is >= 2.

    Args:
        predictions: Sequence of predicted values.
        real_values: Sequence of true values, same length as ``predictions``.
        thresh: Partial credit for a near miss. The default ``0`` selects a
            value-dependent credit of ``1 - 1/real_value`` instead.

    Returns:
        Tuple ``(loosened_accuracy, strict_accuracy)``; ``(0.0, 0.0)`` when
        the input is empty.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    total = len(real_values)
    if len(predictions) != total:
        raise ValueError("predictions and real_values must have the same length")
    if total == 0:
        # No samples to score; avoid dividing by zero.
        return 0.0, 0.0

    correct = 0
    somewhat_correct = 0
    # zip() iterates by position, so this also works for containers whose
    # [] operator is not positional.
    for predicted, actual in zip(predictions, real_values):
        if actual == predicted:
            # Every exact match counts, including values below 2, and it is
            # credited exactly once.
            correct += 1
            somewhat_correct += 1
        elif abs(actual - predicted) <= 1 and (actual >= 2 or predicted >= 2):
            # Near miss. The ">= 2" guard also guarantees actual != 0 here,
            # so the 1/actual term cannot divide by zero.
            if thresh == 0:
                somewhat_correct += 1 - 1/actual
            else:
                somewhat_correct += thresh
    return somewhat_correct/total, correct/total

In [None]:
# Score the current `predictions` against `lab_test`; thresh=1/2 is the credit
# added for a prediction within 1 of the true label.
sw_corr, corr = loosen_error(predictions, lab_test, 1/2)
print("The accuracy of the model is", corr)
print("The loosen accuracy of the model is", sw_corr)

In [None]:
sns.set(rc = {'figure.figsize':(15,8)})
# Sorted union of the classes present in the truth and the predictions, so the
# matrix rows/columns and the axis ticks show real quantities, not 0..n-1.
class_labels = np.union1d(lab_test, predictions)
conf_mat = confusion_matrix(lab_test, predictions, labels=class_labels)
# fmt='d' prints integer counts; the default format switches to scientific
# notation for counts of 100 or more.
ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
                 xticklabels=class_labels, yticklabels=class_labels)

ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values');

In [None]:
# GaussianNB Classifier.

# A fixed seed makes the train/test partition, and every metric derived from
# it, reproducible across kernel restarts.
im_train, im_test, lab_train, lab_test = train_test_split(newimages, labels, random_state=0)

print("Split. Done")

gnb = GaussianNB().fit(im_train, lab_train)

print("Fit. Done.")

predictions = gnb.predict(im_test)

print('Predict. Done')

In [None]:
# Score the current `predictions` against `lab_test`; thresh=1/2 is the credit
# added for a prediction within 1 of the true label.
sw_corr, corr = loosen_error(predictions, lab_test, 1/2)
print("The accuracy of the model is", corr)
print("The loosen accuracy of the model is", sw_corr)

In [None]:
sns.set(rc = {'figure.figsize':(15,8)})
# Sorted union of the classes present in the truth and the predictions, so the
# matrix rows/columns and the axis ticks show real quantities, not 0..n-1.
class_labels = np.union1d(lab_test, predictions)
conf_mat = confusion_matrix(lab_test, predictions, labels=class_labels)
# fmt='d' prints integer counts; the default format switches to scientific
# notation for counts of 100 or more.
ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
                 xticklabels=class_labels, yticklabels=class_labels)

ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values');

## - - - - - - - - - - - - - - - - - - - - - - - - -

In [None]:
# Canny edge maps of every image at two smoothing levels: canny's default
# sigma, and sigma=2.
new_images_sigma = [feature.canny(picture) for picture in images]
new_images_sigma_two = [feature.canny(picture, sigma=2) for picture in images]

In [None]:
# Flatten each edge map into a 1-D feature vector, one list per sigma setting.
images_sigma = [edge_map.flatten() for edge_map in new_images_sigma]
images_sigma_two = [edge_map.flatten() for edge_map in new_images_sigma_two]

In [None]:
# GaussianNB trained on the default-sigma Canny edge features.
# A fixed seed makes the split, and the metrics below, reproducible.
im_train, im_test, lab_train, lab_test = train_test_split(images_sigma, labels, random_state=0)
gnb = GaussianNB().fit(im_train, lab_train)

predictions = gnb.predict(im_test)

sw_corr, corr = loosen_error(predictions, lab_test, 1/2)
print("The accuracy of the model is", corr)
print("The loosen accuracy of the model is", sw_corr)

sns.set(rc = {'figure.figsize':(15,8)})
# Sorted union of the classes present in the truth and the predictions, so the
# matrix rows/columns and the axis ticks show real quantities, not 0..n-1.
class_labels = np.union1d(lab_test, predictions)
conf_mat = confusion_matrix(lab_test, predictions, labels=class_labels)
# fmt='d' prints integer counts; the default format switches to scientific
# notation for counts of 100 or more.
ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
                 xticklabels=class_labels, yticklabels=class_labels)

ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values');

In [None]:
# Logistic regression trained on the sigma=2 Canny edge features.
# A fixed seed makes the split, and the metrics below, reproducible.
im_train, im_test, lab_train, lab_test = train_test_split(images_sigma_two, labels, random_state=0)
# NOTE(review): newer scikit-learn releases deprecate `multi_class`; confirm
# against the installed version before upgrading.
clf = LogisticRegression(random_state=0, multi_class='ovr').fit(im_train, lab_train)

predictions = clf.predict(im_test)

sw_corr, corr = loosen_error(predictions, lab_test, 1/2)
print("The accuracy of the model is", corr)
print("The loosen accuracy of the model is", sw_corr)

sns.set(rc = {'figure.figsize':(15,8)})
# Sorted union of the classes present in the truth and the predictions, so the
# matrix rows/columns and the axis ticks show real quantities, not 0..n-1.
class_labels = np.union1d(lab_test, predictions)
conf_mat = confusion_matrix(lab_test, predictions, labels=class_labels)
# fmt='d' prints integer counts; the default format switches to scientific
# notation for counts of 100 or more.
ax = sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
                 xticklabels=class_labels, yticklabels=class_labels)

ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values');

## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

In [None]:
# `img2d` was not defined by any cell in this notebook, so this cell raised a
# NameError on a fresh kernel. Use the first preprocessed grayscale image as
# the demo input.
# NOTE(review): confirm this is the image the demo was meant to show.
img2d = images[0]

# Edge maps of the demo image at canny's default sigma and at sigma=1.5.
edges1 = feature.canny(img2d)
edges2 = feature.canny(img2d, sigma=1.5)
imshow(edges1)

In [None]:
# Show the edge map computed with sigma=1.5.
imshow(edges2)