# Shopee Product Classification 

This notebook experiments with several neural networks on the product dataset obtained from Shopee and evaluates their results.

The following models are evaluated as part of this notebook:

Baseline 1: CNN <br>
Baseline 2: CNN with augmented layers <br>
Improvement 1: Adding ANN <br>
Improvement 2: Adding RNN <br>

## Imports and Config

In [1]:
!pip install scikit-image
!pip install shopee_crawler
!pip install torchvision
!pip install opencv-python

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting shopee_crawler
  Downloading shopee_crawler-0.2.2.tar.gz (6.5 kB)
Building wheels for collected packages: shopee-crawler
  Building wheel for shopee-crawler (setup.py): started
  Building wheel for shopee-crawler (setup.py): finished with status 'done'
  Created wheel for shopee-crawler: filename=shopee_crawler-0.2.2-py3-none-any.whl size=8850 sha256=94c1f97140bd5bf0c2137c9d3b5a4b1f233ff48b0ce6ea4e12fe26f559902cb7
  Stored in directory: c:\users\admin\appdata\local\pip\cache\wheels\01\5a\8b\78e5127b61e918331821ce78d30173ea113f1c49409d58992d
Successfully built shopee-crawler
Installing collected packages: shopee-crawler
Successfully installed shopee-crawler-0.2.2
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collec

In [1]:
import os
import sys
import json
import importlib
from tqdm import tqdm

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from utils import file_utils, dataset
from scripts import crawler

import ipywidgets as widgets

In [2]:
# Display the imported module object to check which crawler.py file was loaded.
crawler

<module 'scripts.crawler' from 'C:\\Users\\admin\\Documents\\GitHub\\cs5242-project\\scripts\\crawler.py'>

In [None]:
# Re-import utils.dataset so edits to its source are picked up without restarting
# the kernel. Objects created before the reload keep their old class definition,
# so re-create them afterwards.
importlib.reload(dataset)

## Data Load

In [None]:
# Relative path of the image folder.
# NOTE(review): no visible cell reads this variable; the file_utils helpers used
# below appear to rely on their own paths (file_utils.images) — confirm it is needed.
image_dir = 'data/images'

In [None]:
# Make sure the image data is available locally: reuse the extracted folder if it
# exists, otherwise unpack the archive, otherwise download everything.
if file_utils.check_images_dir():
    print(f'Images already exist at: {file_utils.images}')
elif file_utils.check_tar_exists():
    print('Images not yet extracted')
    print(f'Image zip exists at: {file_utils.images_zip}')
    file_utils.extract_tar()
else:
    print('Images need to be downloaded')
    # NOTE(review): category_urls, get_category_data and download_images are not
    # defined or imported in the visible cells — presumably they come from
    # scripts.crawler; confirm, otherwise this branch raises NameError.
    for c in category_urls:
        get_category_data('data', c)
    download_images('data')

## Note: To be consistent with the other category folders, make the following change:
- Add the "-cat" suffix to the Women's Apparel directory after unzipping. (Maybe we can handle this when creating the zip itself; otherwise we can add a condition for it in the dataloader.)

## Dataset

In [None]:
# Instantiate the project's dataset wrapper from utils.dataset; later cells read its
# `categories` and `data_files` attributes and call its loading/plotting helpers.
data = dataset.DataSet()

In [None]:
# Horizontal bar chart of the number of images available per category.
# The (category, count) pairs are unpacked explicitly rather than via zip(*...),
# which raises ValueError when the mapping is empty.
pairs = list(data.image_count_per_category().items())
cats = tuple(name for name, _ in pairs)
items = tuple(count for _, count in pairs)

fig, ax = plt.subplots(figsize=(5, 8))
ax.barh(cats, items)
# Title and axis labels let the figure stand alone; the trailing ';' hides the repr.
ax.set(title='Images per category', xlabel='Number of images', ylabel='Category');

In [None]:
# Run the DataSet's bulk loader.
# NOTE(review): presumably this populates the data the cells below rely on
# (e.g. data.data_files) — confirm in utils.dataset.
data.load_all()

In [None]:
# Sanity check: decode one image belonging to the first category.
# read_image is imported here because the notebook otherwise only imports it in a later cell.
from torchvision.io import read_image

category = data.categories[0]
# data_files is an attribute of the DataSet instance; there is no `self` at notebook
# cell scope, so the original `self.data_files` raised NameError.
# Files are matched by checking that the category name occurs in the path.
cat_files = [f for f in data.data_files if category in f]
img = read_image(cat_files[0])

In [22]:
import cv2
from torchvision.io import read_image

# Take the first three file paths and decode the first one into a tensor;
# the cell's last expression displays the decoded image tensor.
f = data.data_files[:3]
img = read_image(f[0])
img

tensor([[[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]],

        [[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]],

        [[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]]], dtype=torch.uint8)

In [11]:
# Preview only the first 20 image paths; displaying the full list floods the
# notebook output and bloats the saved file.
data.data_files[:20]

['data/images\\Automotive-cat\\10001357946.png',
 'data/images\\Automotive-cat\\10013443456.png',
 'data/images\\Automotive-cat\\10018208160.png',
 'data/images\\Automotive-cat\\10027861295.png',
 'data/images\\Automotive-cat\\1009237014.png',
 'data/images\\Automotive-cat\\10103393899.png',
 'data/images\\Automotive-cat\\10103699491.png',
 'data/images\\Automotive-cat\\10116684466.png',
 'data/images\\Automotive-cat\\10125579876.png',
 'data/images\\Automotive-cat\\10127844231.png',
 'data/images\\Automotive-cat\\10139648036.png',
 'data/images\\Automotive-cat\\10150954829.png',
 'data/images\\Automotive-cat\\10183106552.png',
 'data/images\\Automotive-cat\\10203844101.png',
 'data/images\\Automotive-cat\\10207189556.png',
 'data/images\\Automotive-cat\\10210893169.png',
 'data/images\\Automotive-cat\\10212948302.png',
 'data/images\\Automotive-cat\\1022030487.png',
 'data/images\\Automotive-cat\\1022195866.png',
 'data/images\\Automotive-cat\\10223387960.png',
 'data/images\\Automoti

In [10]:
# Interactive sample viewer: choose a category, then click the button to plot samples.
all_categories = data.categories
# Relabel the button that interact_manual renders (the callback only runs on click).
widgets.interact_manual.opts['manual_name'] = 'Show samples'
# Passing the list as `category` yields a dropdown of category names; the selected
# value is forwarded to data.plot_samples.
im = widgets.interact_manual(lambda category: data.plot_samples(category), category=all_categories)

interactive(children=(Dropdown(description='category', options=('Automotive', 'Beauty-Personal-Care', 'Cameras…

In [18]:
# TODO: Check whether the image data needs to be normalized or reshaped.
# Display the list of category names.
all_categories

['Automotive',
 'Beauty-Personal-Care',
 'Cameras-Drones',
 'Computers-Peripherals',
 'Dining-Travel-Services',
 'Food-Beverages',
 'Health-Wellness',
 'Hobbies-Books',
 'Home-Appliances',
 'Home-Living',
 'Jewellery-Accessories',
 'Kids-Fashion',
 "Men's-Bags",
 "Men's-Shoes",
 "Men's-Wear",
 'Miscellaneous',
 'Mobile-Gadgets',
 'Pet-Food-Supplies',
 'ShopeePay-Near-Me',
 'Sports-Outdoors',
 'Toys-Kids-Babies',
 'Travel-Luggage',
 'Video-Games',
 'Watches',
 "Women's-Apparel",
 "Women's-Bags",
 "Women's-Shoes"]

In [20]:
# Map each category name to an integer class index (its position in all_categories).
cat_map = {name: index for index, name in enumerate(all_categories)}
cat_map

{'Automotive': 0,
 'Beauty-Personal-Care': 1,
 'Cameras-Drones': 2,
 'Computers-Peripherals': 3,
 'Dining-Travel-Services': 4,
 'Food-Beverages': 5,
 'Health-Wellness': 6,
 'Hobbies-Books': 7,
 'Home-Appliances': 8,
 'Home-Living': 9,
 'Jewellery-Accessories': 10,
 'Kids-Fashion': 11,
 "Men's-Bags": 12,
 "Men's-Shoes": 13,
 "Men's-Wear": 14,
 'Miscellaneous': 15,
 'Mobile-Gadgets': 16,
 'Pet-Food-Supplies': 17,
 'ShopeePay-Near-Me': 18,
 'Sports-Outdoors': 19,
 'Toys-Kids-Babies': 20,
 'Travel-Luggage': 21,
 'Video-Games': 22,
 'Watches': 23,
 "Women's-Apparel": 24,
 "Women's-Bags": 25,
 "Women's-Shoes": 26}