# Introduction

This notebook presents a **ConvNet** in PyTorch used to solve the **Street View House Numbers** task.

This is a replication of _Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks_ (Goodfellow et al., 2013).

**Contents**
* [Imports](#Imports)
* [Dataset](#Dataset)
* [Model](#Model)

# Config

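Download and extract the [SVHN](http://ufldl.stanford.edu/housenumbers/) dataset in **Format 1** (train.tar.gz, test.tar.gz, extra.tar.gz), then set `dataset_location` below to the folder that contains the extracted `train/`, `test/` and `extra/` directories.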

In [1]:
# Folder holding the extracted SVHN splits, i.e. <dataset_location>/train/1.png exists.
dataset_location = "/home/marcin/Datasets/SVHN"

# Imports

In [2]:
import os
import pickle
import pathlib

import numpy as np

import h5py  # required to open .mat files in SVHN dataset

In [3]:
import torch
import torch.nn as nn

# Config

In [4]:
# Repeats the assignment from the first Config cell (same value).
dataset_location = "/home/marcin/Datasets/SVHN"

# Dataset

In [5]:
dataset_path = pathlib.Path(dataset_location)

# Fail fast when one of the three extracted splits is missing. Each message
# names the file that was looked for, so a wrong `dataset_location` or an
# archive that was never extracted is easy to spot.
assert os.path.isfile(dataset_path / 'extra/1.png'), \
    f"SVHN 'extra' split not found, expected file: {dataset_path / 'extra/1.png'}"
assert os.path.isfile(dataset_path / 'train/1.png'), \
    f"SVHN 'train' split not found, expected file: {dataset_path / 'train/1.png'}"
assert os.path.isfile(dataset_path / 'test/1.png'), \
    f"SVHN 'test' split not found, expected file: {dataset_path / 'test/1.png'}"

In [21]:
def read_name(f, index):
    """Return the name stored at position ``index`` of ``/digitStruct/name``.

    The first element of that entry is used as a key into ``f``. Every row
    found under that key contributes one character: the row's first element
    is passed to ``chr`` and the characters are concatenated in order.
    """
    name_ref = f['/digitStruct/name'][index][0]
    letters = [chr(row[0]) for row in f[name_ref]]
    return ''.join(letters)

In [22]:
def read_digits_raw(f, index):
    """Collect the per-digit annotation values of one ``/digitStruct/bbox`` entry.

    Parameters
    ----------
    f : mapping-like
        In this notebook an open ``h5py.File``. Must provide
        ``'/digitStruct/bbox'`` and resolve the references stored in it.
    index : int
        Position of the image entry inside ``/digitStruct/bbox``.

    Returns
    -------
    dict
        Maps each of ``'label'``, ``'left'``, ``'top'``, ``'width'`` and
        ``'height'`` to a list of ints, in stored order.
    """
    ref = f['/digitStruct/bbox'][index].item()

    # Dereference the entry once: the lookup does not depend on `key`, so
    # repeating it for each of the five fields is wasted work.
    group = f[ref]

    ddd = {}
    for key in ['label', 'left', 'top', 'width', 'height']:
        dset = group[key]
        if len(dset) == 1:
            # Exactly one row: its first element is the value itself.
            ddd[key] = [int(dset[0][0])]
        else:
            # Otherwise each row's first element is a key into `f`, and the
            # value sits at [0][0] of what that key resolves to.
            ddd[key] = [int(f[row[0]][0][0]) for row in dset]
    return ddd

In [23]:
def get_label(ddict):
    """Build the padded label tuple and the enclosing box for one image.

    ``ddict`` is the per-digit dict with keys 'label', 'left', 'top',
    'width' and 'height' (lists of equal length).

    Returns ``(label, bbox)`` where ``label`` is the digit labels
    right-padded with zeros up to length 5 (longer lists are returned
    unpadded) and ``bbox`` is ``(left, top, right, bottom)`` spanning every
    digit box.
    """
    digits = ddict['label']
    # image '210' -> [2, 1, 10, 0, 0]
    padded = digits + [0] * (5 - len(digits))

    lefts, tops = ddict['left'], ddict['top']
    x_min = min(lefts)
    y_min = min(tops)
    x_max = max(x + w for x, w in zip(lefts, ddict['width']))
    y_max = max(y + h for y, h in zip(tops, ddict['height']))

    return tuple(padded), (x_min, y_min, x_max, y_max)

In [None]:
def read_mat_file(filepath):
    """Parse a ``digitStruct.mat`` file into parallel metadata lists.

    Parameters
    ----------
    filepath : path-like
        Location of the ``.mat`` file; it is opened with ``h5py``.

    Returns
    -------
    dict
        ``{'names': [...], 'labels': [...], 'bboxes': [...]}`` with one entry
        per image, in file order. ``labels`` and ``bboxes`` hold the two
        values returned by ``get_label``.

    Progress is printed every 1000 entries and for the last entry.
    """
    meta = {'names': [], 'labels': [], 'bboxes': []}

    # Open read-only explicitly: h5py releases before 3.0 defaulted to append
    # mode, which could create or modify the file and emitted a deprecation
    # warning. This function only ever reads.
    with h5py.File(filepath, 'r') as f:
        length = len(f['/digitStruct/name'])
        for i in range(length):
            name = read_name(f, i)
            ddict = read_digits_raw(f, i)
            label, bbox = get_label(ddict)
            meta['names'].append(name)
            meta['labels'].append(label)
            meta['bboxes'].append(bbox)

            if i % 1000 == 0 or i == length-1:
                print(f'{i:6d} / {length}')

    return meta

In [None]:
def open_or_generate(name):
    """Return the metadata dict for one SVHN split, using a pickle cache.

    ``name`` must be 'extra', 'test' or 'train'. When ``<name>.pkl`` exists
    under ``dataset_path`` it is unpickled and returned. Otherwise the
    split's ``digitStruct.mat`` is parsed with ``read_mat_file``, the result
    is pickled to ``<name>.pkl`` and then returned.
    """
    assert name in ('extra', 'test', 'train')

    fname = f'{name}.pkl'
    cache_file = dataset_path / fname

    # Cache miss: parse the .mat file, then store the result for next time.
    if not os.path.exists(cache_file):
        print(f'Generating {fname}:')
        meta = read_mat_file(dataset_path / name / 'digitStruct.mat')
        with open(cache_file, 'wb') as sink:
            pickle.dump(meta, sink)
        return meta

    # Cache hit: reuse the previously pickled metadata.
    with open(cache_file, 'rb') as source:
        meta = pickle.load(source)
        print(f'Loaded:{fname}')
    return meta

In [None]:
# Metadata for the 'train' split: read from train.pkl when it exists,
# otherwise parsed from train/digitStruct.mat and cached.
train_meta = open_or_generate(name='train')

Generating train.pkl:
     0 / 33402
  1000 / 33402
  2000 / 33402
  3000 / 33402
  4000 / 33402
  5000 / 33402
  6000 / 33402
  7000 / 33402
  8000 / 33402
  9000 / 33402
 10000 / 33402
 11000 / 33402
 12000 / 33402
 13000 / 33402
 14000 / 33402


In [None]:
# Metadata for the 'extra' split: read from extra.pkl when it exists,
# otherwise parsed from extra/digitStruct.mat and cached.
extra_meta = open_or_generate(name='extra')

In [None]:
# Metadata for the 'test' split: read from test.pkl when it exists,
# otherwise parsed from test/digitStruct.mat and cached.
test_meta = open_or_generate(name='test')