In [2]:
import os
try:
  import wget
except:
  !pip install wget
  import wget
import tarfile


out_dir = 'data/svhn'

train_32_32 = ('http://ufldl.stanford.edu/housenumbers/train_32x32.mat', 'train_32x32.mat')
test_32_32 = ('http://ufldl.stanford.edu/housenumbers/test_32x32.mat', 'test_32x32.mat')
extra_32_32 = ('http://ufldl.stanford.edu/housenumbers/extra_32x32.mat', 'extra_32x32.mat')

train_large = ('http://ufldl.stanford.edu/housenumbers/train.tar.gz', 'train.tar.gz')
test_large = ('http://ufldl.stanford.edu/housenumbers/test.tar.gz', 'test.tar.gz')
extra_large = ('http://ufldl.stanford.edu/housenumbers/extra.tar.gz', 'extra.tar.gz')

Collecting wget
  Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-cp36-none-any.whl size=9682 sha256=9c5ce1f99ba63d91e08bb5c50058c1edbbf7467535112679ec92b386f0a6d12c
  Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [3]:
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [4]:
def download_data(url, filename, out_dir=out_dir):
    filename = os.path.join(out_dir, filename)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if not os.path.exists(filename):
        print(f"Downloading {filename}.")
        wget.download(url, filename)
        print()
    else:
        print(f"Skipping {filename} download (already exists)")


def extract_data(filename, out_dir=out_dir):
    filename = os.path.join(out_dir, filename)

    print(f"Extracting {filename}")
    with tarfile.open(filename) as tar:
        tar.extractall(out_dir)

download_data(*train_32_32)
download_data(*test_32_32)
download_data(*extra_32_32)

download_data(*train_large)
download_data(*test_large)
download_data(*extra_large)

extract_data(train_large[1])
extract_data(test_large[1])
extract_data(extra_large[1])


Downloading data/svhn/train_32x32.mat.

Downloading data/svhn/test_32x32.mat.

Downloading data/svhn/extra_32x32.mat.

Downloading data/svhn/train.tar.gz.

Downloading data/svhn/test.tar.gz.

Downloading data/svhn/extra.tar.gz.

Extracting data/svhn/train.tar.gz
Extracting data/svhn/test.tar.gz
Extracting data/svhn/extra.tar.gz


In [0]:
# -*- coding: utf-8 -*-
from tensorflow import keras
import numpy as np
from PIL import Image
from pathlib import Path
from scipy import io
import h5py
import json

def to_one_hot(a, n):
    result = np.zeros(shape=(a.shape[0], n))
    result[np.arange(len(a)), a] = 1
    return result

def load_multiple_digits_data(dir='data/svhn', train=True, extra=False):

    def parse_digit_struct(file):
        print('file - ' + str(file))
        if Path(f"{file}.cache.json").exists() and os.stat(f"{file}.cache.json").st_size != 0:
            print('exist')
            with open(f"{file}.cache.json", "r") as f:
                images = json.load(f)
                print(f'Loaded cached image attrs from {file}.cache.json')
                return images

        f = h5py.File(file, 'r')
        print(f'Opened file {file}')

        names = f['digitStruct']['name']
        bbox = f['digitStruct']['bbox']

        def extract_name(i):
            return ''.join([chr(c[0]) for c in f[names[i][0]].value])

        def extract_attr(i, attr):
            attr = f[bbox[i].item()][attr]
            if len(attr) > 1:
                return [f[attr.value[j].item()].value[0][0] for j in range(len(attr))]
            else:
                return [attr.value[0][0]]

        images = {}
        print(f'Extracting image attrs from {file}: ', end='')
        for i in range(len(names)):
            name = extract_name(i)
            images[name] = {
                "label": extract_attr(i, 'label'),
                "top": extract_attr(i, 'top'),
                "left": extract_attr(i, 'left'),
                "height": extract_attr(i, 'height'),
                "width": extract_attr(i, 'width')
            }
            if i % 1000 == 0:
                print('.', end='', flush=True)
        print()

        with open(f"{file}.cache.json", 'w+') as f:
            json.dump(images, f)
        return images

    def process_images(dir):
        cache_file = Path(dir) / 'cache.npz'
        if cache_file.exists():
            f = np.load(cache_file)
            print(f'Loaded cached arrays for {dir}')
            return [v for k, v in f.items()]

        attrs = parse_digit_struct(Path(dir) / 'digitStruct.mat')

        x, y = [], []
        print(f'Processing images from {dir}: ', end='', flush=True)
        for i, name in enumerate(os.listdir(dir)):
            if name not in attrs:
                print('s', end='', flush=True)
                continue

            img = Image.open(Path(dir) / name)

            height = int(max(attrs[name]['height']))
            width = int(max(attrs[name]['width']))
            left = max(int(min(attrs[name]['left'])) - 0.5 * width, 0)
            top = max(int(min(attrs[name]['top'])) - 0.5 * height, 0)
            right = min(int(max(attrs[name]['left'])) + 1.5 * width, img.size[0])
            bottom = min(int(max(attrs[name]['top'])) + 1.5 * height, img.size[1])

            img = img.crop(box=(left, top, right, bottom))
            img = img.resize((96, 96))

            label = [d % 10 for d in attrs[name]['label']]
            if len(label) > 6:
                print('e', end='', flush=True)
                continue

            label += [10] * (6 - len(label))
            label = to_one_hot(np.array(label, dtype=np.int), 11)

            x.append(np.array(img))
            y.append(np.array(label))

            if i % 1000 == 0:
                print('.', end='', flush=True)
        print()

        x = np.array(x, dtype=np.uint8)
        y = np.array(y, dtype=np.uint8)
        np.savez(Path(dir) / "cache.npz", x, y)
        return x, y

    x_test, y_test = process_images(Path(dir) / 'test/')

    x_train, y_train = None, None
    if train:
        x_train, y_train = process_images(Path(dir) / 'train/')

    x_extra, y_extra = None, None
    if extra:
        x_extra, y_extra = process_images(Path(dir) / 'extra/')

    return (
        x_train, y_train,
        x_test, y_test,
        x_extra, y_extra
    )

    x_test, y_test = process_images(Path(dir) / 'test/')

    x_train, y_train = None, None
    if train:
        x_train, y_train = process_images(Path(dir) / 'train/')

    x_extra, y_extra = None, None
    if extra:
        x_extra, y_extra = process_images(Path(dir) / 'extra/')

    return (
        x_train, y_train,
        x_test, y_test,
        x_extra, y_extra
    )




In [6]:
x_train, y_train, x_test, y_test, _, _ = load_multiple_digits_data()
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)

def to_y(a, n):
    return [a[:,i,:] for i in range(n)]

if y_train is not None and y_val is not None and y_test is not None:
            y_train = to_y(y_train, 6)
            y_val = to_y(y_val, 6)
            y_test = to_y(y_test, 6)


file - data/svhn/test/digitStruct.mat
Opened file data/svhn/test/digitStruct.mat
Extracting image attrs from data/svhn/test/digitStruct.mat: .



.............
Processing images from data/svhn/test: ...........s.s.s.
file - data/svhn/train/digitStruct.mat
Opened file data/svhn/train/digitStruct.mat
Extracting image attrs from data/svhn/train/digitStruct.mat: ..................................
Processing images from data/svhn/train: ...........................s...s..s..


In [7]:
y_train[1].shape

(30061, 11)

In [8]:
import tensorflow as tf

from tensorflow import keras

input = keras.layers.Input(shape=(96, 96, 3))

x = keras.layers.Conv2D(16, 5, activation='relu', padding='same')(input)
x = keras.layers.MaxPool2D(pool_size=(2, 2), padding = 'same')(x)
x = keras.layers.Conv2D(32, 5, activation='relu', padding='same')(x)
x = keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')(x)
x = keras.layers.Conv2D(64, 5, activation='relu', padding='same')(x)
x = keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dropout(rate=0.1)(x)
x = keras.layers.Dense(100, activation='relu')(x)
x = keras.layers.Dropout(rate=0.1)(x)

out1 = keras.layers.Dense(11,  activation='linear')(x)
out2 = keras.layers.Dense(11,  activation='linear')(x)
out3 = keras.layers.Dense(11,  activation='linear')(x)
out4 = keras.layers.Dense(11,  activation='linear')(x)
out5 = keras.layers.Dense(11,  activation='linear')(x)
out6 = keras.layers.Dense(11,  activation='linear')(x)

model = keras.models.Model(
   inputs=[input],
   outputs=[out1,out2,out3,out4,out5,out6]
)
model.compile(
   optimizer=keras.optimizers.Adam(lr=0.001),
   loss='categorical_crossentropy',
   metrics=['categorical_accuracy']#,
   #loss_weights=[1, 1, 0.5, 0.3, 0.1, 0.05]
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 96, 96, 16)   1216        input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 48, 48, 16)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 48, 48, 32)   12832       max_pooling2d[0][0]              
______________________________________________________________________________________________

In [9]:
model.fit(
                x_train,
                y_train,
                epochs=100,
                batch_size=32,
                verbose=2,
                validation_split=0.1,
                callbacks=[
                    keras.callbacks.EarlyStopping(
                        patience=10,
                        restore_best_weights=True
                    )
                ]
            )

Epoch 1/100
846/846 - 9s - loss: 63.4353 - dense_1_loss: 8.0610 - dense_2_loss: 8.1101 - dense_3_loss: 8.2673 - dense_4_loss: 7.9674 - dense_5_loss: 15.6339 - dense_6_loss: 15.3956 - dense_1_categorical_accuracy: 0.1286 - dense_2_categorical_accuracy: 0.0956 - dense_3_categorical_accuracy: 0.5422 - dense_4_categorical_accuracy: 0.0051 - dense_5_categorical_accuracy: 0.8513 - dense_6_categorical_accuracy: 0.1139 - val_loss: 61.1908 - val_dense_1_loss: 7.9438 - val_dense_2_loss: 6.7803 - val_dense_3_loss: 13.6837 - val_dense_4_loss: 0.5469 - val_dense_5_loss: 16.1181 - val_dense_6_loss: 16.1181 - val_dense_1_categorical_accuracy: 0.1107 - val_dense_2_categorical_accuracy: 0.0971 - val_dense_3_categorical_accuracy: 0.6864 - val_dense_4_categorical_accuracy: 0.0063 - val_dense_5_categorical_accuracy: 1.0000 - val_dense_6_categorical_accuracy: 0.0000e+00
Epoch 2/100
846/846 - 9s - loss: 64.1255 - dense_1_loss: 8.8686 - dense_2_loss: 8.1161 - dense_3_loss: 9.2715 - dense_4_loss: 6.1974 - den

<tensorflow.python.keras.callbacks.History at 0x7f64d0f5f2b0>

In [10]:
y_pred = model.predict(x_test)

total = np.array([True] * len(x_test))
for i, (y1, y2) in enumerate(zip(y_test, y_pred)):
  cur = np.argmax(y1, axis=1) == np.argmax(y2, axis=1)
  total = np.logical_and(total, cur)

  acc = np.mean(cur.astype(np.int))
  #history[f'test_out_{i}_acc'] = acc
  print(f'Accuracy of out_{i} = {acc:.5f}')

acc = np.mean(total.astype(np.int))
#history['test_acc'] = acc
print(f'Accuracy = {acc:.5f}')

Accuracy of out_0 = 0.00145
Accuracy of out_1 = 0.19001
Accuracy of out_2 = 0.82943
Accuracy of out_3 = 0.00222
Accuracy of out_4 = 0.99985
Accuracy of out_5 = 1.00000
Accuracy = 0.00000


In [11]:
!ls data/svhn/

extra		 extra.tar.gz  test_32x32.mat  train		train.tar.gz
extra_32x32.mat  test	       test.tar.gz     train_32x32.mat


In [12]:
_, _, x_test, y_test, x_train, y_train = load_multiple_digits_data(extra=True, train=False)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
if y_train is not None and y_val is not None and y_test is not None:
            y_train = to_y(y_train, 6)
            y_val = to_y(y_val, 6)
            y_test = to_y(y_test, 6)

Loaded cached arrays for data/svhn/test
file - data/svhn/extra/digitStruct.mat
Opened file data/svhn/extra/digitStruct.mat
Extracting image attrs from data/svhn/extra/digitStruct.mat: .



..........................................................................................................................................................................................................
Processing images from data/svhn/extra: .................................................................................................................................................................s..................s...............s.........


In [13]:
model.fit(
                x_train,
                y_train,
                epochs=100,
                batch_size=32,
                verbose=2,
                validation_split=0.1,
                callbacks=[
                    keras.callbacks.EarlyStopping(
                        patience=10,
                        restore_best_weights=True
                    )
                ]
            )

Epoch 1/100
5123/5123 - 51s - loss: 60.4374 - dense_1_loss: 8.1377 - dense_2_loss: 8.4863 - dense_3_loss: 7.6266 - dense_4_loss: 4.4006 - dense_5_loss: 15.8552 - dense_6_loss: 15.9293 - dense_1_categorical_accuracy: 0.0785 - dense_2_categorical_accuracy: 0.1058 - dense_3_categorical_accuracy: 0.0663 - dense_4_categorical_accuracy: 0.0066 - dense_5_categorical_accuracy: 0.4659 - dense_6_categorical_accuracy: 0.9619 - val_loss: 58.9354 - val_dense_1_loss: 9.3872 - val_dense_2_loss: 9.2767 - val_dense_3_loss: 6.7380 - val_dense_4_loss: 1.3034 - val_dense_5_loss: 16.1120 - val_dense_6_loss: 16.1181 - val_dense_1_categorical_accuracy: 0.0639 - val_dense_2_categorical_accuracy: 0.1020 - val_dense_3_categorical_accuracy: 0.0512 - val_dense_4_categorical_accuracy: 0.0057 - val_dense_5_categorical_accuracy: 0.0000e+00 - val_dense_6_categorical_accuracy: 1.0000
Epoch 2/100
5123/5123 - 51s - loss: 59.4880 - dense_1_loss: 8.4343 - dense_2_loss: 8.6262 - dense_3_loss: 8.2436 - dense_4_loss: 2.3401 

<tensorflow.python.keras.callbacks.History at 0x7f64d21dbcc0>

In [14]:
y_pred = model.predict(x_test)

total = np.array([True] * len(x_test))
for i, (y1, y2) in enumerate(zip(y_test, y_pred)):
  cur = np.argmax(y1, axis=1) == np.argmax(y2, axis=1)
  total = np.logical_and(total, cur)

  acc = np.mean(cur.astype(np.int))
  #history[f'test_out_{i}_acc'] = acc
  print(f'Accuracy of out_{i} = {acc:.5f}')

acc = np.mean(total.astype(np.int))
#history['test_acc'] = acc
print(f'Accuracy = {acc:.5f}')

Accuracy of out_0 = 0.20332
Accuracy of out_1 = 0.07859
Accuracy of out_2 = 0.01852
Accuracy of out_3 = 0.00122
Accuracy of out_4 = 0.00000
Accuracy of out_5 = 0.00000
Accuracy = 0.00000


In [15]:
input = keras.layers.Input(shape=(96, 96, 3))

mobile_net = keras.applications.mobilenet_v2.MobileNetV2(
  include_top=False,
  weights='imagenet',
  input_shape=(96, 96, 3),
  input_tensor=input,
  pooling='avg'
)

dropout = keras.layers.Dropout(rate=0.1)(mobile_net.output)

outputs = [
  keras.layers.Dense(11, activation='softmax', name=f'out_{i}')(dropout)
  for i in range(6)
]

model1 = keras.models.Model(
  inputs=[input],
  outputs=outputs
)
model1.compile(
  optimizer=keras.optimizers.Adam(lr=0.001),
  loss='categorical_crossentropy',
  metrics=['categorical_accuracy'],
  loss_weights=[1, 1, 0.5, 0.3, 0.1, 0.05]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5


In [16]:
model.fit(
                x_train,
                y_train,
                epochs=100,
                batch_size=32,
                verbose=2,
                validation_split=0.1,
                callbacks=[
                    keras.callbacks.EarlyStopping(
                        patience=10,
                        restore_best_weights=True
                    )
                ]
            )

Epoch 1/100
5123/5123 - 51s - loss: 46.1482 - dense_1_loss: 0.0013 - dense_2_loss: 8.5820 - dense_3_loss: 5.8069 - dense_4_loss: 0.9042 - dense_5_loss: 14.7355 - dense_6_loss: 16.1174 - dense_1_categorical_accuracy: 0.0027 - dense_2_categorical_accuracy: 0.0993 - dense_3_categorical_accuracy: 0.4006 - dense_4_categorical_accuracy: 0.0067 - dense_5_categorical_accuracy: 1.8303e-05 - dense_6_categorical_accuracy: 0.9998 - val_loss: 47.2816 - val_dense_1_loss: 1.1921e-07 - val_dense_2_loss: 8.4885 - val_dense_3_loss: 5.7571 - val_dense_4_loss: 0.8050 - val_dense_5_loss: 16.1128 - val_dense_6_loss: 16.1181 - val_dense_1_categorical_accuracy: 0.0025 - val_dense_2_categorical_accuracy: 0.0983 - val_dense_3_categorical_accuracy: 0.3997 - val_dense_4_categorical_accuracy: 0.0065 - val_dense_5_categorical_accuracy: 0.0000e+00 - val_dense_6_categorical_accuracy: 1.0000
Epoch 2/100
5123/5123 - 51s - loss: 47.4463 - dense_1_loss: 1.1921e-07 - dense_2_loss: 8.5879 - dense_3_loss: 5.7935 - dense_4_l

<tensorflow.python.keras.callbacks.History at 0x7f647595e0b8>

In [17]:
y_pred = model1.predict(x_test)

total = np.array([True] * len(x_test))
for i, (y1, y2) in enumerate(zip(y_test, y_pred)):
  cur = np.argmax(y1, axis=1) == np.argmax(y2, axis=1)
  total = np.logical_and(total, cur)

  acc = np.mean(cur.astype(np.int))
  #history[f'test_out_{i}_acc'] = acc
  print(f'Accuracy of out_{i} = {acc:.5f}')

acc = np.mean(total.astype(np.int))
#history['test_acc'] = acc
print(f'Accuracy = {acc:.5f}')

Accuracy of out_0 = 0.03972
Accuracy of out_1 = 0.15810
Accuracy of out_2 = 0.04966
Accuracy of out_3 = 0.00964
Accuracy of out_4 = 0.30081
Accuracy of out_5 = 0.00031
Accuracy = 0.00000


In [0]:
model.save_weights('models/svhn_multiple_mobile_net_extra/model')

In [19]:
!ls models/svhn_multiple_mobile_net_extra/

checkpoint  model.data-00000-of-00002  model.data-00001-of-00002  model.index
