In [None]:
import numpy as np 
import pandas as pd
from fastai.vision.all import *

## Load and Examine the Data

In [None]:
# Competition input folder, resolved relative to the parent of the working directory.
datadir = Path.cwd().parent.joinpath('input', 'street-view-getting-started-with-julia')
datadir.ls()

In [None]:
# Writable scratch folder, also resolved relative to the parent of the working directory.
output_dir = Path.cwd().parent.joinpath('output', 'working')
output_dir

In [None]:
# Unpack the training archive into the working folder.
file_extract(datadir/"train.zip", dest=output_dir)
# Assumes the archive unpacks into a top-level `train/` folder — confirm with the listing below.
train_image_dir = output_dir/'train'

In [None]:
# Sanity check: list what was extracted into the training image folder.
train_image_dir.ls()

In [None]:
# Label table for the training images; `label_func` below reads its "ID" and "Class" columns.
train_labels = pd.read_csv(datadir/"trainLabels.csv")

In [None]:
# Preview the first rows of the label table.
train_labels.head()

In [None]:
# Open whichever entry the directory listing returns first and preview it
# as a thumbnail no larger than 128x128.
sample_path = list(train_image_dir.iterdir())[0]
im = Image.open(sample_path)
im.to_thumb(128, 128)

In [None]:
# Collect the paths of all image files found under the training folder.
fns = get_image_files(train_image_dir)
fns

In [None]:
# Check that every training image can be opened; `failed` holds the ones that cannot.
# NOTE(review): `failed` is only displayed here — no visible cell removes or skips
# these files before training, so a non-empty result needs manual handling.
failed = verify_images(fns)
failed

## Build Data Block and Train Model

In [None]:
def label_func(fname):
    """Return the class label for an image file, looked up by its numeric ID.

    The ID is the first run of digits in ``fname.name``; the label is the
    ``Class`` value of the single ``train_labels`` row whose ``ID`` matches.

    Parameters
    ----------
    fname : path-like object with a ``.name`` attribute (e.g. ``pathlib.Path``).

    Returns
    -------
    The ``Class`` value for that ID, as a Python scalar.

    Raises
    ------
    IndexError
        If the file name contains no digits.
    ValueError
        If ``train_labels`` has zero or more than one row for the ID.
    """
    digit_runs = re.findall("[0-9]+", fname.name)
    if not digit_runs:
        # Same exception type the bare `[0]` index raised, now with context.
        raise IndexError(f"No numeric ID found in file name {fname.name!r}")
    the_id = int(digit_runs[0])

    matches = train_labels.loc[train_labels['ID'] == the_id, "Class"].values
    if matches.size != 1:
        # `.item()` would raise ValueError here anyway, but without saying which
        # file or ID was at fault.
        raise ValueError(
            f"Expected exactly one label for ID {the_id} (file {fname.name!r}), "
            f"found {matches.size}"
        )
    return matches.item()

In [None]:
# Show one training path to try the labelling function on.
fns[0]

In [None]:
# Spot-check: label looked up for the path shown above.
label_func(fns[0])

In [None]:
# Data pipeline: image inputs, one categorical target per image.
db = DataBlock(
    blocks=(ImageBlock, CategoryBlock), 
    get_items=get_image_files, 
    # Hold out 20% of the items for validation; the fixed seed keeps the split repeatable.
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    # Labels come from the CSV lookup defined above.
    get_y=label_func,
    # Bring every image to size 128 using the 'squish' resize method.
    item_tfms=Resize(128, method='squish'),
    # Batch augmentations with flipping disabled — presumably because mirroring
    # would change what the image depicts; confirm.
    batch_tfms=aug_transforms(do_flip=False))

In [None]:
# Materialise the DataLoaders from the extracted training folder.
dls = db.dataloaders(train_image_dir)

In [None]:
# Visual sanity check: one row of three samples from a batch.
dls.show_batch(nrows=1, ncols=3)

In [None]:
# Learner built on the resnet34 architecture, tracking error rate as the metric.
# NOTE(review): newer fastai releases rename `cnn_learner` to `vision_learner` —
# confirm against the installed version.
learn = cnn_learner(dls, resnet34, metrics=error_rate)

In [None]:
# Run the learning-rate finder and print its two suggestions.
# NOTE(review): unpacking two values assumes `lr_find()` returns a
# (lr_min, lr_steep) pair; the return shape differs between fastai releases —
# confirm against the installed version.
lr_min,lr_steep = learn.lr_find()
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
# Fine-tune for 4 epochs.
# NOTE(review): base_lr is hard-coded — presumably copied from an earlier
# lr_find run; it will not follow the values printed above on a re-run.
learn.fine_tune(4, base_lr=1.32e-02)

## Interpret the Model

In [None]:
# Build the interpretation helper from the trained learner.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the most frequently confused class pairs, filtered with a threshold of 5.
interp.most_confused(min_val=5)

## Make Predictions

In [None]:
# Unpack the test archive into the same working folder as the training data.
file_extract(datadir/"test.zip", dest=output_dir)
# Assumes the archive unpacks into a top-level `test/` folder — confirm.
test_image_dir = output_dir/'test'

In [None]:
# Paths of all test images; the same list is reused later to derive submission IDs.
tst_files = get_image_files(test_image_dir)
tst_files

In [None]:
# Test DataLoader derived from the training `dls`; `make_prediction` reads it.
tst_dl = dls.test_dl(tst_files)

In [None]:
def make_prediction(learn, dl=None):
    """Predict a class label for every item served by a DataLoader.

    Parameters
    ----------
    learn : trained learner exposing ``get_preds(dl=...)`` and ``dls.vocab``.
    dl : DataLoader to predict on. When omitted, the notebook-level ``tst_dl``
        is used, so existing ``make_prediction(learn)`` calls behave as before.

    Returns
    -------
    list
        One ``learn.dls.vocab`` entry per prediction row, in the order the
        predictions are returned.
    """
    if dl is None:
        dl = tst_dl
    result = learn.get_preds(dl=dl)
    # result[0] holds one row of per-class scores per item; keep the top class index.
    the_pred_num = np.argmax(result[0], axis=1)
    # Translate class indices back into label values.
    the_pred_label = [learn.dls.vocab[i] for i in the_pred_num]
    return the_pred_label

In [None]:
# Predicted labels for the test set.
prediction = make_prediction(learn)
# Submission ID for each test file: the first run of digits in its file name.
pred_ids = [
    int(re.findall("[0-9]+", test_path.name)[0])
    for test_path in tst_files
]

In [None]:
# Pair each ID with its predicted class, then order rows by ascending ID
# with a fresh 0..n-1 index.
df_pred = (
    pd.DataFrame({"ID": pred_ids, "Class": prediction})
    .sort_values("ID", ignore_index=True)
)

In [None]:
# Preview the submission table before writing it out.
df_pred.head()

In [None]:
# Write the submission file to the current working directory, without the index column.
df_pred.to_csv("submission.csv",index=False)