In [2]:
import numpy as np
import os, shutil
import pickle
import torch
import torchtext
import csv
from core_dl.train_params import TrainParameters
from trainbox import DPCNNTrainBox
from core_dl.batch_utils import *

# Parameters

In [3]:
# Evaluation settings: start from the TrainParameters defaults and override
# only the fields listed below. The table is applied top to bottom, which
# keeps the same assignment order as writing the attributes out one by one.
test_params = TrainParameters()
_test_param_overrides = {
    "DEV_IDS": [0],
    "LOADER_NUM_THREADS": 0,
    "VALID_STEPS": 250,
    "MAX_VALID_BATCHES_NUM": 50,
    "VERBOSE_MODE": False,
    "LOADER_VALID_BATCH_SIZE": 85,
}
for _param_name, _param_value in _test_param_overrides.items():
    setattr(test_params, _param_name, _param_value)

#  Get Model 

In [4]:
# The checkpoint file must be downloaded into the source folder first:
# https://drive.google.com/file/d/1-ap9imLHGnnvum30ekCLLThTH4zNj58S/view?usp=sharing
log_dir = None  # no log directory is configured for this run
checkpoint_dict = dict(ckpt=r"iter_034376.pth.tar")

# Load Dataset (~2mins)

In [5]:
# Root directory handed to torchtext; the DBpedia archive is downloaded here.
data_path = "data"
# exist_ok=True creates the directory only when it is missing, replacing the
# separate exists()/makedirs() pair (which can race with another process).
os.makedirs(data_path, exist_ok=True)

# Only the second element of the returned pair is used (as the test split);
# the first one (presumably the training split -- confirm against the
# torchtext version in use) is discarded.
_, test_dataset = torchtext.datasets.DBpedia(ngrams=1, root=data_path)

# Vocabulary and label counts; both are passed to the model constructor later.
vocab_len = len(test_dataset.get_vocab())
label_len = len(test_dataset.get_labels())
print("vocab len:", vocab_len, "label_len:", label_len)

# Passed as `text_length` when the test box is built.
sample_length = 50

data\dbpedia_csv.tar.gz: 68.3MB [00:05, 11.8MB/s]
560000lines [00:37, 14777.39lines/s]
560000lines [00:59, 9401.45lines/s]
70000lines [00:07, 9115.80lines/s]vocab len: 802999 label_len: 14



# Testing

In [6]:
# Model hyper-parameters handed to the DPCNN test box.
channels = 128
embed_dim = 128

# Collect the constructor arguments in one place so they are easy to inspect.
# NOTE(review): `batchsize` reads LOADER_BATCH_SIZE, while the parameter cell
# only overrides LOADER_VALID_BATCH_SIZE -- confirm this is the intended field.
test_box_kwargs = dict(
    train_params=test_params,
    vocab_size=vocab_len,
    label_size=label_len,
    text_length=sample_length,
    batchsize=test_params.LOADER_BATCH_SIZE,
    log_dir=log_dir,
    ckpt_path_dict=checkpoint_dict,
    channels=channels,
    embed_dim=embed_dim,
)
test_box = DPCNNTrainBox(**test_box_kwargs)

# Run the test loop over the test split. `generate_batch` is not defined in
# this notebook (presumably it comes from the core_dl.batch_utils star import).
# The first returned value is reported as the accuracy; `p` and `g` are kept
# under their original names (presumably predictions and ground truth -- verify).
acc, p, g = test_box.test_loop(test_dataset, generate_batch)
print("Test Accuracy:", acc)

[42m ## ┏━━ FORGIVE HAT ━━┓##[0m
 #    ┏┓     ┏┓  
 #   ┏┛┻━━━━━┛┻┓ 
 #   ┃         ┃ 
 #   ┃    ━    ┃ 
 #   ┃ ┳┛   ┗┳ ┃ 
 #   ┃         ┃ 
 #   ┃    ┻    ┃ 
 #   ┃         ┃ 
 #   ┗━┓     ┏━┛ 
 #     ┃     ┃   
 #     ┃     ┃   
 #     ┃     ┗━━━┓  
 #     ┃         ┣┓ 
 #     ┃         ┏┛ 
 #     ┗┓┓┏━━┳┓┏━┛  
 #      ┃┫┫  ┃┫┫    
 #      ┗┻┛  ┗┻┛    

[42m ## This code is far away from bug with the animal protecting ##[0m
[Training Parameters Overview] ------------------------------------------------------------------------
dev_id :  [0]
max_epochs :  4
loader_batch_size :  6
loader_valid_batch_size :  85
loader_shuffle :  True
start_learning_rate :  0.0001
lr_decay_factor :  0.5
lr_decay_epoch_size :  1
loader_num_threads :  0
verbose :  False
valid_per_batches :  250
valid_max_batch_num :  50
checkpoint_per_iterations :  5000
visualize_per_iterations :  100
log_continue_step :  0
description :  
name_tag :  
log_continue_dir :  
[Optimizer Overview] --------------------------

# Analysis

## Experimental Setup

<ol>
    <li> We used mini-batch RMSprop instead of the SGD optimizer used in the paper, since RMSprop gave us better results. </li>
    <li> We trained for 4-10 epochs, in contrast with the 30 epochs used in the paper; we observed that our model converged sooner and still achieved a good score. </li>
    <li> We also used a learning rate decay factor of 0.1 after 8 epochs ((4/5)*number of epochs) as used in the paper. </li>
</ol>

## Analysis with DBpedia

Comparing the test accuracy with the training and validation accuracy plots below, we see that they all converge to roughly 98%. Not only is this a great score, but it also shows that the model is neither overfitting nor underfitting the data. Furthermore, the continuously decreasing validation loss backs up the claim that the model is not overfitting. The proper fit of the model can be attributed to the architecture of the model, its regularization components (dropout layer), the general training parameters and setup, and the L2 regularization (weight decay) used during training.

<img src="plots/dbpedia-acc-train.png" width="480"> 
<img src="plots/dbpedia-acc-valid.png" width="480">
<img src="plots/dbpedia-loss-valid.png" width="480">

## Examples of predictions:

1. *The James Charnley Residence is located in Chicago’s Gold Coast neighborhood in the 1300 block of North Astor Street. The house is now called the Charnley–Persky House and is operated as a museum and organization headquarters by The Society of Architectural Historians (SAH). An Adler & Sullivan design the townhouse is the work of Louis Sullivan and a young Frank Lloyd Wright who was a junior draftsman in Sullivan’s office at the time.*

**Prediction**: 7 (Building)
\
**Reference**: 7 (Building)


2. *Film Magazine was a film weekly newsmagazine published in Malayalam Language from Kerala India. It was printed at Thiruvananthapuram and distributed throughout Kerala by Kalakaumudi publications private limited. Even though the magazine had leniages with Kerala Kaumudi news paper, it was an independent company. It highlights the doings and happenings of the Mollywood film scene.*

**Prediction**: 13 (Film)
\
**Reference**: 14 (Written Work)

## Results of the model on other datasets:

|                     | DBpedia | AG   | Sogou | Yelp.p | Yelp.f | Yahoo | Ama.f | Ama.p |
|---------------------|---------|------|-------|--------|--------|-------|-------|-------|
| Training Accuracy   | 0.99    | 0.99 | 0.93  | 0.97   | 0.77   | 0.74  | 0.77  | 0.92  |
| Validation Accuracy | 0.99    | 0.90 | 0.95  | 0.97   | 0.57   | 0.70  | 0.52  | 0.92  |
| Test Accuracy       | 0.98    | 0.91 | 0.95  | 0.94   | 0.77   | 0.72  | 0.72  | 0.92  |