## Task 3: HASOC Hindi hate-speech classification (LSTM + attention)

In [1]:
from data import HASOCData
from model import HindiLSTMAttentionClassifier
from train import train_model 
from eval import evaluate_test_set
import torch
import pickle
from config import config_dict
from torch import nn




In [2]:
def main():
    """Build the classifier, restore saved weights if present, and evaluate it.

    Everything is driven by ``config_dict``:

    1. Load the dataset through ``HASOCData`` and unpickle the pre-trained
       embedding matrix from ``file_paths['embeddings_path']``.
    2. Check that the transposed embedding matrix has shape
       ``(len(data.vocab), embedding_size)``.
    3. Instantiate ``HindiLSTMAttentionClassifier`` and move it to the
       configured device.
    4. Try to restore weights from ``'<model_name>.pth'``. A missing file is
       reported as "no prior model"; any other load failure is reported with
       its cause instead of being silently mistaken for a missing file.
    5. Evaluate on the test set with ``evaluate_test_set``.

    The training call is currently commented out, so this function only
    evaluates whatever weights the model ends up with.

    Raises:
        AssertionError: if the pre-trained embeddings do not match the
            vocabulary size / embedding size expected by the embedding layer.
    """
    data = HASOCData(config_dict['file_paths'])

    # SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
    # only point embeddings_path at a file you produced or otherwise trust.
    with open(config_dict['file_paths']['embeddings_path'], 'rb') as f:
        embedding_weights = pickle.load(f)

    ## check whether the pre-trained embeddings are the same shape as of train vocabulary
    assert embedding_weights.T.shape == (len(data.vocab), config_dict['embedding_size']), "Pre-trained embeddings size not equal to size of embedding layer"

    ## create model instance  with configurations coming from config file
    model = HindiLSTMAttentionClassifier(batch_size=config_dict['batch_size'], output_size=config_dict['num_classes'], 
                                vocab_size=len(data.vocab), hidden_size=config_dict['hidden_size'], 
                                embedding_size=config_dict['embedding_size'], weights=torch.FloatTensor(embedding_weights.T),
                                lstm_layers=config_dict['lstm_layers'], device=config_dict['device'], dropout=config_dict['dropout'],
                                bidirectional=config_dict['is_bi_lstm'], self_attention_config=config_dict['self_attention_config'], 
                                fc_hidden_size=config_dict['fc_hidden_size']).to(config_dict['device'])

    ## get dataloaders for train and test set
    hasoc_dataloader = data.get_data_loader(batch_size=config_dict['batch_size'])

    ## filtering out embedding weights since they won't be optimized
    ## (only consumed by the train_model call below, which is currently disabled)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))

    ## try loading model if it exists as pre-trained on disk
    checkpoint_path = '{}.pth'.format(config_dict['model_name'])
    try:
        state_dict = torch.load(checkpoint_path, map_location=torch.device(config_dict['device']))
        model.load_state_dict(state_dict)
        print('model loaded...')
    except FileNotFoundError:
        # The only case that really means "there is no checkpoint yet".
        print('no prior model')
    except Exception as e:
        # Stay best-effort (do not abort the run), but make it visible that a
        # checkpoint exists and could not be used -- otherwise the evaluation
        # below would silently score an untrained model.
        print('failed to load prior model from {}: {!r}'.format(checkpoint_path, e))

    ## training the model on train set
    #train_model(model, optimizer, hasoc_dataloader, data, max_epochs=config_dict['epochs'],config_dict=config_dict)

    ## evaluate model on test set
    evaluate_test_set(model, data, hasoc_dataloader, device=config_dict['device'])



In [3]:
# Run the pipeline defined in main(): build the model, try to restore saved
# weights, and evaluate on the test set (the training call inside main() is
# commented out).
main()

INFO:root:loading and preprocessing data...
INFO:root:reading and preprocessing data completed...
INFO:root:creating vocabulary...
INFO:root:creating vocabulary completed...
INFO:root:Evaluating accuracy on test set


model loaded...
Test acc: 0.7542918454935622



                 precision    recall  f1-score   support

non hate speech       0.74      0.74      0.74       435
    hate speech       0.77      0.77      0.77       497

       accuracy                           0.75       932
      macro avg       0.75      0.75      0.75       932
   weighted avg       0.75      0.75      0.75       932

