## Classification using Perceptron

In [1]:
%load_ext nb_black

import collections
import numpy as np
import pandas as pd
import re
from argparse import Namespace
import os

import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F

import utils
from dataset import TweetDataset
from classifiers import TweetPerceptronClassifier

# Experiment configuration: vocabulary/data/path options, training
# hyperparameters, and runtime switches, gathered in one Namespace.
args = Namespace(
    frequency_cutoff=25,
    model_state_file="model.pth",
    tweets_csv="data/train_with_splits.csv",
    save_dir="models/perceptron",
    vectorizer_file="vectorizer.json",
    batch_size=128,
    early_stopping_criteria=5,
    learning_rate=0.001,
    num_epochs=100,
    seed=1337,
    catch_keyboard_interrupt=True,
    cuda=True,
    expand_filepaths_to_save_dir=True,
    reload_from_files=False,
)

# When requested, relocate the vectorizer and model-state files under
# save_dir and report the resulting paths.
if args.expand_filepaths_to_save_dir:
    args.vectorizer_file, args.model_state_file = (
        os.path.join(args.save_dir, file_name)
        for file_name in (args.vectorizer_file, args.model_state_file)
    )
    print("Expanded filepaths: ")
    print(f"\t{args.vectorizer_file}")
    print(f"\t{args.model_state_file}")


# Keep CUDA switched on only if a CUDA device is actually available, then
# derive the torch device from the final flag.
args.cuda = args.cuda and torch.cuda.is_available()
args.device = torch.device("cuda") if args.cuda else torch.device("cpu")

print(f"Using Cuda: {args.cuda}")

# Seed via the project helper and hand save_dir to the project's directory
# helper (both live in the local `utils` module).
utils.set_seed_everywhere(args.seed, args.cuda)
utils.handle_dirs(args.save_dir)

Expanded filepaths: 
	models/perceptron/vectorizer.json
	models/perceptron/model.pth
Using Cuda: False




<IPython.core.display.Javascript object>

In [2]:
# Build (or reload) the dataset together with its vectorizer.
if args.reload_from_files:
    print("Loading Dataset & Vectorizer")
    # Fixed: the attribute was misspelled `vectrozier_file`, which raised
    # AttributeError on the Namespace whenever reload_from_files was True.
    dataset = TweetDataset.load_dataset_and_load_vectorizer(
        args.tweets_csv, args.vectorizer_file
    )
else:
    print("Loading dataset & Creating vectorizer")
    # NOTE(review): args.frequency_cutoff is defined in the config cell but is
    # not used anywhere in this notebook; confirm that the second argument of
    # load_dataset_and_make_vectorizer is really meant to be the vectorizer
    # file path rather than the frequency cutoff.
    dataset = TweetDataset.load_dataset_and_make_vectorizer(
        args.tweets_csv, args.vectorizer_file
    )
    dataset.save_vectorizer(args.vectorizer_file)

# Fixed: fetch the vectorizer on both paths. It was previously assigned only
# in the `else` branch, so the reload path hit a NameError on the next line.
vectorizer = dataset.get_vectorizer()

# Single-output classifier whose input width is the tweet vocabulary size.
classifier = TweetPerceptronClassifier(num_features=len(vectorizer.tweet_vocab))
print(classifier)

# Binary cross-entropy on raw logits, optimised with Adam.
loss_func = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)

# Halve the learning rate when the monitored metric stops decreasing
# (mode="min") for more than `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer, mode="min", factor=0.5, patience=1
)

Loading dataset & Creating vectorizer
TweetPerceptronClassifier(
  (fc1): Linear(in_features=3108, out_features=1, bias=True)
)


<IPython.core.display.Javascript object>

In [3]:
# Train the classifier with the project's training helper. The value it
# returns is kept in `train_state`; the recorded output shows progress bars
# for the train/val splits and per-epoch loss/accuracy summaries.
train_state = utils.train_model(
    classifier, loss_func, optimizer, scheduler, dataset, args
)
# Evaluate on the test split. The helper receives the current `train_state`
# and its return value replaces it, so this must run after training.
train_state = utils.evaluate_test_split(
    classifier, dataset, loss_func, train_state, args
)

Training Routine:   0%|          | 0/100 [00:00<?, ?it/s]

split=train:   0%|          | 0/41 [00:00<?, ?it/s]

split=val:   0%|          | 0/8 [00:00<?, ?it/s]

--------------- 0th Epoch Stats---------------
Training Loss=0.681884057638122, Training Accuracy=66.88262195121949
Validation Loss=0.6708768680691719, Validation Accuracy=71.19140625.
------------------------------------------------------------
--------------- 10th Epoch Stats---------------
Training Loss=0.520560126479079, Training Accuracy=81.85975609756099
Validation Loss=0.5529165044426918, Validation Accuracy=78.515625.
------------------------------------------------------------
--------------- 20th Epoch Stats---------------
Training Loss=0.447150328537313, Training Accuracy=84.71798780487804
Validation Loss=0.5127294063568115, Validation Accuracy=79.58984374999999.
------------------------------------------------------------
--------------- 30th Epoch Stats---------------
Training Loss=0.4019845971247045, Training Accuracy=85.78506097560975
Validation Loss=0.48284977674484253, Validation Accuracy=80.17578125.
------------------------------------------------------------
-------

<IPython.core.display.Javascript object>

In [4]:
# Spot-check the trained classifier on one raw tweet.
# `utils` is already imported in the first cell; the repeated `import utils`
# that used to sit here was a no-op (a plain re-import does not reload an
# edited module), so it has been dropped.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'list' object has no attribute 'lower'". The failing code
# is not visible in this notebook (presumably inside utils.predict_class or
# the vectorizer it calls) — confirm what type each of them expects for the
# tweet (raw string vs. token list) and fix it there.
tweet = "The Campaign: Will Ferrell and Zach Galifianakis commit comic mayhem in this hilarious political farce. 4* http://t.co/tQ3j2qGtZQ'"
utils.predict_class(classifier, dataset.get_vectorizer(), tweet)

AttributeError: 'list' object has no attribute 'lower'

<IPython.core.display.Javascript object>