# Classifying Dengue-Related Tweets with a Neural Network

In [None]:
%cd /home/manoelribeiro/PycharmProjects/GeoDiseaseTwitter/
from sklearn.model_selection import KFold, StratifiedShuffleSplit
from sklearn.metrics import classification_report
from nnet.models.char_cnn_zhang import CharCNNZhang
from nnet.models.char_cnn_kim import CharCNNKim
from nnet.models.char_tcn import CharTCN
from nnet.data_utils import Data
import numpy as np
import json

In [None]:
# Human-readable class names, passed as `target_names` to
# `classification_report` further down.
# NOTE(review): presumably listed in the same order as the label columns
# produced by `Data` -- confirm against the data loader.
values = ["Campaign", "Personal", "Information", "Opinion", "Joke"]

# Derived from the label list so the two can never drift apart.
nclasses = len(values)

# Which character-level architecture to train: "kim", "tcn" or "zhang".
model_name = "zhang"

In [None]:
# Load configurations.
# A context manager guarantees the file handle is closed even if parsing fails.
with open("./nnet/dengue.json") as config_file:
    config = json.load(config_file)

# Load all data, using the "data" section of the configuration.
data_config = config["data"]
data = Data(
    data_source=data_config["data_source"],
    alphabet=data_config["alphabet"],
    input_size=data_config["input_size"],
    num_of_classes=data_config["num_of_classes"],
)
data.load_data()
inputs, labels = data.get_all_data()

# Select the model class and its configuration section.
# Each supported `model_name` maps to (model class, key of the matching
# hyper-parameter section in `config`).
_MODELS = {
    "kim": (CharCNNKim, "char_cnn_kim"),
    "tcn": (CharTCN, "char_tcn"),
    "zhang": (CharCNNZhang, "char_cnn_zhang"),
}

# Fail fast on an unsupported name. Otherwise `_model` and `name` would be left
# undefined (or, in a notebook, silently keep the values of a previous run).
if model_name not in _MODELS:
    raise ValueError(
        "Unknown model_name %r; expected one of %s"
        % (model_name, sorted(_MODELS))
    )

_model, name = _MODELS[model_name]

In [None]:
# Sanity check: show the dimensions of the inputs returned by `data.get_all_data()`.
print(inputs.shape)

# K-Fold Cross-Validation

In [None]:
# 4-fold cross-validation: a fresh model is built, trained and evaluated on
# every fold. The seed keeps the shuffled folds reproducible.
# NOTE(review): despite its name, `skf` is a plain (non-stratified) KFold.
skf = KFold(n_splits=4, random_state=1, shuffle=True)

for train_index, test_index in skf.split(inputs):

    training_inputs, validation_inputs = inputs[train_index], inputs[test_index]
    training_labels, validation_labels = labels[train_index], labels[test_index]

    # Build a new, untrained model so that no weights leak between folds
    model = _model(input_size=config["data"]["input_size"], alphabet_size=config["data"]["alphabet_size"],
                   num_of_classes=config["data"]["num_of_classes"], **config[name])

    # Train model
    model.train(training_inputs=training_inputs, training_labels=training_labels,
                validation_inputs=validation_inputs, validation_labels=validation_labels,
                epochs=config["training"]["epochs"], batch_size=config["training"]["batch_size"])

    # Test model
    y_score = model.model.predict(validation_inputs)

    # Convert the scores into one-hot predictions. Indexing with argmax marks
    # exactly one class per row, whereas comparing against the row maximum
    # would mark several classes whenever scores are tied.
    y_pred = np.zeros_like(y_score)
    y_pred[np.arange(y_score.shape[0]), y_score.argmax(axis=1)] = 1

    # Print the label/prediction shapes and the per-class metrics of this fold
    print("Input Shape")
    print("-----------")
    print("Shape: y_true = " + str(np.shape(validation_labels)))
    print("Shape: y_pred = " + str(np.shape(y_pred)))
    print("--------------------------------------")
    print(classification_report(validation_labels, y_pred, target_names=values))

# Train on a Stratified 80% Split of the Entire Data (20% Held Out for Evaluation)

In [None]:
# Single stratified 80/20 split: train on 80% of the data and evaluate on the
# held-out 20%. The seed makes the split reproducible, matching the seeded
# KFold used for cross-validation.
sss = StratifiedShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=1)

for train_index, test_index in sss.split(inputs, labels):

    training_inputs, validation_inputs = inputs[train_index], inputs[test_index]
    training_labels, validation_labels = labels[train_index], labels[test_index]

    # Build a new, untrained model for this split
    model = _model(input_size=config["data"]["input_size"], alphabet_size=config["data"]["alphabet_size"],
                   num_of_classes=config["data"]["num_of_classes"], **config[name])

    # Train model
    model.train(training_inputs=training_inputs, training_labels=training_labels,
                validation_inputs=validation_inputs, validation_labels=validation_labels,
                epochs=config["training"]["epochs"], batch_size=config["training"]["batch_size"])

    # Test model
    y_score = model.model.predict(validation_inputs)

    # Convert the scores into one-hot predictions. Indexing with argmax marks
    # exactly one class per row, whereas comparing against the row maximum
    # would mark several classes whenever scores are tied.
    y_pred = np.zeros_like(y_score)
    y_pred[np.arange(y_score.shape[0]), y_score.argmax(axis=1)] = 1

    # Print the label/prediction shapes and the per-class metrics of the split
    print("Input Shape")
    print("-----------")
    print("Shape: y_true = " + str(np.shape(validation_labels)))
    print("Shape: y_pred = " + str(np.shape(y_pred)))
    print("--------------------------------------")
    print(classification_report(validation_labels, y_pred, target_names=values))

---