In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Read the training dataset into a pandas DataFrame.
# The explicit `delim_whitespace=False` was dropped: it only restated the
# default, and the keyword is deprecated in pandas 2.2+ (it triggers a
# FutureWarning even when set to False).
df = pd.read_csv('./../datasets/training_data')

In [None]:
# preview the first five rows as a quick sanity check of the load
df.head(n=5)

In [None]:
# Model configuration - dummy values for now, to be collected interactively later.
best_model = True

# Defaults shared by the "best" configuration and the custom one.
num_hidden_layers = 3
num_hidden_layer_nodes = [20, 10, 5]   # units per hidden layer
train_ratio = .7                       # fraction of rows used for training
# Activation slots; which slot each layer reads is decided where the model is built.
hidden_layer_activations = ['sigmoid', 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid']
optimizer = 'sgd'
learning_rate = .005
loss = 'mean_squared_error'
epochs = 300
batch_size = 200

if best_model:
    # BinaryAccuracy thresholds the sigmoid output before comparing it with the
    # label. The plain `Accuracy` metric counts exact equality between prediction
    # and label, which a continuous output in (0, 1) almost never satisfies.
    # NOTE(review): assumes 'trending' is a 0/1 label - confirm.
    metrics = [
        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.Precision(),
    ]
    metrics_names = ["accuracy", "recall", "precision"]
else:
    # build a custom model: override any of the defaults above in this branch
    metrics = ["accuracy"]
    metrics_names = ["accuracy"]

In [None]:
# Split the data into training and testing sets; `train_ratio` is the fraction
# of rows that goes to the training set.
# A fixed random_state keeps the split identical across kernel restarts, so the
# numbers reported further down are reproducible.
train, test = train_test_split(df, train_size=train_ratio, random_state=42)

# Re-number the rows of each partition from 0; reassigning (rather than
# inplace=True) keeps the cell idempotent on re-run.
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)

In [None]:
# split each partition into X (every column except the label) and
# Y (the label column) - just random y for now..
label_col = 'trending'
train_X, train_Y = train.loc[:, train.columns != label_col], train[label_col]
test_X, test_Y = test.loc[:, test.columns != label_col], test[label_col]

In [None]:
# build the logistic regression model - need clean data...
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# LogisticRegression's `multi_class` argument is deprecated (scikit-learn 1.5+).
# Wrapping the estimator in OneVsRestClassifier is the documented replacement and
# keeps the one-vs-rest scheme that `multi_class='ovr'` asked for.
LR_model = OneVsRestClassifier(LogisticRegression())
LR_model.fit(train_X, train_Y)

In [None]:
# confusion matrix of the LR predictions against the test labels
from sklearn.metrics import confusion_matrix

pred_Y = LR_model.predict(test_X)
confusion_matrix(y_true=test_Y, y_pred=pred_Y)

In [None]:
# score the LR model on the test split and print the resulting accuracy
accuracy = LR_model.score(X=test_X, y=test_Y)
print(accuracy)

In [None]:
# build the ANN model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input

ANN_model = keras.Sequential()

# add input layer: one input per column of train_X.
# `shape` has to be a tuple; a bare int is not a valid shape in current Keras.
ANN_model.add(Input(shape=(train_X.shape[1],)))

# add hidden layers: hidden layer k takes its width from num_hidden_layer_nodes[k]
# and its activation from slot k + 1 (slot 0 of the list is not read here)
for layer_idx in range(num_hidden_layers):
    ANN_model.add(Dense(num_hidden_layer_nodes[layer_idx],
                        activation=hidden_layer_activations[layer_idx + 1]))

# add output layer: a single unit using the last activation slot
ANN_model.add(Dense(1, activation=hidden_layer_activations[-1]))

# Resolve the optimizer identifier to an instance so that the configured
# learning_rate is actually applied; passing the bare string to compile()
# would silently fall back to the optimizer's built-in default rate.
ANN_optimizer = keras.optimizers.get(optimizer)
ANN_optimizer.learning_rate = learning_rate

ANN_model.compile(optimizer=ANN_optimizer, loss=loss, metrics=metrics)
ANN_model.summary()

In [None]:
# fit the network on the training split using the configured schedule
ANN_model.fit(x=train_X, y=train_Y, batch_size=batch_size, epochs=epochs)

In [None]:
# confusion matrix
# `Sequential.predict_classes` was removed from Keras (TF 2.6). For a single
# sigmoid output the equivalent is thresholding the predicted value at 0.5;
# ravel() flattens the (n, 1) prediction array into a 1-D label vector.
pred_Y = (ANN_model.predict(test_X) > 0.5).astype("int32").ravel()
confusion_matrix(test_Y, pred_Y)

In [None]:
# report evaluation metrics
# evaluate() returns [loss, metric_1, metric_2, ...] when the model was compiled
# with metrics, so the loss sits at index 0 and the metric values start at
# index 1. Pairing metrics_names with index 0 would label the loss "accuracy".
evaluated_metrics = ANN_model.evaluate(test_X, test_Y)
print("loss: %.2f" % evaluated_metrics[0])
for metric_name, metric_value in zip(metrics_names, evaluated_metrics[1:]):
    print(metric_name + ": %.2f" % metric_value)