## Restoring Model
1. Load Tokenizer
2. Load Model Architecture
3. Load Model Weights
4. Load maxlen

In [None]:
# Directory the stored CNN artefacts are read from (tokenizer, architecture,
# weights and maxlen). The trailing separator is kept so that file names can be
# appended with plain string concatenation.
STORE_PATH = (
    'C:\\Users\\basharm\\PythonJupyter\\CoVID19CodeGit'
    '\\StoredModels\\CNN\\'
)

In [None]:
import pickle

### 1. Load Tokenizer

In [None]:
# Restore the pickled tokenizer object from the model store.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
tokenizer_file = STORE_PATH+'tokenizer.pickle'
with open(tokenizer_file, 'rb') as pickled:
    tokenizer = pickle.load(pickled)

### 2. Load Model Architecture

In [None]:
from keras.models import model_from_json

# Load the JSON description of the network and rebuild the (weight-less) model.
# The context manager closes the file even if reading raises, which the
# previous manual open()/close() pair did not guarantee.
with open(STORE_PATH+'model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

### 3. Load Model Weights

In [None]:
# Fill the rebuilt architecture with the weights saved in model.h5, then
# report success.
weights_file = STORE_PATH+"model.h5"
loaded_model.load_weights(weights_file)
print("Loaded model from disk")

### 4. Load maxlen

In [None]:
import numpy as np

# Load the sequence length stored next to the model files. np.load returns an
# ndarray, so convert it to a built-in int: that is the type pad_sequences
# documents for its maxlen argument.
maxlen = int(np.load(STORE_PATH+'maxlen.npy'))
maxlen

## Load Test Data

In [None]:
# Directory containing the labelled sentiment data; the trailing separator is
# kept so that file names can be appended directly.
BASE = (
    'C:\\Users\\basharm\\PythonJupyter\\CoVID19CodeGit'
    '\\data\\sentiment_data\\'
)

In [None]:
# Show the full path of the test CSV as a sanity check before reading it.
print(BASE+'test_pp.csv')

In [None]:
import pandas as pd

# Read the test CSV as UTF-8 and display five randomly chosen rows.
test_csv = BASE+'test_pp.csv'
df_test = pd.read_csv(test_csv, encoding='utf8')
df_test.sample(n=5)

In [None]:
# Keep only the label and text columns, then show the first five rows.
wanted_columns = ['target', 'text']
df_test = df_test[wanted_columns]
df_test.head(n=5)

In [None]:
# Count how many rows carry each distinct value of the 'target' column.
df_test['target'].value_counts()

In [None]:
# Cast every entry of the text column to str and collect them in a plain list
# for the tokenizer.
X_test = df_test['text'].astype(str).tolist()

In [None]:
from keras.preprocessing.sequence import pad_sequences

# Convert the test texts to token-id sequences with the restored tokenizer and
# bring them all to the length given by maxlen.
xtest = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=maxlen)

In [None]:
# Divide the labels by 4 and truncate to int, so a target of 4 becomes 1 and a
# target of 0 stays 0.
raw_targets = np.array(df_test['target'])
ytest = (raw_targets / 4).astype(int)
ytest

## Evaluate Model

In [None]:
# Compile the restored model with the loss, optimizer and metric below, run it
# on the padded test sequences, and display the first ten raw outputs.
loaded_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
p = loaded_model.predict(xtest, verbose=0)
p[:10]

In [None]:
# For each output row take the index of its largest value as the predicted
# class, then show the first 25 predictions.
predicted = list(map(np.argmax, p))
predicted[:25]

In [None]:
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Compare the predicted class labels with the true labels and print a set of
# binary-classification metrics.
predicted = np.array(predicted)
actual = ytest

# Confusion-matrix cells. With 0/1 labels, (label - 1) is non-zero exactly
# where the label is 0, so each product is non-zero only for the matching
# predicted/actual combination.
tp = np.count_nonzero(predicted * actual)
tn = np.count_nonzero((predicted - 1) * (actual - 1))
fp = np.count_nonzero(predicted * (actual - 1))
fn = np.count_nonzero((predicted - 1) * actual)

print('True Positive', tp)
print('True Negative', tn)
print('False Positive', fp)
print('False Negative', fn)

accuracy = (tp + tn) / (tp + fp + fn + tn)
# Guard the ratios below: if the model predicts no positives, or the data has
# no positives, the denominator is 0 and the plain division would raise.
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
if precision + recall:
    fmeasure = (2 * precision * recall) / (precision + recall)
else:
    fmeasure = 0.0
# Store the score under its own name; reusing `cohen_kappa_score` would
# overwrite the imported function and make this cell fail when re-run.
kappa = cohen_kappa_score(predicted, actual)
false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predicted)
auc_val = auc(false_positive_rate, true_positive_rate)
roc_auc_val = roc_auc_score(actual, predicted)

print('Accuracy', accuracy)
print('Precision', precision)
print('Recall', recall)
print('f-measure', fmeasure)
print('cohen_kappa_score', kappa)
print('auc', auc_val)
print('roc_auc', roc_auc_val)

## Predict
Predict on unlabelled data

### Load Unlabelled Data

In [None]:
# Directory the unlabelled tweet file is read from and the scored CSV is
# written to; the trailing separator is kept so that file names can be
# appended directly.
BASE_UNLABELLED = (
    'C:\\Users\\basharm\\PythonJupyter\\CoVID19CodeGit'
    '\\data\\australian_space\\'
)
BASE_UNLABELLED

In [None]:
# Read the tab-separated tweet file: the first line supplies the column names,
# every following line becomes one list of field values.
columns = []
rows = []
# Decode explicitly as UTF-8. Without `encoding`, open() uses the platform's
# locale encoding, which can garble or reject non-ASCII tweet text; the rest
# of this notebook reads and writes UTF-8.
with open(BASE_UNLABELLED+'au_tweet_out.txt', encoding='utf8') as FI:
    for i, line in enumerate(FI):
        fields = line.strip().split('\t')
        if i == 0:
            columns = fields
        else:
            rows.append(fields)

In [None]:
# Build a DataFrame from the parsed rows (columns get default integer labels)
# and display its first five rows.
df_un = pd.DataFrame(data=rows)
df_un.head(5)

In [None]:
# Remove the column labelled 3 from the frame.
df_un = df_un.drop(columns=3)

In [None]:
# Give the four remaining columns descriptive names (the assignment raises if
# the frame does not have exactly four columns) and show the first five rows.
df_un.columns = [
    'Text',
    'Time',
    'Location',
    'Name',
]
df_un.head(n=5)

In [None]:
# Convert the tweet texts to token-id sequences with the restored tokenizer
# and bring them all to the length given by maxlen.
tweet_texts = df_un['Text'].astype(str).tolist()
xun = pad_sequences(tokenizer.texts_to_sequences(tweet_texts), maxlen=maxlen)

In [None]:
# Run the model on the unlabelled sequences and display the first three raw
# outputs.
pun = loaded_model.predict(x=xun, verbose=0)
pun[:3]

In [None]:
# Number of predictions returned for the unlabelled data.
len(pun)

In [None]:
# For each output row take the index of its largest value as the predicted
# class, then show the first five predictions.
predicted_un = list(map(np.argmax, pun))
predicted_un[:5]

In [None]:
# Attach the predicted class of each tweet as a 'Sentiment' column.
df_un['Sentiment'] = predicted_un

In [None]:
# Display the first ten rows of the frame.
df_un.head(10)

In [None]:
# Replace the 'Time' column with its pd.to_datetime conversion.
df_un['Time'] =pd.to_datetime(df_un['Time'])

In [None]:
# Sort the rows by the 'Time' column (ascending, the sort_values default).
df_un = df_un.sort_values('Time')

In [None]:
# Renumber the rows from 0; drop=True discards the old index instead of
# keeping it as a column.
df_un = df_un.reset_index(drop=True)

In [None]:
# 100 * (row count - sum of Sentiment) / row count. If Sentiment holds only
# 0/1 labels (presumably the case here - verify), this is the percentage of
# rows labelled 0.
total_rows = len(df_un)
(total_rows - df_un['Sentiment'].sum()) / total_rows * 100

In [None]:
# Write the scored tweets as UTF-8 CSV without the row index. `index` is
# documented as a bool, so pass False explicitly instead of relying on None
# being treated as falsy.
df_un.to_csv(BASE_UNLABELLED+'au_tweet_sentiment.csv', index=False, encoding='utf8')