In [None]:
# Mount Google Drive at /content/drive; the model and data paths used later in
# this notebook all live under that mount point (Colab-only dependency).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


This notebook is going to test the CNN models on the TI_CNN dataset.

In [None]:
import pandas as pd
import numpy as np

#Progress bars
from tqdm import tqdm
tqdm.pandas()

#Parallelize pandas apply on multiple cores

import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns

from numpy.random import seed

import tensorflow as tf
from tensorflow import keras
from keras.models import load_model


In [None]:

# Directory on the mounted shared drive that holds the saved Keras models (.h5).
# Kept in one place so the location only has to be changed once.
MODEL_DIR = '/content/drive/Shareddrives/DS4A_Fakenews/notebook/model'

# Load the three CNN variants that are evaluated further down in this notebook.
old_cnn_model = load_model(MODEL_DIR + '/old_cnn_model.h5')
new_cnn_model = load_model(MODEL_DIR + '/new_cnn_model.h5')
retrain_cnn_model = load_model(MODEL_DIR + '/retrain_cnn_model.h5')

In [None]:
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted location.
# Read the cleaned TI_CNN frame and drop every row that has a missing value.
df = pd.read_pickle(
    '/content/drive/Shareddrives/DS4A_Fakenews/data/clean_data/clean_TI_CNN.pickle'
).dropna()

In [None]:
# Preview the first rows to check the columns used below
# (vectorized_title, vectorized_clean_text, one_hot_label).
df.head()

Unnamed: 0,title,text,type,clean_text,vectorized_clean_text,vectorized_title,one_hot_label
0,properties showcase brand,"They stood in line at Trump Tower, sometimes u...",real,stood line trump tower sometimes half hour han...,"[2379, 318, 25752, 6172, 1631, 242, 957, 7565,...","[2315, 5844, 1449, 0, 0, 0, 0, 0, 0, 0]",[1]
1,trump foundation tells new stopped soliciting ...,Donald J. Trump <U+2019>s foundation informed ...,real,trump us foundation informed attorney general ...,"[25752, 164, 3113, 2826, 1171, 580, 65, 17332,...","[25752, 3113, 2710, 65, 1541, 19799, 111, 555,...",[1]
2,trump prepares white house move tower may still,President-elect Donald J. Trump won the White ...,real,trump white house populist promise prepares as...,"[25752, 1191, 541, 17745, 3102, 8213, 4315, 50...","[25752, 8213, 1191, 541, 408, 6172, 137, 151, ...",[1]
3,luring investors name little else,An investment pitch for a new Texas hotel is t...,real,investment pitch new hotel trying lure invokin...,"[615, 1924, 65, 1639, 469, 8793, 31093, 521, 4...","[22579, 786, 521, 286, 1249, 0, 0, 0, 0, 0]",[1]
4,trump immediately move white house,President-elect Donald J. Trump <U+2019>s wife...,real,trump us wife son immediately move white house...,"[25752, 164, 783, 831, 1045, 408, 1191, 541, 2...","[25752, 1045, 408, 1191, 541, 0, 0, 0, 0, 0]",[1]


In [None]:
# Report the token-vector lengths stored in the dataset, measured on the first row.
# .iloc[0] is positional; the label-based df[col][0] raises KeyError whenever the
# row labelled 0 has been removed by the earlier dropna().
# NOTE(review): the "Max len" wording assumes every row shares the first row's
# length (i.e. vectors were already padded upstream) -- TODO confirm.
len_text = len(df["vectorized_clean_text"].iloc[0])
len_title = len(df["vectorized_title"].iloc[0])

print("Max len content: {}".format(len_text))
print("Max len title: {}".format(len_title))

Max len content: 1088
Max len title: 10


In [None]:
def pad_array(array, token_len):
    '''
    Pad with trailing zeros, or truncate, a token sequence to a fixed length.

    Parameters:
        array: The input sequence of token ids that needs to be padded or
            truncated. It is never modified.
        token_len: The desired length of the sequence after padding or truncation.
    Return value:
        A new list of exactly token_len items: the leading items of `array`,
        followed by as many 0s as are needed to reach token_len.
    '''
    # Work on a copy: the previous `array += [...]` extended the caller's list
    # in place when padding, while truncation returned a fresh object.
    tokens = list(array)
    missing = token_len - len(tokens)
    if missing < 0:
        return tokens[:token_len]  # Truncate
    return tokens + [0] * missing  # Pad


In [None]:
# Adjust to the input length of the models: the model summaries further down
# show title_input as (None, 12) and content_input as (None, 523).
max_len_title = 12
max_len_text = 523

In [None]:
# Pad/truncate every title vector to the title input length of the models.
# Series.apply walks the column directly, instead of building a Series for each
# row (DataFrame.apply with axis=1) just to read a single field from it.
df["vectorized_title"] = df["vectorized_title"].apply(
    lambda tokens: pad_array(tokens, max_len_title)
)

In [None]:
# Pad/truncate every content vector to the content input length of the models.
# Series.apply walks the column directly, instead of building a Series for each
# row (DataFrame.apply with axis=1) just to read a single field from it.
df["vectorized_clean_text"] = df["vectorized_clean_text"].apply(
    lambda tokens: pad_array(tokens, max_len_text)
)

In [None]:
# Pull out the two feature columns (title, text) and the label column.
X_test_title, X_test_text, y_test = (
    df[column]
    for column in ("vectorized_title", "vectorized_clean_text", "one_hot_label")
)

In [None]:
# Build the list passed to predict(): title array first, then text array.
# Each column of per-row lists is converted to a single NumPy array.
test_fit = [
    np.asarray(column.tolist())
    for column in (X_test_title, X_test_text)
]

# **old_cnn_model**

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of old_cnn_model.
old_cnn_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 title_input (InputLayer)       [(None, 12)]         0           []                               
                                                                                                  
 content_input (InputLayer)     [(None, 523)]        0           []                               
                                                                                                  
 embed_title (Embedding)        (None, 12, 300)      15000000    ['title_input[0][0]']            
                                                                                                  
 embed_content (Embedding)      (None, 523, 300)     15000000    ['content_input[0][0]']          
                                                                                              

In [None]:
# Predict on the padded TI_CNN inputs, round the outputs to the nearest
# integer label and compare them with the true labels.
test_pred = old_cnn_model.predict(test_fit)
accuracy_score(
    y_true=np.asarray(y_test.tolist()),
    y_pred=np.round(test_pred),
)



0.729021879021879

# **new_cnn_model**

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of new_cnn_model.
new_cnn_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 title_input (InputLayer)       [(None, 12)]         0           []                               
                                                                                                  
 content_input (InputLayer)     [(None, 523)]        0           []                               
                                                                                                  
 embed_title (Embedding)        (None, 12, 300)      15000000    ['title_input[0][0]']            
                                                                                                  
 embed_content (Embedding)      (None, 523, 300)     15000000    ['content_input[0][0]']          
                                                                                            

In [None]:
# Predict on the padded TI_CNN inputs, round the outputs to the nearest
# integer label and compare them with the true labels.
test_pred = new_cnn_model.predict(test_fit)
accuracy_score(
    y_true=np.asarray(y_test.tolist()),
    y_pred=np.round(test_pred),
)



0.7741956241956242

# **retrain_cnn_model: with all data**

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of retrain_cnn_model.
retrain_cnn_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 title_input (InputLayer)       [(None, 12)]         0           []                               
                                                                                                  
 content_input (InputLayer)     [(None, 523)]        0           []                               
                                                                                                  
 embed_title (Embedding)        (None, 12, 300)      15000000    ['title_input[0][0]']            
                                                                                                  
 embed_content (Embedding)      (None, 523, 300)     15000000    ['content_input[0][0]']          
                                                                                            

In [None]:
# Predict on the padded TI_CNN inputs, round the outputs to the nearest
# integer label and compare them with the true labels.
test_pred = retrain_cnn_model.predict(test_fit)
accuracy_score(
    y_true=np.asarray(y_test.tolist()),
    y_pred=np.round(test_pred),
)



0.7759330759330759