In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
import keras
from sklearn.metrics import accuracy_score

In [3]:
# Read the complete dataset from disk into a DataFrame.
data_file = pd.read_csv(filepath_or_buffer="file1.csv")

In [4]:
# Preview the first rows to check the columns the prediction code relies on
# (normalized_title and the five move_* columns) plus the `tomorrow` label.
data_file.head()

Unnamed: 0,tomorrow,normalized_title,move_close,move_open,move_high,move_low,move_close_open
0,1,respawn titanfall server stability microsoft a...,1,1,1,0,1
1,1,titanfall launches tuesday microsoft hopes boo...,1,1,1,0,1
2,1,microsoft titanfall could game changer xbox one,1,1,1,0,1
3,1,titanfall xbox one launch microsoft releases c...,1,1,1,0,1
4,1,microsoft xbox one titanfall another non 1080p...,1,1,1,0,1


In [5]:
def class_predict(df, tokenizer=None, model=None):
    """
    Predict a binary class label for every row of ``df`` with the saved Keras model.

    parameters:
        df (dataframe) : must contain the text column ``normalized_title`` and the
            columns ``move_close``, ``move_open``, ``move_high``, ``move_low`` and
            ``move_close_open``; any other columns are ignored
        tokenizer (optional) : an already fitted tokenizer exposing
            ``texts_to_sequences``. When omitted, a new ``Tokenizer(2000)`` is
            fitted on ``df.normalized_title`` (the original behaviour).
            NOTE(review): a tokenizer fitted here derives its word indices from
            this data's word frequencies, which will generally not match the
            indices the model was trained with -- pass the training tokenizer
            whenever it is available.
        model (optional) : an already loaded model exposing ``predict``. When
            omitted, the model is loaded from ``saved_model/final_model``.

    returns:
        array: 1-D integer array with one label per row (1 where the predicted
        probability is >= 0.5, otherwise 0)

    raises:
        ValueError: if the model does not return exactly one probability per row
    """
    max_review_length = 100  # every title is padded/truncated to this many tokens
    cat_columns = ['move_close', 'move_open', 'move_high', 'move_low', 'move_close_open']

    # Nothing to predict: skip tokenizing and the (expensive) model load.
    if len(df) == 0:
        return np.array([], dtype=int)

    if tokenizer is None:
        tokenizer = Tokenizer(2000)  # keep only the 2000 most frequent words
        tokenizer.fit_on_texts(df.normalized_title)

    # Text branch: titles -> integer sequences -> fixed-length padded matrix.
    all_news_x = tokenizer.texts_to_sequences(df.normalized_title)
    all_news_x_pad = sequence.pad_sequences(all_news_x, maxlen=max_review_length)

    # Categorical branch: the encoded move_* columns as a plain array.
    all_cat_x = df[cat_columns].values

    if model is None:
        model = keras.models.load_model('saved_model/final_model')

    # The model takes two inputs: padded text and the categorical columns.
    y_predict_prob = np.asarray(model.predict([all_news_x_pad, all_cat_x]))
    if y_predict_prob.size != len(df):
        raise ValueError(
            "expected one probability per row, got output of shape %s for %d rows"
            % (y_predict_prob.shape, len(df))
        )

    # Vectorised thresholding: probability >= 0.5 -> class 1, else class 0.
    return (y_predict_prob.reshape(-1) >= 0.5).astype(int)

In [6]:
# Predicted 0/1 label for every row of the full dataset.
y_predict = class_predict(df=data_file)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [12]:
def accuracy_calc(x_data, y_data):
    """
    Predict labels for ``x_data`` and score them against the true labels ``y_data``.

    Prediction is delegated to ``class_predict`` so the preprocessing and
    thresholding logic lives in one place instead of being duplicated here.

    parameters:
        x_data (df): must contain ``normalized_title`` and the columns
            ``move_close``, ``move_open``, ``move_high``, ``move_low`` and
            ``move_close_open`` (the columns ``class_predict`` reads)
        y_data (df / series / array): the actual label for each row of ``x_data``;
            a single-column frame, a Series or a 1-D array are all accepted

    returns:
        float: accuracy_score of the predicted label and true label
    """
    y_predict = class_predict(x_data)

    # Flatten so an (n, 1) DataFrame and an (n,) Series are scored identically.
    y_true = np.asarray(y_data).ravel()

    return accuracy_score(y_true, y_predict)

In [13]:
# Select the feature columns by name so the `tomorrow` label is not passed in with
# the features (a positional `iloc[:, 1:]` slice only drops the index column), and
# hand the label over as a 1-D Series.
accuracy = accuracy_calc(
    data_file[["normalized_title", "move_close", "move_open",
               "move_high", "move_low", "move_close_open"]],
    data_file["tomorrow"],
)

In [14]:
# Fraction of rows in the whole dataset whose predicted label matches `tomorrow`.
# NOTE(review): if the saved model was trained on any of these rows this is not a
# held-out estimate -- confirm how the data was split.
accuracy

0.9644906187005737