In [1]:
# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py

# Cufflinks wrapper on plotly
import cufflinks

# Data science imports
import pandas as pd
import numpy as np

# Options for pandas: show up to 30 columns when a DataFrame is displayed
pd.options.display.max_columns = 30

# Display all cell outputs: with 'all', every top-level expression in a cell
# is rendered, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:

# Switch cufflinks and plotly to offline notebook mode so figures are rendered
# inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN, so an
# internet connection would be needed to view the figures — confirm.
from plotly.offline import iplot, init_notebook_mode
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Set global theme
# NOTE(review): world_readable=True is stored in the cufflinks config as well;
# presumably it only matters when figures are published online — confirm it is intended.
cufflinks.set_config_file(world_readable=True, theme='pearl')

## Let's explore the datasets

In [3]:
import tensorflow_datasets as tfds
from src.datasets import download

# Human-readable names for the labels; a label value is used as an index
# into this list.
label_categories = ['bad', 'good']

# Fetch the three dataset splits; only the training split is explored here.
train_data, validation_data, test_data = download()

# Materialise the training split as a two-column frame.
train_records = tfds.as_numpy(train_data)
train_df = pd.DataFrame(train_records, columns=['text', 'type'])

# Swap every label value for its category name.
train_df['type'] = train_df['type'].apply(lambda label: label_categories[label])

train_df.head()

Unnamed: 0,text,type
0,"b""This was an absolutely terrible movie. Don't...",bad
1,b'I have been known to fall asleep during film...,bad
2,b'Mann photographs the Alberta Rocky Mountains...,bad
3,b'This is the kind of film for a snowy Sunday ...,good
4,"b'As others have mentioned, all the women that...",good


### Training data distribution

In [4]:
# Histogram of how many training rows fall into each label category.
type_column = train_df['type']
type_column.iplot(
    kind='hist',
    title='Training data distribution',
    xTitle='Type',
    yTitle='count',
)

## Explore training metrics

In [5]:
# Load the recorded training metrics and preview the first rows.
metrics_path = './metrics/training.csv'
df = pd.read_csv(metrics_path)
df.head()

Unnamed: 0,epoch,accuracy,loss,val_accuracy,val_loss
0,0,0.64935,0.627438,0.7732,0.521206
1,1,0.7306,0.548754,0.7834,0.493263
2,2,0.76405,0.507899,0.6394,0.656753
3,3,0.66895,0.610746,0.7024,0.579639
4,4,0.75915,0.522665,0.819,0.44037


In [6]:
# Plot the accuracy and val_accuracy columns against the epoch number.
accuracy_columns = ['epoch', 'accuracy', 'val_accuracy']
accuracy_history = df[accuracy_columns]
accuracy_history.iplot(
    x='epoch',
    mode='lines+markers',
    linecolor='black',
    title='Training accuracy',
    xTitle='epoch',
    yTitle='accuracy',
)

In [7]:
# Plot the loss and val_loss columns against the epoch number.
# The y axis is labelled 'loss' because this chart shows loss values, not accuracy.
df[['epoch', 'loss', 'val_loss']].iplot(
    x='epoch',
    mode='lines+markers',
    xTitle='epoch',
    yTitle='loss',
    title='Losses'
)

## Predictions

### Load probability model

The probability model outputs a prediction for each of the model's classes:

0 - bad review, 1 - good review

In [8]:
from src.predict import get_probability_model

# Obtain the model used for the predictions in the cells below.
# NOTE(review): the call also emits the layer summary shown in the cell output;
# how the model is built or restored lives in src.predict — confirm there.
model = get_probability_model()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, None, 128)         186880    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                41216     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 130       
Total params: 232,386
Trainable params: 232,386
Non-trainable params: 0
_________________________________________________________________


**First, we will try to predict on some data from the training dataset**

In [16]:
from src.predict import get_text_and_label_from_dataset, predict

# Index of the dataset review to run through the model.
REVIEW_INDEX = 110

text, real_label = get_text_and_label_from_dataset(REVIEW_INDEX)
print('text for prediction\n\n', text, '\n')

predicted_label, predictions = predict(text, model)
print(label_categories[predicted_label], 'review')
print('\n\nPredicted label:', predicted_label, 'real label: ', real_label, 'predictions:', predictions)

# Report whether the predicted label agrees with the dataset label.
outcome = 'Successfully predicted' if predicted_label == real_label else 'Failed to predict'
print(outcome)

text for prediction

 Dakota (1988) was another early Lou Diamond Phillips starring vehicle. This film is similar to the later released film Harley. There are a few differences but they're both the same. I don't know which one came first. I guess it'll remain one of the mysteries of life. But they both are troubled "kids" who are trying to turn there lives around. Instead of bikes this one involves horses. They're basically the same movie and they're both cheesy as hell. If you're a serious L.D.P. fan then I recommend that you watch them both. You get some extreme mugging and posturing from L.D.P. if you're game then go for it.<br /><br />Not recommended, except for L.D.P. fans!!! 



ValueError: Tensor's shape (140, 300) is not compatible with supplied shape [300]

**Then we will try to predict on handwritten text**

In [13]:
# Can change text and check model
hadwriten = 'This is good film'

print('Hendwriten text:\n', hadwriten, '\n')

# predict returns the predicted class index together with the values printed
# below as probabilities.
handwriten_label, predictions = predict(hadwriten, model)

# Use the label predicted for THIS text. Indexing with `predicted_label`
# (assigned in a different cell) would show the category of another review and
# would fail on a fresh kernel where that variable does not exist.
print(label_categories[handwriten_label], 'review')

print('Probabilities', predictions)


Hendwriten text:
 This is good film 

good review
Probabilities [0.00197127 0.9980287 ]
