In [1]:
from __future__ import print_function

import os
import pickle
import time
from collections import defaultdict

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score, accuracy_score

from correct_text import train, decode, decode_sentence, evaluate_accuracy, create_model,\
    get_corrective_tokens, DefaultPTBConfig, DefaultMovieDialogConfig
from text_corrector_data_readers import PTBDataReader, MovieDialogReader

%matplotlib inline




In [2]:
# Directory holding the Cornell movie-dialogs files. Set the
# DEEP_TEXT_CORRECTOR_DATA environment variable to point somewhere else; the
# fallback is the original hard-coded location, so existing runs are unaffected.
root_data_path = os.environ.get(
    "DEEP_TEXT_CORRECTOR_DATA",
    "C:/Users/ASUS/Desktop/deep-text-corrector-master/cornell movie-dialogs corpus/")

# NOTE(review): training reads the raw "movie_lines.txt" while validation and
# test use "cleaned_dialog_*" files -- confirm this mix is intended.
train_path = os.path.join(root_data_path, "movie_lines.txt")
val_path = os.path.join(root_data_path, "cleaned_dialog_val.txt")
test_path = os.path.join(root_data_path, "cleaned_dialog_test.txt")
model_path = os.path.join(root_data_path, "dialog_correcter_model_testnltk")
config = DefaultMovieDialogConfig()

# Report missing data files here instead of much later, when a cell that
# opens one of them fails with FileNotFoundError.
for _label, _path in (("train", train_path), ("val", val_path), ("test", test_path)):
    if not os.path.exists(_path):
        print("WARNING: %s data file not found: %s" % (_label, _path))

## Train

In [3]:
# Reader over the training file, built with the reader's default settings.
data_reader = MovieDialogReader(
    config,
    train_path,
)

In [None]:
# Clear TensorFlow's default graph before training so a re-run of this cell
# starts from an empty graph.
tf.reset_default_graph()
# presumably writes its checkpoints under model_path -- confirm in correct_text
train(
    data_reader,
    train_path,
    val_path,
    model_path,
)

## Decode sentences

In [5]:
# Rebuild the reader for decoding with explicit settings. This rebinds the
# `data_reader` name used by the training cells.
# NOTE(review): dropout_prob / replacement_prob presumably control how often
# tokens are dropped or swapped when the reader perturbs text -- confirm in
# text_corrector_data_readers.
data_reader = MovieDialogReader(
    config,
    train_path,
    dropout_prob=0.25,
    replacement_prob=0.25,
    dataset_copies=1,
)

In [6]:
# Collect the corrective tokens from the training file; the result is passed
# to every decode/evaluate call further down.
corrective_tokens = get_corrective_tokens(
    data_reader,
    train_path,
)

In [7]:
# Save the corrective tokens next to the data. `pickle` is imported in the
# notebook's import cell rather than inline here.
with open(os.path.join(root_data_path, "corrective_tokens.pickle"), "wb") as f:
    pickle.dump(corrective_tokens, f)

In [8]:
# Save the reader's token -> id mapping next to the data. `pickle` is imported
# in the notebook's import cell rather than inline here.
with open(os.path.join(root_data_path, "token_to_id.pickle"), "wb") as f:
    pickle.dump(data_reader.token_to_id, f)

In [9]:
# A model built without a saved checkpoint has untrained weights and its
# decodings are meaningless, so say so loudly instead of carrying on silently.
checkpoint_state = tf.train.get_checkpoint_state(model_path)
if checkpoint_state is None:
    print("WARNING: no checkpoint found under %s -- run the training cell first; "
          "the model created below will have untrained parameters." % model_path)

# Fresh graph and interactive session for decoding.
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Second positional argument is True here.
# NOTE(review): presumably a forward-only / inference flag -- confirm in correct_text.
model = create_model(sess, True, model_path, config=config)

Created model with fresh parameters.


In [10]:
# Decode one short sentence with a missing article and keep the returned
# token list; the call also prints the Input/Output pair.
decoded = decode_sentence(
    sess,
    model,
    data_reader,
    "you must have girlfriend",
    corrective_tokens=corrective_tokens,
)

Input: you must have girlfriend
Output: must must must must must must must must must must



In [11]:
# Display the token list returned by the previous decode call.
decoded

['must',
 'must',
 'must',
 'must',
 'must',
 'must',
 'must',
 'must',
 'must',
 'must']

In [12]:
# Decode a longer, already-tokenized sentence (note the split contractions)
# and keep the returned token list.
decoded = decode_sentence(
    sess,
    model,
    data_reader,
    "did n't you say that they 're going to develop this revolutionary new thing ...",
    corrective_tokens=corrective_tokens,
)

Input: did n't you say that they 're going to develop this revolutionary new thing ...
Output: than than than than than than than than than than than than than than than than than than than than



In [13]:
# Probe with an unusual proper name (presumably out of vocabulary). With
# verbose=False only the returned token list is shown as the cell result.
decode_sentence(
    sess,
    model,
    data_reader,
    "kvothe went to market",
    corrective_tokens=corrective_tokens,
    verbose=False,
)

['than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than']

In [14]:
# Probe with several made-up words (presumably out of vocabulary); the cell
# result is the returned token list.
decode_sentence(
    sess,
    model,
    data_reader,
    "blablahblah and bladdddd went to market",
    corrective_tokens=corrective_tokens,
    verbose=False,
)

['than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than']

In [15]:
# Probe a question that is missing its article; the cell result is the
# returned token list.
decode_sentence(
    sess,
    model,
    data_reader,
    "do you have book",
    corrective_tokens=corrective_tokens,
    verbose=False,
)

['than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than']

In [16]:
# Probe a then/than confusion; the cell result is the returned token list.
decode_sentence(
    sess,
    model,
    data_reader,
    "the cardinals did better then the cubs",
    corrective_tokens=corrective_tokens,
    verbose=False,
)

['than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than',
 'than']

In [17]:
# Run label from the original notebook: 4 layers, 40k steps.
# Evaluate on the test file and keep the result in `errors`.
# The original inline note suggests max_samples=1000 can be passed as an extra
# argument to evaluate on a subset -- confirm against evaluate_accuracy.
errors = evaluate_accuracy(
    sess,
    model,
    data_reader,
    corrective_tokens,
    test_path,
)

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/ASUS/Desktop/deep-text-corrector-master/cornell movie-dialogs corpus/cleaned_dialog_test.txt'

In [18]:
# Run label from the original notebook: 4 layers, 30k steps.
# Evaluate on the test file and keep the result in `errors`.
# The original inline note suggests max_samples=1000 can be passed as an extra
# argument to evaluate on a subset -- confirm against evaluate_accuracy.
errors = evaluate_accuracy(
    sess,
    model,
    data_reader,
    corrective_tokens,
    test_path,
)

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/ASUS/Desktop/deep-text-corrector-master/cornell movie-dialogs corpus/cleaned_dialog_test.txt'

In [19]:
# Run label from the original notebook: 4 layers, 20k steps.
# Evaluate on the test file and keep the result in `errors`.
# The original inline note suggests max_samples=1000 can be passed as an extra
# argument to evaluate on a subset -- confirm against evaluate_accuracy.
errors = evaluate_accuracy(
    sess,
    model,
    data_reader,
    corrective_tokens,
    test_path,
)

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/ASUS/Desktop/deep-text-corrector-master/cornell movie-dialogs corpus/cleaned_dialog_test.txt'

In [None]:
# Evaluate on the test file; `errors` feeds the printing loop in the next cell.
# The original inline note suggests max_samples=1000 can be passed as an extra
# argument to evaluate on a subset -- confirm against evaluate_accuracy.
errors = evaluate_accuracy(
    sess,
    model,
    data_reader,
    corrective_tokens,
    test_path,
)

In [None]:
# Print each (decoding, target) pair in `errors`: the two token sequences are
# space-joined on consecutive lines, followed by a blank line.
for decoding, target in errors:
    print("Decoding: ", " ".join(decoding), sep="")
    print("Target:   ", " ".join(target), "\n", sep="")