# NLP Article Spinner #

## Import libraries & load data

In [1]:
import nltk
import random
import numpy as np
import string

In [2]:
from bs4 import BeautifulSoup

In [3]:
# Read the review file inside a context manager so the file handle is closed
# as soon as its contents are in memory, instead of being leaked.
# NOTE(review): no parser is passed to BeautifulSoup, so the library chooses
# one from whatever is installed — consider pinning one for reproducibility.
with open('positive.review') as review_file:
    review_soup = BeautifulSoup(review_file.read())

# Keep only the <review_text> elements; later cells call .get_text() on each.
positive_reviews = review_soup.find_all('review_text')

## Build trigram model ##

In [4]:
def tokenize(s):
    """Lower-case ``s`` and split it into tokens.

    Args:
        s: The text to tokenize.

    Returns:
        The result of ``nltk.tokenize.word_tokenize`` applied to the
        lower-cased text.
    """
    return nltk.tokenize.word_tokenize(s.lower())

In [5]:
# trigram maps a (previous token, next token) pair to a dict that counts how
# often each token was seen between that pair.
trigram = {}
for review_tag in positive_reviews:
    words = tokenize(review_tag.get_text())
    # Slide a three-token window (left, middle, right) across the review;
    # reviews with fewer than three tokens contribute nothing.
    for left, middle, right in zip(words, words[1:], words[2:]):
        middle_counts = trigram.setdefault((left, right), {})
        middle_counts[middle] = middle_counts.get(middle, 0) + 1

In [6]:
# Normalize: turn the raw counts stored for every (previous, next) context
# into relative frequencies by dividing each count by the context's total.
for counts in trigram.values():
    total = sum(counts.values())
    # Snapshot the items so values can be overwritten while looping.
    for middle, count in list(counts.items()):
        counts[middle] = count / total

## Sample function ##

In [7]:
def sample_word(prob_dict):
    """Draw one key from ``prob_dict`` at random, weighted by its value.

    A single uniform draw from ``random.random()`` is compared against the
    running sum of the values; the first key whose running sum strictly
    exceeds the draw is returned.

    Args:
        prob_dict: Mapping of word -> probability. The values are expected
            to sum to (approximately) 1.

    Returns:
        The sampled word. If the running sum never exceeds the draw, the
        last word visited is returned. ``None`` is returned only when
        ``prob_dict`` is empty.
    """
    sample = random.random()
    cum_prob = 0
    word = None  # stays None only when prob_dict is empty
    for word, prob in prob_dict.items():
        cum_prob += prob
        if cum_prob > sample:
            return word
    # Floating-point rounding can leave the total slightly below 1 (e.g. ten
    # 0.1 values sum to 0.9999999999999999), so a draw at or above that total
    # would otherwise select nothing. Fall back to the final word.
    return word

## Spin article ##

In [8]:
def test_spinner(replace_prob=0.2):
    """Print a randomly chosen review followed by a "spun" version of it.

    Each interior token (one that has both a previous and a next token) is
    considered for replacement with probability ``replace_prob``. When the
    surrounding (previous, next) pair is a key of ``trigram``, a new middle
    word is drawn with ``sample_word``. Replacements happen in place, so a
    later position sees any word already swapped in to its left.

    Args:
        replace_prob: Chance, in [0, 1], of attempting to replace each
            interior token. Defaults to 0.2.

    Returns:
        None. The original and the spun text are written with ``print``.
    """
    review = random.choice(positive_reviews)
    s = review.get_text().lower()
    print("Original:", s)
    tokens = tokenize(s)
    # The first and last tokens are skipped: they lack one of the neighbours.
    for i in range(1, len(tokens) - 1):
        if random.random() < replace_prob:
            prev_next_words = (tokens[i - 1], tokens[i + 1])
            if prev_next_words in trigram:
                replacement = sample_word(trigram[prev_next_words])
                # Keep the original token if nothing was sampled; storing
                # None would make the join below raise TypeError.
                if replacement is not None:
                    tokens[i] = replacement
    print("New:")
    new_text = " ".join(tokens)
    # Joining on spaces separates punctuation tokens from their neighbours;
    # undo that for the handful of cases handled here, in this order.
    for spaced, tight in (
        (" .", "."),
        (" '", "'"),
        (" ,", ","),
        ("$ ", "$"),
        (" !", "!"),
    ):
        new_text = new_text.replace(spaced, tight)
    print(new_text)

In [9]:
test_spinner()

Original: 
the only thing you will need when you buy this package is some extra paper! i love the fact that epson advertises realistic figures for this package. some other printer makers do not do this!

New:
the only thing you will need when you buy this package is some extra paper! i dragged the fact that epson advertises realistic figures for this package. some other printer makers do not do this!
