# Article Spinner

In [1]:
# Directory containing the review files; a later cell joins it with
# 'positive.review' to locate the positive reviews.
# NOTE(review): this is an absolute, machine-specific path -- adjust it for
# your environment before running the notebook.
data_path = '/home/raul/data/udemy/nlp/electronics'

In [3]:
import nltk
import numpy as np
import random
from bs4 import BeautifulSoup
import os

## 1. Load the Data

In [5]:
# Parse the positive-review file and collect every <review_text> element.
pos_reviews_path = os.path.join(data_path, 'positive.review')
# A context manager closes the file handle as soon as its contents are read,
# instead of leaving it open until garbage collection.
with open(pos_reviews_path) as reviews_file:
    reviews_soup = BeautifulSoup(reviews_file.read(), "lxml")
# find_all is the current name of the deprecated findAll alias.
positive_reviews = reviews_soup.find_all('review_text')

## 2. Get the Trigrams

In [7]:
# Index every middle word by its surrounding context: for each run of three
# consecutive tokens (left, middle, right), record `middle` under the key
# (left, right). Repeated middles are kept so they can be counted later.
trigrams = {}
for review in positive_reviews:
    words = nltk.tokenize.word_tokenize(review.text.lower())
    # Slide a three-token window across the review.
    for left, middle, right in zip(words, words[1:], words[2:]):
        trigrams.setdefault((left, right), []).append(middle)

In [14]:
# Peek at the first ten contexts and the middle words observed for each.
for context, middles in list(trigrams.items())[:10]:
    print(context, middles)

('my', 'buying') ['lesson']
('running', '20') ['two']
('any', 'headphones') ['expensive', 'other']
('good..they', 'all') ['answer']
('scanner', 'photos') ['for']
('use', 'advance') ['the']
("'s", 'abilities') ['cooking']
('and', 'expense') ['the']
('adapter', 'installing') [',']
('look', 'my') ['at', 'on', 'to']


In [16]:
# Turn the raw middle-word lists into probability tables. Contexts with only
# one distinct middle word are skipped: there is no alternative to swap in.
trigrams_probabilities = {}
for context, middles in trigrams.items():
    if len(set(middles)) <= 1:
        continue
    # Count occurrences of each middle word, in first-seen order.
    counts = {}
    for middle in middles:
        counts[middle] = counts.get(middle, 0) + 1
    total = len(middles)
    # Normalise the counts into relative frequencies.
    trigrams_probabilities[context] = {
        middle: float(count) / total for middle, count in counts.items()
    }

In [17]:
# Peek at the first ten contexts and their middle-word probability tables.
for context, table in list(trigrams_probabilities.items())[:10]:
    print(context, table)

('my', 'most') {'camera': 0.5, 'hand': 0.5}
('any', 'headphones') {'expensive': 0.5, 'other': 0.5}
('no', 'setting') {'problems': 0.5, 'difficulty': 0.5}
('and', 'tracks') {'music': 0.5, 'blues': 0.5}
('just', 'my') {'recieved': 0.2, 'got': 0.4, 'removed': 0.2, 'take': 0.2}
('i', 'gotten') {'have': 0.3333333333333333, "'ve": 0.6666666666666666}
('wide', 'of') {'range': 0.5, 'number': 0.5}
('as', 'my') {'though': 0.3333333333333333, 'keeping': 0.3333333333333333, 'on': 0.3333333333333333}
('look', 'my') {'to': 0.3333333333333333, 'at': 0.3333333333333333, 'on': 0.3333333333333333}
("''", 'have') {'that': 0.5, 'models': 0.5}


In [18]:
# Raw middle words recorded for the ('just', 'my') context, duplicates
# included, for comparison with the probability table built from them.
print(trigrams['just', 'my'])

['take', 'got', 'removed', 'recieved', 'got']


## 3. Implement the Spinner

In [42]:
def random_sample(d):
    """Draw one key from ``d`` at random, weighted by its value.

    Parameters
    ----------
    d : dict
        Maps each candidate to its probability. The values are treated as a
        discrete distribution and are walked in the dict's iteration order.

    Returns
    -------
    The sampled key, or ``None`` if ``d`` is empty.

    Notes
    -----
    Floating-point accumulation can leave the running total marginally
    below 1 (ten weights of 0.1 add up to 0.9999999999999999). If the random
    draw lands in that gap, no cumulative threshold exceeds it; the last key
    is returned in that case rather than falling off the loop with ``None``.
    """
    r = random.random()
    cumulative = 0
    w = None  # stays None only when d is empty
    for w, p in d.items():
        cumulative += p
        if r < cumulative:
            return w
    # Rounding left the total at or below r: fall back to the final key.
    return w

In [43]:
def test_spinner(trigrams_probabilities, spin_probability=0.2):
    """Spin one randomly chosen positive review and print before and after.

    A review is picked from ``positive_reviews``, lower-cased and tokenized.
    Each interior token is then considered for replacement with probability
    ``spin_probability``; when its (previous, next) context is a key of
    ``trigrams_probabilities``, a substitute is drawn with ``random_sample``.
    Tokens are replaced in place, so a substituted word becomes part of the
    context used for the positions that follow it.

    Parameters
    ----------
    trigrams_probabilities : dict
        Maps a (previous word, next word) tuple to a dict of
        {middle word: probability}.
    spin_probability : float, optional
        Chance of attempting a replacement at each position. Defaults to
        0.2, the value that was previously hard-coded.

    Returns
    -------
    None. The original and the spun text are written to stdout.
    """
    review = random.choice(positive_reviews)
    s = review.text.lower()
    print("Original: {0}".format(s))
    tokens = nltk.tokenize.word_tokenize(s)
    for i in range(len(tokens) - 2):
        if random.random() < spin_probability:
            k = (tokens[i], tokens[i+2])
            if k in trigrams_probabilities:
                w = random_sample(trigrams_probabilities[k])
                # Keep the original word if sampling yields nothing;
                # otherwise the join below would silently delete the token.
                if w is not None:
                    tokens[i+1] = w

    print("Spun:")
    spun_article = " ".join(word for word in tokens if word)
    # Re-attach punctuation that tokenization split off from its neighbour.
    spun_article = spun_article.replace(" .", ".").replace(" ,", ",").replace("$ ", "$").replace(" !", "!")
    print(spun_article)

In [44]:
# Spin one randomly chosen positive review and print the original text
# followed by the spun version. Output differs on every run because the
# review and the replacements are chosen at random.
test_spinner(trigrams_probabilities)

Original: 
i got this jacket a month into having my ipod.  i had to buy it because i had been using the one that comes with the ipod and when i pulled the ipod out of that case it was easy to drop.  one time i did drop it and within a month of owning the ipod i had it in fro repairs.

this case protected my ipod of another 11 months just fine.  my complaint with it tough is the plastic peice that protects the screen and pod from falling out.  it is stiched on the inside so when the ipod is in there is creates friction.  acrilic and plastic do not mux and you get a slightly scraped serface.

it will protect your pod though

Spun:
i do this jacket a month into having my playlist. i had to get it because i had been using the one that comes with the ipod and when i pulled the ipod photo of that case it was easy to drop. one time i did drop it and within a month of owning the ipod i had it in fro repairs. this case with my ipod of another 11 months just fine. my car with it truly is the pla