# Import libraries

In [106]:
# uncomment to install libraries
# ! pip install numpy pandas matplotlib seaborn nltk spacy regex gensim pyLDAvis
# ! python -m spacy download en_core_web_sm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import spacy
import re
from gensim.models import Phrases, LdaModel
from gensim.corpora import Dictionary
import pyLDAvis.gensim
import warnings

# Load spaCy's "en_core_web_sm" English pipeline (install it first with the
# commented-out `python -m spacy download en_core_web_sm` command above).
# `nlp` is used further down to parse each sentence into tokens whose
# `lemma_`, `orth_` and `is_stop` attributes drive the cleaning step.
nlp = spacy.load("en_core_web_sm")

# Download NLTK's "punkt" data; `nltk.tokenize.sent_tokenize` (used below to
# split reviews into sentences) relies on it. This call is intentionally the
# last statement of the cell, so its return value is what the cell displays.
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/nicholasmichalak/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Review data

In [86]:
# Load the escape room reviews; the first CSV column is used as the row index.
escape_room_reviews = pd.read_csv("data/escape_room_reviews.csv", index_col = 0)

# See it (5 random rows, drawn without replacement). The fixed random_state
# makes the preview show the same rows on every Restart & Run All.
escape_room_reviews.sample(n = 5, replace = False, random_state = 42)

Unnamed: 0,state,city_href,room_href,review_number,review,room_title,room_description,room_address
495,arizona,/phoenix,/phoenix/quests/escape-house-mesa-flood-the-city,2,Thank you Ryan for making our experience a gre...,"Escape room ""Flood The City"" by Escape House M...",Description:Terrorist calling himself Marcin i...,"3460 E Southern Ave #110, Mesa, AZ 85204 (Show..."
427,arizona,/phoenix,/phoenix/quests/epic-escape-game-rogue-agent,0,It was a lot of fun!!! There were definitely s...,"Escape room ""Rogue Agent"" by Epic Escape Game ...",Description:The President and other world lead...,"106 N. Central Avenue, Phoenix, AZ 85004 (Show..."
19,alabama,/auburn,/auburn/quests/auburn-escape-zones-black-beard...,3,This was such a fun surprise for my husband's ...,"Escape room ""Black Beard's Brig"" by Auburn Esc...",Description:Ahoy Matey! Your crew has been cap...,"1234 Commerce Dr Auburn, AL 36830 (Show on map)"
86,alabama,/gadsden,/gadsden/quests/beat60-the-darkness,4,"We had a great time ""beating 60"" in the serial...","Escape room ""The Darkness"" by beat60 in Gadsden",Description:You and your team awake in a dimly...,"227 Broad Street Gadsden, AL 35901 (Show on map)"
274,arizona,/phoenix,/phoenix/quests/escape-games-az-blaines-basement,1,I had a BLAST! For my first escape room - Blai...,"Escape room ""Blaine's Basement"" by Escape Game...",Description:The FBI was able to apprehend a su...,"12 N. Center Street, Suite 200 Mesa, AZ 85201 ..."


# Review processing

## Extract reviews

In [87]:
# Pull the review texts out as a plain list of strings.
# Empty CSV cells are read as NaN (a float), which would crash the later
# `.lower()` call, so missing reviews are dropped and every remaining entry is
# coerced to str. When no review is missing the result is unchanged.
reviews = escape_room_reviews["review"].dropna().astype(str).tolist()

## Lower case text

In [88]:
# Lower-case every review; the result keeps the same order as `reviews`.
reviews_lower = [
    review_text.lower()
    for review_text in reviews
]

## Sentences

In [89]:
# Split each lower-cased review into sentences: one list of sentences per review.
sentences = [
    nltk.tokenize.sent_tokenize(review_text)
    for review_text in reviews_lower
]

# Flatten to a single list of sentences, preserving review and sentence order.
sentences_unlist = []
for review_sentences in sentences:
    sentences_unlist.extend(review_sentences)

## spaCy docs (parse each sentence)

In [93]:
# Run every sentence through the spaCy pipeline. `nlp.pipe` processes the
# texts as a stream; collecting the results into a list lets later cells
# iterate over the parsed docs more than once.
spacy_docs = [parsed_doc for parsed_doc in nlp.pipe(sentences_unlist)]

## Lemmatize and remove stop words and words of 2 or fewer characters

In [101]:
# Build one list of lemmas per sentence. A token is kept only if it
#   * is longer than 2 characters,
#   * is not a stop word, and
#   * is not punctuation or whitespace -- multi-character tokens of that kind
#     (e.g. an ellipsis) pass the length filter and would otherwise end up in
#     the documents as noise.
docs = [
    [
        token.lemma_
        for token in doc
        if len(token.orth_) > 2
        and not token.is_stop
        and not token.is_punct
        and not token.is_space
    ]
    for doc in spacy_docs
]

# See some (5 randomly chosen documents; indices are drawn with replacement,
# so the same document may be printed more than once)
for i in np.random.randint(low = 0, high = len(docs), size = 5):
    print(docs[i])
    print("\n")

['escape', 'room', 'challenge', 'friend', 'join', 'previous', 'experience']


['blast', 'control', 'master', 'tony', 'awesome']


['explain']


['game', 'master', 'janey', 'fun', 'professional']


['staff', 'try', 'enjoyable']


