In [3]:
# Import dependencies
import pandas as pd
from sqlalchemy import create_engine
import psycopg2
import datetime as dt

import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns

# For database connection
from keys import conn_string

In [5]:
# Load the Disneyland reviews CSV (decoded as latin-1) and preview the first rows
reviews_file = "Resources/DisneyReviews.csv"
reviewsdf = pd.read_csv(
    reviews_file,
    encoding="latin-1",
)
reviewsdf.head()

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch
0,670772142,4,2019-4,Australia,If you've ever been to Disneyland anywhere you...,Disneyland_HongKong
1,670682799,4,2019-5,Philippines,Its been a while since d last time we visit HK...,Disneyland_HongKong
2,670623270,4,2019-4,United Arab Emirates,Thanks God it wasn t too hot or too humid wh...,Disneyland_HongKong
3,670607911,4,2019-4,Australia,HK Disneyland is a great compact park. Unfortu...,Disneyland_HongKong
4,670607296,4,2019-4,United Kingdom,"the location is not in the city, took around 1...",Disneyland_HongKong


In [6]:
# Keep only Disneyland_California reviews (this drops Disneyland_HongKong and
# Disneyland_Paris) and discard rows whose Year_Month is the literal 'missing'.
# .copy() makes the result an independent DataFrame, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
is_california = reviewsdf['Branch'] == 'Disneyland_California'
has_year_month = reviewsdf['Year_Month'] != 'missing'
reviewsdf = reviewsdf.loc[is_california & has_year_month].copy()
reviewsdf

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch
9620,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California
9621,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California
9622,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,Disneyland_California
9623,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California
9624,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California
...,...,...,...,...,...,...
28499,92494269,1,2010-12,Canada,"Myself, along with my two chidren ages 8 and 1...",Disneyland_California
28500,92313324,4,2010-12,United States,We love Disneyland so much that we go there of...,Disneyland_California
28501,91799423,5,2010-10,Australia,As this was part of our international conferen...,Disneyland_California
28502,91657810,4,2010-12,Australia,we spent one day at disneyland withmy sister ...,Disneyland_California


In [7]:
# Parse the Year_Month strings (e.g. '2019-4') into datetime values.
# errors='coerce' turns unparseable values into NaT; rows with NaT are then
# dropped from the frame itself. Calling .dropna() on the Series before
# assigning it has no effect, because column assignment re-aligns on the index
# and re-introduces the NaT rows.
# .assign() returns a new frame, which avoids SettingWithCopyWarning when
# reviewsdf is a filtered slice of another frame.
reviewsdf = (
    reviewsdf
    .assign(**{'YYYY-MM': pd.to_datetime(reviewsdf['Year_Month'], format='%Y-%m', errors='coerce')})
    .dropna(subset=['YYYY-MM'])
)
reviewsdf.head()

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
9620,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
9621,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
9622,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,Disneyland_California,2019-05-01
9623,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
9624,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01


In [8]:
# Keep only reviews dated from September 2014 through April 2019 (both ends inclusive)
in_date_window = reviewsdf['YYYY-MM'].between("2014-09-01", "2019-04-30")
reviewsdf = reviewsdf[in_date_window]
reviewsdf

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
9620,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
9621,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
9623,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
9624,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01
9625,670382191,5,2019-4,Lebanon,Oh my god you can really forget your self and ...,Disneyland_California,2019-04-01
...,...,...,...,...,...,...,...
21112,226750628,5,2014-9,United States,The day after the holiday was the best day to ...,Disneyland_California,2014-09-01
21134,226493578,4,2014-9,United States,Liked: For a holiday weekend lines werent craz...,Disneyland_California,2014-09-01
21137,226470511,5,2014-9,United States,I have been to LA a lot of times already but l...,Disneyland_California,2014-09-01
21160,226296839,5,2014-9,United States,"I just got back from Disneyland and, as always...",Disneyland_California,2014-09-01


In [9]:
# Working copy of the reviews limited to September 2014 through April 2019.
# .copy() makes mydf an independent DataFrame so that modifying it in place
# afterwards does not raise SettingWithCopyWarning.
in_window = (reviewsdf['YYYY-MM'] >= "2014-09-01") & (reviewsdf['YYYY-MM'] <= "2019-04-30")
mydf = reviewsdf[in_window].copy()
mydf

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
9620,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
9621,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
9623,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
9624,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01
9625,670382191,5,2019-4,Lebanon,Oh my god you can really forget your self and ...,Disneyland_California,2019-04-01
...,...,...,...,...,...,...,...
21112,226750628,5,2014-9,United States,The day after the holiday was the best day to ...,Disneyland_California,2014-09-01
21134,226493578,4,2014-9,United States,Liked: For a holiday weekend lines werent craz...,Disneyland_California,2014-09-01
21137,226470511,5,2014-9,United States,I have been to LA a lot of times already but l...,Disneyland_California,2014-09-01
21160,226296839,5,2014-9,United States,"I just got back from Disneyland and, as always...",Disneyland_California,2014-09-01


In [10]:
# Remove repeated reviews, keeping the first occurrence of each Review_Text.
# The result is reassigned rather than using inplace=True: in-place mutation of
# a frame obtained by filtering another frame raises SettingWithCopyWarning,
# and reassignment keeps the cell chainable and safe to re-run.
mydf = mydf.drop_duplicates(subset='Review_Text', keep='first')
mydf

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
9620,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
9621,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
9623,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
9624,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01
9625,670382191,5,2019-4,Lebanon,Oh my god you can really forget your self and ...,Disneyland_California,2019-04-01
...,...,...,...,...,...,...,...
21112,226750628,5,2014-9,United States,The day after the holiday was the best day to ...,Disneyland_California,2014-09-01
21134,226493578,4,2014-9,United States,Liked: For a holiday weekend lines werent craz...,Disneyland_California,2014-09-01
21137,226470511,5,2014-9,United States,I have been to LA a lot of times already but l...,Disneyland_California,2014-09-01
21160,226296839,5,2014-9,United States,"I just got back from Disneyland and, as always...",Disneyland_California,2014-09-01
