In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from datetime import timedelta

In [2]:
def GetSentiment(df=None, column=None, prefix=''):
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    #from nltk.sentiment.vader import SentimentIntensityAnalyzer
    sia = SentimentIntensityAnalyzer()
    Sent = df[[column]].copy()
    Sent[prefix+'compound'] = Sent[column].apply(lambda x: sia.polarity_scores(x)['compound'])
    Sent[prefix+'neutral'] = Sent[column].apply(lambda x: sia.polarity_scores(x)['neu'])
    Sent[prefix+'positive'] = Sent[column].apply(lambda x: sia.polarity_scores(x)['pos'])
    Sent[prefix+'negative'] = Sent[column].apply(lambda x: sia.polarity_scores(x)['neg'])
    return Sent.iloc[:,1:]

In [3]:
def GetSubjectivity(df=None, column=None, prefix=''):
    from textblob import TextBlob
    Sub = df[[column]].copy()
    Sub[prefix+'subjectivity'] = Sub[column].apply(lambda x: TextBlob(x).sentiment.subjectivity )
    return Sub.iloc[:,1:]

In [4]:
def GetReviewFiles(path='./'):
    from os import listdir
    files = [f for f in listdir(path)]
    Reviewfiles = [f for f in files if '_review.csv' in f]
    return Reviewfiles

In [5]:
def GetReviewNLP(path='./'):
    reviewfiles = GetReviewFiles(path=path)
    for file in reviewfiles:
        print(path+file)
        df = pd.read_csv(path+file)
        if len(df.columns) == 3:  
            Sentiment = GetSentiment(df=df, column='text', prefix='')
            Subjectivity = GetSubjectivity(df=df, column='text', prefix='')
            df = df.join(Sentiment).join(Subjectivity)
            df.to_csv(path+file, index=False)
        

In [None]:
def GetReviewNLPforFile(file='./OpenRestaurants/1123_mindys-hot-chocolate-chicago-2_review.csv'):
    print(file)
    df = pd.read_csv(file)
    if len(df.columns) == 3:  
        Sentiment = GetSentiment(df=df, column='text', prefix='')
        Subjectivity = GetSubjectivity(df=df, column='text', prefix='')
        df = df.join(Sentiment).join(Subjectivity)
        df.to_csv(file, index=False)

In [6]:
GetReviewNLP(path='./Gayot/')

./Gayot/0_2-sparrows_review.csv
./Gayot/100_coq-d-or-restaurant-and-lounge_review.csv
./Gayot/101_cafe-laguardia_review.csv
./Gayot/103_cafe-luciano_review.csv
./Gayot/104_cafe-matou_review.csv
./Gayot/105_cafe-chien_review.csv
./Gayot/106_cafe-selmarie_review.csv
./Gayot/107_caffe-baci_review.csv
./Gayot/108_caliterra_review.csv
./Gayot/109_cambridge-house-ltd_review.csv
./Gayot/10_adobo-grill_review.csv
./Gayot/110_cannellas-on-grand_review.csv
./Gayot/113_cantina-1910_review.csv
./Gayot/114_calo-ristorante_review.csv
./Gayot/115_filippos-ristorante_review.csv
./Gayot/116_carmichaels-chicago-steakhouse_review.csv
./Gayot/117_carriage-house_review.csv
./Gayot/118_centro_review.csv
./Gayot/119_cereality-cereal-bar-and-cafe_review.csv
./Gayot/11_aigre-doux_review.csv
./Gayot/120_ceres-table_review.csv
./Gayot/121_cerise_review.csv
./Gayot/122_chalkboard_review.csv
./Gayot/123_charlies-ale-house_review.csv
./Gayot/124_charlies-on-leavitt_review.csv
./Gayot/125_charlie-trotters_review.csv

./Gayot/384_one-sixtyblue_review.csv
./Gayot/386_oon_review.csv
./Gayot/387_opera_review.csv
./Gayot/388_leons-bar-b-q_review.csv
./Gayot/38_ballo_review.csv
./Gayot/391_la-madia_review.csv
./Gayot/392_otom_review.csv
./Gayot/393_the-outpost_review.csv
./Gayot/395_the-palm_review.csv
./Gayot/397_pane-caldo_review.csv
./Gayot/398_papa-milano_review.csv
./Gayot/399_papagus-greek-taverna_review.csv
./Gayot/3_a-la-turka-turkish-kitchen_review.csv
./Gayot/400_papagus-greek-taverna_review.csv
./Gayot/401_pappadeaux_review.csv
./Gayot/402_paris-club_review.csv
./Gayot/403_park-avenue-cafe_review.csv
./Gayot/404_the-parthenon_review.csv
./Gayot/408_penang_review.csv
./Gayot/410_perennial_review.csv
./Gayot/411_perennial-virant_review.csv
./Gayot/412_phil-and-lous_review.csv
./Gayot/413_piazza-m_review.csv
./Gayot/414_pierogi-factory_review.csv
./Gayot/416_pili-pili_review.csv
./Gayot/417_p-j-clarkes_review.csv
./Gayot/419_platiyo_review.csv
./Gayot/41_bangkok-thai-cuisine_review.csv
./Gayot/42

In [7]:
GetReviewNLP(path='./Tribune/')

./Tribune/0_25-degrees_review.csv
./Tribune/100_salpicon_review.csv
./Tribune/101_salt-and-pepper-diner_review.csv
./Tribune/102_sams-red-hots_review.csv
./Tribune/102_sam’s-red-hots_review.csv
./Tribune/103_sarks-in-the-park_review.csv
./Tribune/104_seasons-52_review.csv
./Tribune/105_seven-the-den-and-manhole_review.csv
./Tribune/106_shabowl_review.csv
./Tribune/107_shiso_review.csv
./Tribune/108_the-shrine_review.csv
./Tribune/109_smokey-hollow_review.csv
./Tribune/10_the-beer-bistro-north_review.csv
./Tribune/110_spritzburger_review.csv
./Tribune/111_strings-2_review.csv
./Tribune/112_taverna-750_review.csv
./Tribune/113_tete-charcuterie_review.csv
./Tribune/114_trattoria-isabella_review.csv
./Tribune/115_trinity-bar_review.csv
./Tribune/116_two-way-lounge_review.csv
./Tribune/117_unite-urban-grill_review.csv
./Tribune/118_veteran-tamale_review.csv
./Tribune/119_vip-chinese_review.csv
./Tribune/119_vip-restaurant_review.csv
./Tribune/11_belly-shack_review.csv
./Tribune/120_vivial_r

In [8]:
GetReviewNLP(path='./OpenRestaurants/')

./OpenRestaurants/0_rockit-burger-bar-chicago_review.csv
./OpenRestaurants/1001_taqueria-san-jose-chicago_review.csv
./OpenRestaurants/1003_osteria-via-stato-chicago_review.csv
./OpenRestaurants/1004_las-picosas-chicago_review.csv
./OpenRestaurants/1005_the-little-india-chicago_review.csv
./OpenRestaurants/1007_green-zebra-chicago_review.csv
./OpenRestaurants/1013_dicks-last-resort-chicago-2_review.csv
./OpenRestaurants/1017_buona-terra-ristorante-chicago_review.csv
./OpenRestaurants/1023_paramount-room-chicago_review.csv
./OpenRestaurants/1025_foodlife-chicago_review.csv
./OpenRestaurants/1026_sheffields-wine-and-beer-garden-chicago_review.csv
./OpenRestaurants/1028_primehouse-chicago-2_review.csv
./OpenRestaurants/1033_iguana-café-chicago-2_review.csv
./OpenRestaurants/1034_lou-malnatis-pizzeria-chicago-4_review.csv
./OpenRestaurants/1037_d-amatos-bakery-chicago_review.csv
./OpenRestaurants/1038_flat-top-grill-chicago-7_review.csv
./OpenRestaurants/1039_merlo-on-maple-chicago_review

ParserError: Error tokenizing data. C error: Buffer overflow caught - possible malformed input file.


1123_mindys-hot-chocolate-chicago-2_review_bad.csv

In [9]:
GetReviewNLP(path='./OpenRestaurants/')

./OpenRestaurants/0_rockit-burger-bar-chicago_review.csv
./OpenRestaurants/1001_taqueria-san-jose-chicago_review.csv
./OpenRestaurants/1003_osteria-via-stato-chicago_review.csv
./OpenRestaurants/1004_las-picosas-chicago_review.csv
./OpenRestaurants/1005_the-little-india-chicago_review.csv
./OpenRestaurants/1007_green-zebra-chicago_review.csv
./OpenRestaurants/1013_dicks-last-resort-chicago-2_review.csv
./OpenRestaurants/1017_buona-terra-ristorante-chicago_review.csv
./OpenRestaurants/1023_paramount-room-chicago_review.csv
./OpenRestaurants/1025_foodlife-chicago_review.csv
./OpenRestaurants/1026_sheffields-wine-and-beer-garden-chicago_review.csv
./OpenRestaurants/1028_primehouse-chicago-2_review.csv
./OpenRestaurants/1033_iguana-café-chicago-2_review.csv
./OpenRestaurants/1034_lou-malnatis-pizzeria-chicago-4_review.csv
./OpenRestaurants/1037_d-amatos-bakery-chicago_review.csv
./OpenRestaurants/1038_flat-top-grill-chicago-7_review.csv
./OpenRestaurants/1039_merlo-on-maple-chicago_review

./OpenRestaurants/1302_aruns-thai-restaurant-chicago_review.csv
./OpenRestaurants/1304_chi-cafe-chicago_review.csv
./OpenRestaurants/1307_golden-apple-grill-and-breakfast-house-chicago_review.csv
./OpenRestaurants/1309_naha-chicago_review.csv
./OpenRestaurants/1310_atinos-pizza-chicago_review.csv
./OpenRestaurants/1312_fado-irish-pub-and-restaurant-chicago-2_review.csv
./OpenRestaurants/1314_la-peña-restaurante-chicago_review.csv
./OpenRestaurants/1320_one-north-kitchen-and-bar-chicago_review.csv
./OpenRestaurants/1323_tempo-cafe-chicago_review.csv
./OpenRestaurants/1324_serbian-village-restaurant-chicago_review.csv
./OpenRestaurants/1328_tocco-chicago_review.csv
./OpenRestaurants/132_mercadito-chicago_review.csv
./OpenRestaurants/1330_harry-carays-italian-steakhouse-river-north-chicago-2_review.csv
./OpenRestaurants/1332_azucar-tapas-restaurant-and-cocktail-bar-chicago_review.csv
./OpenRestaurants/1336_fireplace-inn-chicago_review.csv
./OpenRestaurants/1340_azteca-grill-chicago-2_rev

ParserError: Error tokenizing data. C error: Buffer overflow caught - possible malformed input file.


In [33]:
df = pd.read_csv('./Gayot/262_indian-garden_review.csv')

In [7]:
#def GetStats(csvfile=None):
df = pd.read_csv('2_42-grams_review.csv')

In [10]:
df.shape

(80, 3)

In [12]:
df

Unnamed: 0,date,star,text
0,2017-07-17,5.0,We are sad to hear of 42 grams closing. We had...
1,2017-05-16,5.0,A gold standard in fine dining. Jake and Alex...
2,2017-04-25,5.0,Wow... this is an experience. Probably top ...
3,2017-03-17,5.0,What an excellent dining experience! One of th...
4,2017-03-09,5.0,Of all the fine dining experiences I have had ...
5,2017-01-05,5.0,"Overall , amazing. Food worth all the praise p..."
6,2016-12-03,5.0,Fantastic. Extremely personable between the ch...
7,2016-11-22,5.0,Absurd. You will not forget eating here. One...
8,2016-11-12,5.0,42 grams provides a unique dining experience t...
9,2016-11-11,4.0,went there last year for our anniversary last....


In [13]:
df.shape[0]

80

In [18]:
last = df['date'][0]

In [19]:
type(last)

str

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 3 columns):
date    80 non-null object
star    80 non-null float64
text    80 non-null object
dtypes: float64(1), object(2)
memory usage: 2.0+ KB


In [22]:
df['date'] =  pd.to_datetime(df['date'])

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 3 columns):
date    80 non-null datetime64[ns]
star    80 non-null float64
text    80 non-null object
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 2.0+ KB


In [25]:
df['date'][0]

Timestamp('2017-07-17 00:00:00')

In [44]:
daysago = 365
#td = timedelta(days=daysago)

In [45]:
timepoint = df['date'][0] - timedelta(days=daysago)

In [34]:
oneyago = df['date'][0] - td

In [33]:
df['date'][0] > ago

True

In [35]:
twoyago = df['date'][0] - timedelta(days=720)

In [37]:
oneyago

Timestamp('2016-07-17 00:00:00')

In [36]:
twoyago

Timestamp('2015-07-28 00:00:00')

In [38]:
twoyago > oneyago

False

In [41]:
recent = df[df['date'] > oneyago]

In [42]:
recent

Unnamed: 0,date,star,text
0,2017-07-17,5.0,We are sad to hear of 42 grams closing. We had...
1,2017-05-16,5.0,A gold standard in fine dining. Jake and Alex...
2,2017-04-25,5.0,Wow... this is an experience. Probably top ...
3,2017-03-17,5.0,What an excellent dining experience! One of th...
4,2017-03-09,5.0,Of all the fine dining experiences I have had ...
5,2017-01-05,5.0,"Overall , amazing. Food worth all the praise p..."
6,2016-12-03,5.0,Fantastic. Extremely personable between the ch...
7,2016-11-22,5.0,Absurd. You will not forget eating here. One...
8,2016-11-12,5.0,42 grams provides a unique dining experience t...
9,2016-11-11,4.0,went there last year for our anniversary last....


In [47]:
df.shape[0]

80

In [53]:
oneyear = 365
twoyear = 730

In [54]:
timepoint1 = df['date'][0] - timedelta(days=oneyear)
timepoint2 = df['date'][0] - timedelta(days=twoyear)
recent = df[df['date'] > timepoint1]
if recent.shape[0] >= 50:
    df = recent.copy()
else:
    df = df[df['date'] > timepoint2]

In [55]:
df

Unnamed: 0,date,star,text
0,2017-07-17,5.0,We are sad to hear of 42 grams closing. We had...
1,2017-05-16,5.0,A gold standard in fine dining. Jake and Alex...
2,2017-04-25,5.0,Wow... this is an experience. Probably top ...
3,2017-03-17,5.0,What an excellent dining experience! One of th...
4,2017-03-09,5.0,Of all the fine dining experiences I have had ...
5,2017-01-05,5.0,"Overall , amazing. Food worth all the praise p..."
6,2016-12-03,5.0,Fantastic. Extremely personable between the ch...
7,2016-11-22,5.0,Absurd. You will not forget eating here. One...
8,2016-11-12,5.0,42 grams provides a unique dining experience t...
9,2016-11-11,4.0,went there last year for our anniversary last....


In [None]:
if df.shape[0] > 50:
    timepoint = df['date'][0] - timedelta(days=daysago)
    recent = df[df['date'] > timepoint]
    if recent.shape[0] < 50