In [1]:
from flask import Flask, request, render_template
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize

In [2]:
# establish connection to engine
engine = create_engine("postgresql://postgres:postgres@localhost/CPG")

In [3]:
def readData(table="eucerin_intensive_lotion", 
         engine=create_engine("postgresql://postgres:postgres@localhost/CPG")):
    
    # connect engine
    conn = engine.connect()
    
    # try making query asked for
    try:
        query = f"SELECT * FROM {table}"
        # attempt to read table queried
        data = pd.read_sql(query,conn)
    except:
        # output default data
        query = "SELECT * FROM eucerin_intensive_lotion"
        data = pd.read_sql(query,conn)
    
    return data
        
    
    

In [4]:
test_df = readData(table="CeraVe_cream")
test_df.head()

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,url
0,1,Em🐾,5.0 out of 5 stars,This is the moisturizer I’ve been searching for🤩,"Reviewed in the United States on May 18, 2018",I feel a little awkward posting a picture of m...,743 people found this helpful,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...
1,2,Mounir Errami,5.0 out of 5 stars,Highly recommend!,"Reviewed in the United States on January 4, 2019",I am a doctor. Not a dermatologist though. In ...,397 people found this helpful,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...
2,3,Laura K.,5.0 out of 5 stars,Best moisturizer,"Reviewed in the United States on April 30, 2018",I have extremely dry skin that's also acne pro...,252 people found this helpful,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...
3,4,BCB,5.0 out of 5 stars,Yaaaassss! Moisture is my face’s friend.,"Reviewed in the United States on April 7, 2018",Let me set the scene that is my 35 year old fa...,195 people found this helpful,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...
4,5,erinlbyrd,5.0 out of 5 stars,If you have eczema this will change your life,"Reviewed in the United States on December 30, ...","My daughter has severe eczema, one of the wors...",384 people found this helpful,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...


### Lambda Functions

In [5]:
def extractStars(row):
    """Lambda function to extract number of stars left in rating"""
    return float(row.split(' ')[0]) 

def extractHelpful(row):
    """Lambda function to extract number of upvotes on Amazon"""
    rev = row.split(' ')[0]
    
    if(rev.isnumeric()):
        return int(rev)
    elif(rev=="one"):
        return 1
    else:
        return 0
    
def extractDate(row):
    """Lambda function to convert string into datetime object"""
    date = pd.to_datetime(row[33:])
    return date

def convertTime(rev):
    """Lambda function to abstract datetime object per month for groupby"""
    corr_date = rev-pd.offsets.MonthBegin(1) 
    return corr_date

def countWords(rev):
    """Lambda function to count all words in a particular review"""
    return len(word_tokenize(rev))

### read_transform function

In [6]:
def read_transform(table="eucerin_intensive_lotion",
                   engine=create_engine("postgresql://postgres:postgres@localhost/CPG")):
    """Docstring: makes query to PostgreSQL database using the table defined.
    Performs all transformations, including cleaning prior to returning dataframe"""
    
    # read in raw data from PostgreSQL
    data = readData(table,engine)
       
    # transformations
    data["stars"] = data.apply(lambda x: extractStars(x["stars"]),axis=1)
    data["helpful"] = data.apply(lambda x: extractHelpful(x["helpful"]),axis=1)
    data["review_date"] = data.apply(lambda x: extractDate(x["review_date"]),axis=1)
    data["corr_date"] = data.apply(lambda x: convertTime(x["review_date"]),axis=1)
    data["word_count"] = data.apply(lambda x: countWords(x["review"]),axis=1)
    
    # perform groupby on month to get aggregate data
    gb = data.groupby('corr_date')["stars"].mean()
    
    # find review with maximum upvoted comments
    idx = data["helpful"].argmax()
    max_upvoted_review = data["review"][idx]
    
    # convert dates back to strings
    #data['review_date']=data['review_date'].astype(str)
    #data['corr_date']=data['corr_date'].astype(str)
    
    
    # populate dictionary containing all data to pass back to route
    ratings_dict = {}
    ratings_dict["review_date"] = list(data["review_date"])
    ratings_dict["gb_date"] = gb.index.astype(str).tolist()
    ratings_dict["avg_monthly_rating"] = list(gb)
    ratings_dict["histogram_rating_values"] = np.histogram(data["stars"], bins=[1,2,3,4,5,6])[0].tolist()
    ratings_dict["histogram_rating_bins"] = np.histogram(data["stars"], bins=[1,2,3,4,5,6])[1].tolist()
    ratings_dict["max_upvoted_review"] = max_upvoted_review
    
    return data, ratings_dict
    

In [62]:
test_df, test_dict = read_transform(table="CeraVe_cream")

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [63]:
test_df.head()

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,url,corr_date,word_count
0,1,Em🐾,5.0,This is the moisturizer I’ve been searching for🤩,2018-05-18,I feel a little awkward posting a picture of m...,743,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...,2018-05-01 00:00:00,390
1,2,Mounir Errami,5.0,Highly recommend!,2019-01-04,I am a doctor. Not a dermatologist though. In ...,397,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...,2019-01-01 00:00:00,142
2,3,Laura K.,5.0,Best moisturizer,2018-04-30,I have extremely dry skin that's also acne pro...,252,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...,2018-04-01 00:00:00,74
3,4,BCB,5.0,Yaaaassss! Moisture is my face’s friend.,2018-04-07,Let me set the scene that is my 35 year old fa...,195,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...,2018-04-01 00:00:00,162
4,5,erinlbyrd,5.0,If you have eczema this will change your life,2015-12-30,"My daughter has severe eczema, one of the wors...",384,Cream,CeraVe,CeraVe Moisturizing Cream,https://www.amazon.com/CeraVe-Moisturizing-Cre...,2015-12-01 00:00:00,159


In [64]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3128 entries, 0 to 3127
Data columns (total 13 columns):
id              3128 non-null int64
profile_name    3128 non-null object
stars           3128 non-null float64
title           3128 non-null object
review_date     3128 non-null object
review          3128 non-null object
helpful         3128 non-null int64
form            3128 non-null object
brand           3128 non-null object
sku             3128 non-null object
url             3128 non-null object
corr_date       3128 non-null object
word_count      3128 non-null int64
dtypes: float64(1), int64(3), object(9)
memory usage: 317.8+ KB


In [43]:
test_df['review_date']=test_df['review_date'].astype(str)

In [44]:
test_df['review_date'][0]

'2018-05-18'

In [45]:
test_dict

{'review_date': ['2018-05-18',
  '2019-01-04',
  '2018-04-30',
  '2018-04-07',
  '2015-12-30',
  '2019-01-17',
  '2018-05-30',
  '2018-03-24',
  '2019-05-09',
  '2019-10-11',
  '2019-06-04',
  '2018-12-21',
  '2019-01-14',
  '2019-07-12',
  '2019-04-11',
  '2019-03-21',
  '2019-04-30',
  '2019-03-03',
  '2018-10-30',
  '2019-08-01',
  '2019-02-14',
  '2019-08-23',
  '2018-11-07',
  '2018-04-04',
  '2019-04-05',
  '2018-12-15',
  '2018-12-29',
  '2019-08-03',
  '2018-07-27',
  '2018-08-23',
  '2019-06-01',
  '2019-02-15',
  '2019-02-27',
  '2018-03-17',
  '2018-12-07',
  '2019-06-23',
  '2019-04-16',
  '2018-11-23',
  '2019-01-07',
  '2019-01-09',
  '2018-08-28',
  '2020-01-24',
  '2019-11-06',
  '2019-11-25',
  '2020-02-24',
  '2019-10-02',
  '2019-08-15',
  '2019-06-29',
  '2019-02-24',
  '2018-12-14',
  '2018-06-18',
  '2019-11-10',
  '2018-06-07',
  '2019-12-08',
  '2018-09-09',
  '2019-07-13',
  '2019-03-17',
  '2019-07-08',
  '2019-12-18',
  '2019-08-23',
  '2019-01-13',
  '2019-1

In [46]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3128 entries, 0 to 3127
Data columns (total 13 columns):
id              3128 non-null int64
profile_name    3128 non-null object
stars           3128 non-null float64
title           3128 non-null object
review_date     3128 non-null object
review          3128 non-null object
helpful         3128 non-null int64
form            3128 non-null object
brand           3128 non-null object
sku             3128 non-null object
url             3128 non-null object
corr_date       3128 non-null datetime64[ns]
word_count      3128 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(3), object(8)
memory usage: 317.8+ KB


# Emotion

In [7]:
# http://sentiment.nrc.ca/lexicons-for-research/

from nltk import word_tokenize
from nltk.stem.snowball import SnowballStemmer

def text_emotion(table="eucerin_intensive_lotion",
                   engine=create_engine("postgresql://postgres:postgres@localhost/CPG")):
    
    test_df, test_dict = read_transform()
    
    new_df = test_df.copy() # can refactor later, but this will be consistent with naming convention
    
    column = "review"

    
    filepath = ('data/NRC-Sentiment-Emotion-Lexicons/NRC-Emotion-Lexicon-v0.92/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt')  
    
    emolex_df = pd.read_csv(filepath, names=["word", "emotion", "association"], sep='\t')
    emolex_words = emolex_df.pivot(index='word',
                                   columns='emotion',
                                   values='association').reset_index()
    emotions = emolex_words.columns.drop('word')
    
    emo_df = pd.DataFrame(0,index=new_df.index, columns=emotions)
    
    stemmer = SnowballStemmer("english")
    
    for i in range(0, len(new_df)):
        row = new_df[column][i]
        document = word_tokenize(row)
        
        for word in document:
            word = stemmer.stem(word.lower())
            
            emo_score = emolex_words[emolex_words.word == word]
            
            if not emo_score.empty:
                for emotion in list(emotions):
                    emo_df.at[i,emotion] += emo_score[emotion]
        new_df = pd.concat([new_df, emo_df], axis=1)
    
    return new_df

def monthlyEmotionAvg(filename,table="eucerin_intensive_lotion",
                   engine=create_engine("postgresql://postgres:postgres@localhost/CPG")):

    
    # calculate emotional response
    emotion_df = text_emotion(table,engine)
    
    # calculate date
    #df['YearMonth'] = df['review_date'] - pd.offsets.MonthBegin(1)
    date = list(emotion_df["corr_date"]) ## HAVE TO HAVE THIS FCN
    
    # take row sums
    emotion_df["anger_sum"] = emotion_df["anger"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["anticipation_sum"] = emotion_df["anticipation"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["disgust_sum"] = emotion_df["disgust"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["fear_sum"] = emotion_df["fear"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["joy_sum"] = emotion_df["joy"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["negative_sum"] = emotion_df["negative"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["positive_sum"] = emotion_df["positive"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["sadness_sum"] = emotion_df["sadness"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["surprise_sum"] = emotion_df["surprise"].sum(axis=1)/len(emotion_df["review"])
    emotion_df["trust_sum"] = emotion_df["trust"].sum(axis=1)/len(emotion_df["review"])
    
    # take just sums
    emotion_df = emotion_df.iloc[:,-10:]
    
    # store emotional response for all 10 vectors in dictionary
    month_avg = {}
    for col in emotion_df.columns:
        emotions = list(emotion_df[col])
        col_name = col[:-4]
        month_avg[col_name] = list(pd.DataFrame({"Date": date, "Emotion": emotions}).groupby("Date").mean()["Emotion"])#.plot(kind="line")
    
      
    gb_dates = pd.DataFrame({"Date": date, "Emotion": emotions}).groupby("Date").mean().index.astype(str).tolist()
    month_avg["dates"] = gb_dates
    month_avg = pd.DataFrame(month_avg)
                      
    # format csv output
    name = "/Users/matthewrichtmyer/Documents/Data Science Bootcamp/Project 2/CPG-Analysis/data/emotion_csv/" + str(filename)
    month_avg.to_csv(name,index=False)
    
    return month_avg # this is just returning a dictionary, not list of dictionary which is needed for JS

In [8]:
# run model and save to csv for all tables in postgreSQL database
month_avg = monthlyEmotionAvg(filename="CeraVe_cream.csv",table="CeraVe_cream",
                             engine=create_engine("postgresql://postgres:postgres@localhost/CPG"))

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [10]:
# run model and save to csv for all tables in postgreSQL database
month_avg = monthlyEmotionAvg(filename="cerave_cream.csv",table="cerave_cream",
                             engine=create_engine("postgresql://postgres:postgres@localhost/CPG"))

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


KeyboardInterrupt: 

In [11]:
# run model and save to csv for all tables in postgreSQL database
month_avg = monthlyEmotionAvg(filename="eucerin_adv_cream.csv",table="eucerin_adv_cream",
                             engine=create_engine("postgresql://postgres:postgres@localhost/CPG"))

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [12]:
# run model and save to csv for all tables in postgreSQL database
month_avg = monthlyEmotionAvg(filename="eucerin_eczema_cream.csv",table="eucerin_eczema_cream",
                             engine=create_engine("postgresql://postgres:postgres@localhost/CPG"))

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [115]:
month_avg = monthlyEmotionAvg("eucerin_intensive_lotion.csv")

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [116]:
file_dict = {"CeraVe_cream": "CeraVe_cream.csv",
             "CeraVe_lotion": "CeraVe_lotion.csv",
             "cerave_cream": "cerave_cream.csv",
             "cerave_lotion": "cerave_lotion.csv",
             "eucerin_adv_cream": "Eucerin_advanced_cream.csv",
             "eucerin_eczema_cream": "Eucerin_eczema_cream.csv"}

for key in list(file_dict.keys()):
    try:
        filename = file_dict[key]
        table = key
        engine = create_engine("postgresql://postgres:postgres@localhost/CPG")
        month_avg = monthlyEmotionAvg(filename,table,engine)
    except:
        None

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [110]:
month_avg

Unnamed: 0,anger,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust,dates
0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,2016-02-01
1,0.0,2.4,0.0,0.6,1.8,0.6,2.4,0.0,0.0,1.8,2017-07-01
2,0.0,0.4,0.0,0.0,0.4,0.8,0.8,0.0,0.0,1.6,2017-08-01
3,0.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,2.0,2017-11-01
4,0.8,2.4,0.8,0.0,3.2,0.8,5.6,0.0,0.8,4.8,2018-05-01


In [112]:
filename = "eucerin_intensive_lotion.csv"
name = "/Users/matthewrichtmyer/Documents/Data Science Bootcamp/Project 2/CPG-Analysis/data/emotion_csv/" + str(filename)
month_avg.to_csv(name,index=False)

In [54]:
test_df = readData()

In [55]:
test_df.head()

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,url
0,1,saics,1.0 out of 5 stars,This is not the same great product,"Reviewed in the United States on November 23, ...","The bottle said it's the same great product, b...",88 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...
1,2,Shadow,5.0 out of 5 stars,Home Run,"Reviewed in the United States on May 16, 2018",PERFECT! Let me say that again. PERFECT. We...,31 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...
2,3,N. Keithley,5.0 out of 5 stars,Great for dry skin!,"Reviewed in the United States on July 3, 2017",I am a Texas woman who is not afraid to walk b...,38 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...
3,4,Nilan,4.0 out of 5 stars,Great but leaves a sticky feeling,"Reviewed in the United States on August 9, 2017","This is the richest formula, so its very heavy...",35 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...
4,5,jlfriddle2012,5.0 out of 5 stars,Won't use anything but this,"Reviewed in the United States on February 2, 2016",I've been using Eucerin lotion for 20+ years. ...,25 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...


In [73]:
test_df, test_dict = read_transform()

emotion_df = monthlyEmotionAvg(test_df,"eucerin_intensive_lotion.csv")

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


AttributeError: 'dict' object has no attribute 'to_csv'

In [59]:
test_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1090 entries, 0 to 1089
Data columns (total 13 columns):
id              1090 non-null int64
profile_name    1090 non-null object
stars           1090 non-null float64
title           1090 non-null object
review_date     1090 non-null object
review          1090 non-null object
helpful         1090 non-null int64
form            1090 non-null object
brand           1090 non-null object
sku             1090 non-null object
url             1090 non-null object
corr_date       1090 non-null datetime64[ns]
word_count      1090 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(3), object(8)
memory usage: 110.8+ KB


In [75]:
test_df = text_emotion(readData().head())

In [76]:
test_df

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,...,anger,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust
0,1,saics,1.0 out of 5 stars,This is not the same great product,"Reviewed in the United States on November 23, ...","The bottle said it's the same great product, b...",88 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,2,0,0,1,0,3,2,0,2
1,2,Shadow,5.0 out of 5 stars,Home Run,"Reviewed in the United States on May 16, 2018",PERFECT! Let me say that again. PERFECT. We...,31 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,1,3,1,0,4,1,7,0,1,6
2,3,N. Keithley,5.0 out of 5 stars,Great for dry skin!,"Reviewed in the United States on July 3, 2017",I am a Texas woman who is not afraid to walk b...,38 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,4,0,1,3,1,4,0,0,3
3,4,Nilan,4.0 out of 5 stars,Great but leaves a sticky feeling,"Reviewed in the United States on August 9, 2017","This is the richest formula, so its very heavy...",35 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,1,0,0,1,2,2,0,0,4
4,5,jlfriddle2012,5.0 out of 5 stars,Won't use anything but this,"Reviewed in the United States on February 2, 2016",I've been using Eucerin lotion for 20+ years. ...,25 people found this helpful,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,0,0,0,0,1,0,0,0,0


In [78]:
test_df, test_dict = read_transform()

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


In [79]:
test_df

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,url,corr_date,word_count
0,1,saics,1.0,This is not the same great product,2017-11-23,"The bottle said it's the same great product, b...",88,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...,2017-11-01,35
1,2,Shadow,5.0,Home Run,2018-05-16,PERFECT! Let me say that again. PERFECT. We...,31,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...,2018-05-01,193
2,3,N. Keithley,5.0,Great for dry skin!,2017-07-03,I am a Texas woman who is not afraid to walk b...,38,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...,2017-07-01,160
3,4,Nilan,4.0,Great but leaves a sticky feeling,2017-08-09,"This is the richest formula, so its very heavy...",35,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...,2017-08-01,124
4,5,jlfriddle2012,5.0,Won't use anything but this,2016-02-02,I've been using Eucerin lotion for 20+ years. ...,25,Lotion,Eucerin,Eucerin Intensive Repair Lotion,https://www.amazon.com/Eucerin-Intensive-Repai...,2016-02-01,113


In [80]:
test_dict

{'review_date': [Timestamp('2017-11-23 00:00:00'),
  Timestamp('2018-05-16 00:00:00'),
  Timestamp('2017-07-03 00:00:00'),
  Timestamp('2017-08-09 00:00:00'),
  Timestamp('2016-02-02 00:00:00')],
 'gb_date': ['2016-02-01',
  '2017-07-01',
  '2017-08-01',
  '2017-11-01',
  '2018-05-01'],
 'avg_monthly_rating': [5.0, 5.0, 4.0, 1.0, 5.0],
 'histogram_rating_values': [1, 0, 0, 1, 3],
 'histogram_rating_bins': [1, 2, 3, 4, 5, 6],
 'max_upvoted_review': "The bottle said it's the same great product, but it isn't.  This new product irritates my skin.  The old blue top didn't.  I love the blue top."}

In [81]:
emotion_df = text_emotion(test_df)

In [82]:
emotion_df

Unnamed: 0,id,profile_name,stars,title,review_date,review,helpful,form,brand,sku,...,anger,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust
0,1,saics,1.0,This is not the same great product,2017-11-23,"The bottle said it's the same great product, b...",88,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,2,0,0,1,0,3,2,0,2
1,2,Shadow,5.0,Home Run,2018-05-16,PERFECT! Let me say that again. PERFECT. We...,31,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,1,3,1,0,4,1,7,0,1,6
2,3,N. Keithley,5.0,Great for dry skin!,2017-07-03,I am a Texas woman who is not afraid to walk b...,38,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,4,0,1,3,1,4,0,0,3
3,4,Nilan,4.0,Great but leaves a sticky feeling,2017-08-09,"This is the richest formula, so its very heavy...",35,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,1,0,0,1,2,2,0,0,4
4,5,jlfriddle2012,5.0,Won't use anything but this,2016-02-02,I've been using Eucerin lotion for 20+ years. ...,25,Lotion,Eucerin,Eucerin Intensive Repair Lotion,...,0,0,0,0,0,1,0,0,0,0


In [89]:
abc = monthlyEmotionAvg(emotion_df,"test.csv")

In [90]:
# reformat dict into dataframe
test2_df = pd.DataFrame(abc)

In [91]:
test2_df

Unnamed: 0,anger,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust,dates
0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,2016-02-01
1,0.0,4.8,0.0,1.2,3.6,1.2,4.8,0.0,0.0,3.6,2017-07-01
2,0.0,0.8,0.0,0.0,0.8,1.6,1.6,0.0,0.0,3.2,2017-08-01
3,0.0,4.0,0.0,0.0,2.0,0.0,6.0,4.0,0.0,4.0,2017-11-01
4,1.6,4.8,1.6,0.0,6.4,1.6,11.2,0.0,1.6,9.6,2018-05-01


In [None]:
#     name = "/Users/matthewrichtmyer/Documents/Data Science Bootcamp/Project 2/CPG-Analysis/data/emotion_csv/" + str(filename)
    
    
#     month_avg.to_csv(name,index=False)