# DoorDash Customer Experience

### The goal of this code is to extract DoorDash customer reviews about the DoorDash Ordering App from the Android Google Play Store
### This code powers the Domo dashboard located at:
### Created by Kendall Ruber

# Import packages

In [1]:
#  you may need to run these in the terminal :
# pip install google_play_scraper
# pip install authlib
# pip install holidays
# pip install scipy
###### from  https://ipywidgets.readthedocs.io/en/stable/user_install.html
# conda install -c conda-forge ipywidgets
#jupyter nbextension enable --py widgetsnbextension
# conda install -n base -c conda-forge widgetsnbextension
# conda install -n base -c conda-forge jupyterlab_widgets
# conda install -n base -c conda-forge ipywidgets
# conda install pytorch torchvision -c pytorch

#!pip install google_play_scraper --user
import google_play_scraper
from google_play_scraper import app, Sort, reviews #reviews_all can also be used instead of reviews, but beware - has a limit of 19K

# domo
import domojupyter as domo 

#pandas
import pandas as pd
from pandas import json_normalize

#!pip install requests
import requests
import time
import json

#!pip install authlib
import authlib
from authlib.jose import jwt

import os
os.getcwd()

# date manipulations
from datetime import datetime
import calendar as calendar
#!pip install holidays
import holidays

import matplotlib.pyplot as plt
import numpy as np

# Tutorial: https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment?text=1st+time+using+the+app...+everything+seemed+to+be+working+fine.+when+the+app+said+my+order+was+ready%2C+i+drove+to+the+restaurant.+when+i+got+there%2C+they+said+they+were+taking+care+of+the+drive+through+orders+1st.+well%2C+thats+fine%2C+but+the+app+said+my+order+was+ready%3F+ready+is+ready...+right%3F%3F%3F+why+did+i+have+to+wait%3F+i+still+had+to+wait+over+20+minutes+to+get+the+order+filled.+i+don%27t+think+i+will+be+using+this+app+again.+there+is+no+point+if+the+local+store+is+this+inept.+%F0%9F%91%8E%F0%9F%91%8E%F0%9F%91%8E
#!pip install torch==1.10.2+cu102 torchvision==0.11.3+cu102 torchaudio===0.10.2+cu102 -f https://download.pytorch.org/whl/cu102/torch_stable.html
#!pip install transformers
#!pip install torch
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import AutoModel, TFAutoModel
from transformers import pipeline
#import numpy as np
import scipy
from scipy.special import softmax
import csv
import urllib.request

# Import data directly from the Google Play Store using the google-play-scraper

In [2]:
# reviews_all grabs all reviews, but there is a limit of 19K - so you will need to set limits by using the "count" argument for apps that have more than that
# https://github.com/JoMingyu/google-play-scraper
# the ,token needs to be used when using reviews, but not reviews all: https://python.plainenglish.io/scraping-storing-google-play-app-reviews-with-python-5640c933c476
android_reviews, token = reviews(
    'com.dd.doordash',
    #sleep_milliseconds=0, # defaults to 0
    lang='en', # defaults to 'en'
    country='us', # defaults to 'us'
    sort=Sort.NEWEST,# defaults to Sort.MOST_RELEVANT
    count=1000, #limits number of reviews pulled
    continuation_token=None
)

android_reviews

# # put reviews into a df 
# # https://www.linkedin.com/pulse/how-scrape-google-play-reviews-4-simple-steps-using-python-kundi/
androiddf = pd.DataFrame(np.array(android_reviews),columns=['review'])
androiddf = androiddf.join(pd.DataFrame(androiddf.pop('review').tolist()))
androiddf.head()

# select and rename only df columns we want to keep 
#list(androiddf.columns)

# select columns
androiddf = androiddf[['reviewId',
                       'userName',
                       'at',
                       'score',
                       'reviewCreatedVersion',
                       'userImage', # title field does not exist in android data. pulled in image as placeholder
                       'content']]



# rename columns
androiddf.rename(columns={'reviewId': 'URI', 
                      'userName': 'Author Username', 
                      'at': 'Date Review Submitted',
                     'score': 'Overall App Star Rating',
                     'reviewCreatedVersion': 'App Release Version',
                     'userImage': 'Review Title', # DO NOT USE title field does not exist in android data. pulled in image as placeholder
                     'content': 'Text'}, inplace=True)

androiddf.tail()
#androiddf.info()

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text
995,gp:AOqpTOEmsgBEHKIKd1QfpIhoBw0f0lYgpXNf-9OGdzS...,Chekesha Bailey,2022-05-12 15:58:49,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,Delicious Food!
996,gp:AOqpTOEOUNtp0VwDrJLYkXsMq8HVxPVYJa4bhYly4W5...,Nicole L.,2022-05-12 15:46:28,1,15.50.18,https://play-lh.googleusercontent.com/a-/AOh14...,Apparently they're pulling bad reviews rather ...
997,gp:AOqpTOHwdSg_Ly6eYQWp2HDi6GELX6fPZZ3lj63gMcg...,Ricky Gautam,2022-05-12 15:28:31,1,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,Even if you get a undercooked food be ready yo...
998,gp:AOqpTOFxnzeAxakwFR6mAHigXiugkeKyhteNEcfWUvU...,Kaien Castan,2022-05-12 15:05:53,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,quick and easy and your first order; no fee!
999,gp:AOqpTOEqqsGTQHArXuEzw6r-IWUmI0oL219iKY4gvYh...,Crystal Berry,2022-05-12 14:46:40,5,,https://play-lh.googleusercontent.com/a-/AOh14...,Have used this app multiple times in many diff...


# Prepare the text strings and create additional date columns

In [3]:
# rename the df
reviewsdf=androiddf

# convert the data type of the inquiries from float to string
reviewsdf['Text'] = reviewsdf['Text'].astype(str) 

# convert the data type of the inquiries from float to string
reviewsdf['Text'] = reviewsdf['Text'].astype(str) 

# Convert all data to lowercase. This is so the analysis does not count "The" and "the" as different words
reviewsdf['Text'] = reviewsdf['Text'].str.lower() # makes everything in this column lowercase to avoid redundancy.

# Convert date column to a date
# grab substring 
reviewsdf['Date Review Submitted'] = reviewsdf['Date Review Submitted'].astype(str).str[:10]


# Convert date to a date/time var
reviewsdf['Date Review Submitted'] =  pd.to_datetime(reviewsdf['Date Review Submitted'])

### Extract month, day, year from Date into separate columns

# Extract month.
reviewsdf['Month'] = pd.DatetimeIndex(reviewsdf['Date Review Submitted']).month

# Extract day.
reviewsdf['Day_of_Month'] = pd.DatetimeIndex(reviewsdf['Date Review Submitted']).day

# Extract year.
reviewsdf['Year'] = pd.DatetimeIndex(reviewsdf['Date Review Submitted']).year

# Extract quarter.
reviewsdf['Quarter'] = pd.DatetimeIndex(reviewsdf['Date Review Submitted']).quarter

# extract week day
reviewsdf['Day of Week'] = reviewsdf['Date Review Submitted'].apply(lambda date: calendar.day_name[date.weekday()]) 

# extract if date falls on a weekend or not. Is the day saturday or sunday - could possibly include friday as weekend.
reviewsdf['Is_Weekend']= np.where((reviewsdf['Day of Week'] == "Saturday")| (reviewsdf['Day of Week'] == "Sunday"), 1, 0)

# only keep past month of reviews
#from datetime import date
#from dateutil.relativedelta import relativedelta
#n = 1
#reviewsdf['pastmonth'] = date.today() - relativedelta(months=n)
#print(pastmonth)

# delete rows where the Date Review Submitted is more than a month old
#reviewsdf = reviewsdf[(reviewsdf['Date Review Submitted'] > reviewsdf['pastmonth'])]

# rename df
df = reviewsdf.copy()
df.tail(10)
#df.dtypes

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend
990,gp:AOqpTOE4aPjHWoeNT4Eha0b-1KupCjuMSHP5hgpGCNM...,Star Lit Oracle,2022-05-12,1,,https://play-lh.googleusercontent.com/a-/AOh14...,awful. spent nearly 2 hours over 2 days trying...,5,12,2022,2,Thursday,0
991,gp:AOqpTOGYxKM1yEXyzcz0Xv0spjhvEv_2LdurTPwaJkK...,Katie Scott,2022-05-12,1,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,multiple lawsuits!!! avoid at all costs! food ...,5,12,2022,2,Thursday,0
992,gp:AOqpTOF7HV2tZZqWyp63VYAiClNJun57FDyBy7dakfa...,Arnetta Gipson,2022-05-12,4,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,when or if i have any trouble or problem doord...,5,12,2022,2,Thursday,0
993,gp:AOqpTOFvCi1XPZ7XZIia5-SY34_rvk_U0qIhVhUxJlH...,Gordon Mark Martin,2022-05-12,1,15.50.18,https://play-lh.googleusercontent.com/a/AATXAJ...,horrible... it is relentess and taken over my ...,5,12,2022,2,Thursday,0
994,gp:AOqpTOGPKXWfEwMecZRCHX1f4Hn-Dp1uWt0R4ELL_Vg...,Mandy Wiley,2022-05-12,5,15.49.15,https://play-lh.googleusercontent.com/a-/AOh14...,i always use door dash. i love it,5,12,2022,2,Thursday,0
995,gp:AOqpTOEmsgBEHKIKd1QfpIhoBw0f0lYgpXNf-9OGdzS...,Chekesha Bailey,2022-05-12,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,delicious food!,5,12,2022,2,Thursday,0
996,gp:AOqpTOEOUNtp0VwDrJLYkXsMq8HVxPVYJa4bhYly4W5...,Nicole L.,2022-05-12,1,15.50.18,https://play-lh.googleusercontent.com/a-/AOh14...,apparently they're pulling bad reviews rather ...,5,12,2022,2,Thursday,0
997,gp:AOqpTOHwdSg_Ly6eYQWp2HDi6GELX6fPZZ3lj63gMcg...,Ricky Gautam,2022-05-12,1,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,even if you get a undercooked food be ready yo...,5,12,2022,2,Thursday,0
998,gp:AOqpTOFxnzeAxakwFR6mAHigXiugkeKyhteNEcfWUvU...,Kaien Castan,2022-05-12,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,quick and easy and your first order; no fee!,5,12,2022,2,Thursday,0
999,gp:AOqpTOEqqsGTQHArXuEzw6r-IWUmI0oL219iKY4gvYh...,Crystal Berry,2022-05-12,5,,https://play-lh.googleusercontent.com/a-/AOh14...,have used this app multiple times in many diff...,5,12,2022,2,Thursday,0


# Drop rows that exceed the maximum character limit for our model

In [4]:
# get the length of the df
reviewsdf['length'] = reviewsdf['Text'].apply(lambda x: len(x))
reviewsdf = reviewsdf.sort_values(by=['length'], ascending=False)
#reviewsdf.head(50)


# drop rows that have a length of more than 512 characters
# this model cannot handle reviews with more than 512 characters: https://github.com/huggingface/transformers/issues/1791
reviewsdf.drop(reviewsdf[reviewsdf['length'] >= 512].index, inplace = True)

# drop the entire length column - we don't need it anymore
reviewsdf.drop('length', axis=1, inplace=True)
reviewsdf.shape
reviewsdf.tail(20)

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend
594,gp:AOqpTOE2HgFuUjSeX_qstZB1A3ahWczpTiIx29A95XU...,Tonix_Storm YT,2022-05-13,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,good,5,13,2022,2,Friday,0
603,gp:AOqpTOE0TUIElK2RPpzaEZseDx3FziKc8eZZIYwbPkw...,Gustavho Maxima,2022-05-13,5,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,good,5,13,2022,2,Friday,0
362,gp:AOqpTOEk5OUE_rCEkC1uhHP2G9UflBWtnj3wYsU_YEx...,Jade Foglesong,2022-05-14,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,food,5,14,2022,2,Saturday,1
103,gp:AOqpTOHwmR1nl2QAd5EHTz9V_fZkDM8Osuy7IMMldnW...,Certified Nick,2022-05-16,1,15.52.15,https://play-lh.googleusercontent.com/a-/AOh14...,cool,5,16,2022,2,Monday,0
694,gp:AOqpTOFYka8u8Um7YmEQ9ta-8lrTdgdShxnVVMFrp8w...,Lennet Adenekan,2022-05-13,5,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,good,5,13,2022,2,Friday,0
476,gp:AOqpTOFs7MWY8zk6nJdFIax084lyPceZ_W7gOOx3MYs...,Shala Baptise,2022-05-14,4,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,nice,5,14,2022,2,Saturday,1
87,gp:AOqpTOEwx0JjWzdD4nbBtEzak2I1LeLHZEcnmr7KuvP...,Jacqueline Gabriel,2022-05-16,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,yaya,5,16,2022,2,Monday,0
527,gp:AOqpTOEKp1kb3E13BcVlqcoEpHGkbOMHjfN8wOTsQeD...,Nareshbabu,2022-05-14,5,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,nice,5,14,2022,2,Saturday,1
86,gp:AOqpTOEm07Aap3027__-jxc5ygwayemMfh-wMbS5Ots...,Luis Chavez,2022-05-16,5,15.51.14,https://play-lh.googleusercontent.com/a/AATXAJ...,easy,5,16,2022,2,Monday,0
548,gp:AOqpTOFS5Lf1hlOvlCQlplXNEKnPjhfUGtOyJbWRdqc...,Fishing the Bow River,2022-05-13,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,good,5,13,2022,2,Friday,0


# Classify each review based on a given emotion: Joy, Sadness, Anger, Surprise, Neutral, or Fear.

In [5]:
# Learn more about the model used here: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base

In [6]:
classifier = pipeline("sentiment-analysis", model="j-hartmann/emotion-english-distilroberta-base")
#classifier("i hate you")

In [7]:
def classifier_emotion(text):
  classifier_results = classifier(text)
  return classifier_results[0]['label']


def classifier_score(text):
  classifier_results = classifier(text)
  return classifier_results[0]['score']

In [8]:
df['Emotion'] = df['Text'].apply(lambda x: classifier_emotion(x))
df['Score'] = df['Text'].apply(lambda x: classifier_score(x))

#convert the emotion column to title case
df['Emotion'] = df['Emotion'].str.title()
df

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend,Emotion,Score
0,gp:AOqpTOFHfAAU3noc9m2fMSqhLIeaaeuegnPeGXvazCh...,Destiny Villarreal,2022-05-17,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,thk,5,17,2022,2,Tuesday,0,Neutral,0.676139
1,gp:AOqpTOHhO0UiIxNV2BD3pyuksbvKnVfabvxqMcRFgPX...,Just,2022-05-17,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,fast and easy,5,17,2022,2,Tuesday,0,Neutral,0.872316
2,gp:AOqpTOE48t9op4vpF6QJp-haauuToBBwr1wX8l4XAS3...,Fire and Ice,2022-05-17,1,15.52.15,https://play-lh.googleusercontent.com/a/AATXAJ...,would give doordash 0 stars if i could. they w...,5,17,2022,2,Tuesday,0,Anger,0.791225
3,gp:AOqpTOFyiT4AfA4szX39SL6LwFA6HTuOaGUF-UpPani...,Tana Ohrman,2022-05-17,2,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,not reliable nearly every time i order somethi...,5,17,2022,2,Tuesday,0,Sadness,0.583994
4,gp:AOqpTOHwCzgcveOtg2a8VNiZURkgmk-sSdOgXKz6hOU...,Wes Reagan,2022-05-17,5,15.41.8,https://play-lh.googleusercontent.com/a-/AOh14...,convenient af,5,17,2022,2,Tuesday,0,Neutral,0.912353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,gp:AOqpTOEmsgBEHKIKd1QfpIhoBw0f0lYgpXNf-9OGdzS...,Chekesha Bailey,2022-05-12,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,delicious food!,5,12,2022,2,Thursday,0,Joy,0.954445
996,gp:AOqpTOEOUNtp0VwDrJLYkXsMq8HVxPVYJa4bhYly4W5...,Nicole L.,2022-05-12,1,15.50.18,https://play-lh.googleusercontent.com/a-/AOh14...,apparently they're pulling bad reviews rather ...,5,12,2022,2,Thursday,0,Neutral,0.316866
997,gp:AOqpTOHwdSg_Ly6eYQWp2HDi6GELX6fPZZ3lj63gMcg...,Ricky Gautam,2022-05-12,1,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,even if you get a undercooked food be ready yo...,5,12,2022,2,Thursday,0,Sadness,0.954947
998,gp:AOqpTOFxnzeAxakwFR6mAHigXiugkeKyhteNEcfWUvU...,Kaien Castan,2022-05-12,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,quick and easy and your first order; no fee!,5,12,2022,2,Thursday,0,Neutral,0.426973


In [9]:
# show which operating system the review comes from 
df['OS'] = np.where(df.URI.str.startswith('https://itunes.apple.com'), 'Apple', 'Android')
df.head()

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend,Emotion,Score,OS
0,gp:AOqpTOFHfAAU3noc9m2fMSqhLIeaaeuegnPeGXvazCh...,Destiny Villarreal,2022-05-17,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,thk,5,17,2022,2,Tuesday,0,Neutral,0.676139,Android
1,gp:AOqpTOHhO0UiIxNV2BD3pyuksbvKnVfabvxqMcRFgPX...,Just,2022-05-17,5,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,fast and easy,5,17,2022,2,Tuesday,0,Neutral,0.872316,Android
2,gp:AOqpTOE48t9op4vpF6QJp-haauuToBBwr1wX8l4XAS3...,Fire and Ice,2022-05-17,1,15.52.15,https://play-lh.googleusercontent.com/a/AATXAJ...,would give doordash 0 stars if i could. they w...,5,17,2022,2,Tuesday,0,Anger,0.791225,Android
3,gp:AOqpTOFyiT4AfA4szX39SL6LwFA6HTuOaGUF-UpPani...,Tana Ohrman,2022-05-17,2,15.51.14,https://play-lh.googleusercontent.com/a-/AOh14...,not reliable nearly every time i order somethi...,5,17,2022,2,Tuesday,0,Sadness,0.583994,Android
4,gp:AOqpTOHwCzgcveOtg2a8VNiZURkgmk-sSdOgXKz6hOU...,Wes Reagan,2022-05-17,5,15.41.8,https://play-lh.googleusercontent.com/a-/AOh14...,convenient af,5,17,2022,2,Tuesday,0,Neutral,0.912353,Android


# Change data types and drop dupilcate rows before going back into domo

In [10]:
# changes data types before going back into domo
df['Overall App Star Rating']= df['Overall App Star Rating'].astype('float')
df['Overall App Star Rating']= df['Overall App Star Rating'].astype('int64')

In [11]:
# remove duplicate reviews. for some reason, a few app store reviews have duplicated
df = df.drop_duplicates()

In [12]:
df.dtypes

URI                                object
Author Username                    object
Date Review Submitted      datetime64[ns]
Overall App Star Rating             int64
App Release Version                object
Review Title                       object
Text                               object
Month                               int64
Day_of_Month                        int64
Year                                int64
Quarter                             int64
Day of Week                        object
Is_Weekend                          int64
Emotion                            object
Score                             float64
OS                                 object
dtype: object

# Write data back to domo

In [13]:
# instructions on setting up a scheduled refresh: https://domopalooza2022.brandlive.com/home/en/session/10a98144-9651-11ec-95c3-9349e7013cba
domo.write_dataframe(df, 'DoorDash_Reviews.ipynb')