# DoorDash Customer Experience

### The goal of this code is to extract customer reviews of the DoorDash ordering app from the Google Play Store (Android) and the App Store (Apple)
### This code powers the Domo dashboard located at:
### Created by Kendall Ruber

# Import packages

In [1]:
#  you may need to run these in the terminal :
# pip install google_play_scraper
# pip install authlib
# pip install holidays
# pip install scipy
###### from  https://ipywidgets.readthedocs.io/en/stable/user_install.html
# conda install -c conda-forge ipywidgets
#jupyter nbextension enable --py widgetsnbextension
# conda install -n base -c conda-forge widgetsnbextension
# conda install -n base -c conda-forge jupyterlab_widgets
# conda install -n base -c conda-forge ipywidgets
# conda install pytorch torchvision -c pytorch

#!pip install google_play_scraper --user
import google_play_scraper
from google_play_scraper import app, Sort, reviews #reviews_all can also be used instead of reviews, but beware - has a limit of 19K

# domo
import domojupyter as domo 

# Start from a clean Ray runtime: shut down any instance left over from a
# previous run of this cell, then start a new one with an explicit object
# store size.
# NOTE(review): object_store_memory is presumably in bytes (~5 GB) — confirm
# against the Ray docs. modin (imported below) presumably uses this Ray
# instance as its execution engine — confirm.
import ray 
ray.shutdown()
ray.init(object_store_memory=5000000000)

#pandas
import modin.pandas as pd
from pandas import json_normalize

#!pip install requests
import requests
import time
import json

#!pip install authlib
import authlib
from authlib.jose import jwt

import os
os.getcwd()

# date manipulations
from datetime import datetime
import calendar as calendar
#!pip install holidays
import holidays

import matplotlib.pyplot as plt
import numpy as np

# Tutorial: https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment?text=1st+time+using+the+app...+everything+seemed+to+be+working+fine.+when+the+app+said+my+order+was+ready%2C+i+drove+to+the+restaurant.+when+i+got+there%2C+they+said+they+were+taking+care+of+the+drive+through+orders+1st.+well%2C+thats+fine%2C+but+the+app+said+my+order+was+ready%3F+ready+is+ready...+right%3F%3F%3F+why+did+i+have+to+wait%3F+i+still+had+to+wait+over+20+minutes+to+get+the+order+filled.+i+don%27t+think+i+will+be+using+this+app+again.+there+is+no+point+if+the+local+store+is+this+inept.+%F0%9F%91%8E%F0%9F%91%8E%F0%9F%91%8E
#!pip install torch==1.10.2+cu102 torchvision==0.11.3+cu102 torchaudio===0.10.2+cu102 -f https://download.pytorch.org/whl/cu102/torch_stable.html
#!pip install transformers
#!pip install torch
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import AutoModel, TFAutoModel
from transformers import pipeline
#import numpy as np
import scipy
from scipy.special import softmax
import csv
import urllib.request

A newer version of domojupyter is available. Update domojupyter with the following command:

	conda update -c https://domo-conda-prod.s3.amazonaws.com/domo domojupyter


2022-10-22 20:45:37,840	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [2]:
# Record the exact version of every installed package (name==version format)
# so the environment used for this run can be reproduced later.
!pip list --format=freeze

[0maiohttp==3.8.1
aiohttp-cors==0.7.0
aiosignal==1.2.0
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
astroid==2.6.6
asttokens==2.0.5
async-timeout==4.0.2
attrs==21.4.0
Authlib==1.0.1
autopep8==1.5.7
backcall==0.2.0
backports.functools-lru-cache==1.6.4
beautifulsoup4==4.11.1
bleach==5.0.0
blessed==1.19.1
brotlipy==0.7.0
cachetools==5.2.0
certifi==2022.6.15
cffi==1.14.5
charset-normalizer==2.0.12
click==8.1.2
colorama==0.4.5
colorful==0.5.4
conda==4.10.3
conda-package-handling==1.8.1
convertdate==2.4.0
cryptography==37.0.2
cycler==0.11.0
debugpy==1.6.0
decorator==5.1.1
defusedxml==0.7.1
distlib==0.3.5
domojupyter==0.4.3
domophoenix==1.0.0
entrypoints==0.4
executing==0.8.3
fastjsonschema==2.15.3
filelock==3.6.0
flake8==3.9.2
flit_core==3.7.1
frozenlist==1.3.0
fsspec==2022.7.1
google-api-core==2.8.2
google-auth==2.9.1
google-play-scraper==1.0.3
googleapis-common-protos==1.56.4
gpustat==1.0.0rc1
grpcio==1.43.0
hijri-converter==2.2.3
holidays==0.13
huggingface-hub==0.5.1
idna==3.3
import

# Import data directly from the Google Play Store using the google-play-scraper

In [3]:
# Pull the newest DoorDash reviews from the Google Play Store.
# `reviews` returns the reviews together with a continuation token;
# `reviews_all` returns no token but has a limit of about 19K reviews, so apps
# with more reviews than that need the `count` argument used here.
# https://github.com/JoMingyu/google-play-scraper
# https://python.plainenglish.io/scraping-storing-google-play-app-reviews-with-python-5640c933c476
android_reviews, token = reviews(
    'com.dd.doordash',
    # sleep_milliseconds=0,  # defaults to 0
    lang='en',  # defaults to 'en'
    country='us',  # defaults to 'us'
    sort=Sort.NEWEST,  # defaults to Sort.MOST_RELEVANT
    count=1000,  # limits number of reviews pulled
    continuation_token=None,
)

# Load the scraped reviews into a DataFrame, one column per review field.
# https://www.linkedin.com/pulse/how-scrape-google-play-reviews-4-simple-steps-using-python-kundi/
androiddf = pd.DataFrame(android_reviews)

# Scraper field -> dashboard column name. The key order is also the column
# order of the resulting frame.
ANDROID_COLUMN_NAMES = {
    'reviewId': 'URI',
    'userName': 'Author Username',
    'at': 'Date Review Submitted',
    'score': 'Overall App Star Rating',
    'reviewCreatedVersion': 'App Release Version',
    # DO NOT USE: the Android data has no title field, so the user image URL
    # is pulled in purely as a placeholder.
    'userImage': 'Review Title',
    'content': 'Text',
}

# Keep only the columns we need and give them their dashboard names.
androiddf = androiddf[list(ANDROID_COLUMN_NAMES)].rename(columns=ANDROID_COLUMN_NAMES)

androiddf.tail()
#androiddf.info()

Please refer to https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation.


Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text
995,9d4effc2-2378-4df4-bfd7-7b19f9fa8ee9,Umang Malhotra,2022-10-14 01:08:55,2,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,too expensive
996,998dfa0e-63e1-4656-9db3-44de656e4ec1,Charles Webber,2022-10-14 01:08:22,1,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,"Service is absolute trash, I had a driver eith..."
997,9e1da314-ad79-4fb5-ab85-35a03b18f86e,Rebecca Grigg,2022-10-14 00:56:49,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,easy to order food.
998,00ae3afa-89a3-4048-b742-cf0bc8ce3b6f,Donna Hixson,2022-10-14 00:52:56,5,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,Convenient home delivery of food and other items.
999,9b2f2f9d-e7d8-4e5b-9a59-846493e44907,Sage Bee,2022-10-14 00:47:12,2,15.76.7,https://play-lh.googleusercontent.com/a/ALm5wu...,keeps crashing


# Prepare the text strings and create additional date columns

In [4]:
# Work on the Android reviews under a platform-neutral name.
# NOTE: this is an alias, not a copy, so the edits below also modify `androiddf`.
reviewsdf = androiddf

# Make sure every review body is a string, then lowercase it so the analysis
# does not count "The" and "the" as different words.
reviewsdf['Text'] = reviewsdf['Text'].astype(str).str.lower()

# Keep only the calendar date (the first 10 characters of the timestamp text)
# and parse it back into a datetime column.
reviewsdf['Date Review Submitted'] = pd.to_datetime(
    reviewsdf['Date Review Submitted'].astype(str).str[:10]
)

### Extract month, day, year, quarter and weekday from the date into separate columns
submitted = reviewsdf['Date Review Submitted']
reviewsdf['Month'] = submitted.dt.month
reviewsdf['Day_of_Month'] = submitted.dt.day
reviewsdf['Year'] = submitted.dt.year
reviewsdf['Quarter'] = submitted.dt.quarter

# English weekday name ("Monday" ... "Sunday"); vectorised, and a missing date
# yields a missing name instead of raising.
reviewsdf['Day of Week'] = submitted.dt.day_name()

# 1 when the review was submitted on a Saturday or Sunday, else 0
# (Friday could arguably be counted as weekend too, but currently is not).
reviewsdf['Is_Weekend'] = np.where(
    reviewsdf['Day of Week'].isin(['Saturday', 'Sunday']), 1, 0
)

# NOTE: an earlier draft kept only the past month of reviews
# (date.today() - relativedelta(months=1)); that filter is currently disabled.

# Independent copy used by the emotion-classification steps further down.
df = reviewsdf.copy()
df.tail(10)
#df.dtypes

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend
990,699126d3-3d42-4820-ad55-74e1fe5bcfb7,Shannon Burton,2022-10-14,4,15.76.7,https://play-lh.googleusercontent.com/a-/ACNPE...,if your orders jacked up they will make it rig...,10,14,2022,4,Friday,0
991,1850220d-c9a4-48e6-855e-f2d3d76b83f7,Kimberly Webb,2022-10-14,5,15.76.7,https://play-lh.googleusercontent.com/a-/ACNPE...,the company is wonderful and service they are ...,10,14,2022,4,Friday,0
992,538e3024-b3ec-460c-a3c3-192b5c1ce837,Jenn Mckinnish,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,life saver,10,14,2022,4,Friday,0
993,8d5f2de5-0709-4e4e-a98f-a64044d157bd,Viktoriya Guchshina,2022-10-14,1,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,"doesn't support, canceling order, changing typ...",10,14,2022,4,Friday,0
994,bab8cd82-f9a1-4df5-bebd-a4e64fbbb5b2,Ivy S.,2022-10-14,2,15.76.7,https://play-lh.googleusercontent.com/a-/ACNPE...,door dash always has issues with my payment me...,10,14,2022,4,Friday,0
995,9d4effc2-2378-4df4-bfd7-7b19f9fa8ee9,Umang Malhotra,2022-10-14,2,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,too expensive,10,14,2022,4,Friday,0
996,998dfa0e-63e1-4656-9db3-44de656e4ec1,Charles Webber,2022-10-14,1,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,"service is absolute trash, i had a driver eith...",10,14,2022,4,Friday,0
997,9e1da314-ad79-4fb5-ab85-35a03b18f86e,Rebecca Grigg,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,easy to order food.,10,14,2022,4,Friday,0
998,00ae3afa-89a3-4048-b742-cf0bc8ce3b6f,Donna Hixson,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,convenient home delivery of food and other items.,10,14,2022,4,Friday,0
999,9b2f2f9d-e7d8-4e5b-9a59-846493e44907,Sage Bee,2022-10-14,2,15.76.7,https://play-lh.googleusercontent.com/a/ALm5wu...,keeps crashing,10,14,2022,4,Friday,0


# Drop rows that exceed the maximum character limit for our model

In [5]:
# Reviews with this many characters or more are dropped before classification.
# The model cannot handle longer reviews:
# https://github.com/huggingface/transformers/issues/1791
# NOTE(review): the underlying limit is presumably counted in tokens rather
# than characters, so this is only an approximation — confirm.
MAX_REVIEW_CHARS = 512

# Order reviews longest-first and drop the ones that are too long. The helper
# `length` column is attached with assign() to a new frame and removed again,
# so it never leaks into other frames that share data with `reviewsdf`.
reviewsdf = reviewsdf.assign(length=reviewsdf['Text'].str.len()).sort_values(
    by=['length'], ascending=False
)
reviewsdf = reviewsdf[reviewsdf['length'] < MAX_REVIEW_CHARS].drop(columns='length')

# `df` (the frame that is classified and written to Domo) was copied from
# `reviewsdf` before this filter existed, so apply the same limit to it;
# otherwise over-long reviews would still reach the model.
df = df[df['Text'].str.len() < MAX_REVIEW_CHARS].copy()

reviewsdf.tail(20)



Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend
182,faf3c199-2947-4d45-b732-94008407d50c,Megan Moreno,2022-10-20,5,15.76.7,https://play-lh.googleusercontent.com/a/ALm5wu...,fast,10,20,2022,4,Thursday,0
451,e31be2a8-9b9c-4b84-a5e7-4d2ea2d7f9c3,L Thomas,2022-10-17,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,cool,10,17,2022,4,Monday,0
930,e8ec9497-374a-4b7e-b7f7-7ae2222bd3df,Stephanie Grove,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,nice,10,14,2022,4,Friday,0
423,6a927a32-e83c-49de-b6e6-94fd987f85cd,Jonny X Junior,2022-10-18,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,good,10,18,2022,4,Tuesday,0
693,9b63be92-0dfb-4fae-869b-26982938028b,Wayne Roper,2022-10-16,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,cool,10,16,2022,4,Sunday,1
723,6372e8a6-a052-4e11-a348-23ef67be6d02,Allen Moore,2022-10-15,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,👍👍👍👍,10,15,2022,4,Saturday,1
106,a3ff3124-0a2c-41e7-9889-f12783405f38,tariq wuf,2022-10-20,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,lit,10,20,2022,4,Thursday,0
4,e86463d3-1c8b-4791-8af1-b9401c958453,Johnny Shepard,2022-10-21,5,15.75.13,https://play-lh.googleusercontent.com/a/ALm5wu...,fud,10,21,2022,4,Friday,0
596,f51306c5-2ccf-47bf-81af-83b6c6aa807a,Shloimy Halberg,2022-10-16,2,15.67.13,https://play-lh.googleusercontent.com/a/ALm5wu...,2ax,10,16,2022,4,Sunday,1
491,5b476d54-274b-4387-9bda-65926500c281,Michaela Taylor,2022-10-17,1,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,bad,10,17,2022,4,Monday,0


# Classify each review based on a given emotion: Joy, Sadness, Anger, Surprise, Neutral, or Fear.

In [6]:
# Learn more about the model used here: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base

In [7]:
# Hugging Face checkpoint used to tag every review with an emotion.
EMOTION_MODEL = "j-hartmann/emotion-english-distilroberta-base"

# Build the classification pipeline once; the cells below reuse it for every review.
classifier = pipeline(task="sentiment-analysis", model=EMOTION_MODEL)
# Quick manual check: classifier("i hate you")

In [8]:
def classifier_emotion(text):
    """Return the emotion label of the first prediction for ``text``.

    Args:
        text: The review text to classify.

    Returns:
        The ``'label'`` value of the first result returned by the
        module-level ``classifier`` pipeline.
    """
    # truncation=True lets the tokenizer cut over-long reviews down to the
    # model's maximum input length instead of the pipeline raising on them.
    classifier_results = classifier(text, truncation=True)
    return classifier_results[0]['label']


def classifier_score(text):
    """Return the score of the first prediction for ``text``.

    Args:
        text: The review text to classify.

    Returns:
        The ``'score'`` value of the first result returned by the
        module-level ``classifier`` pipeline.
    """
    # truncation=True lets the tokenizer cut over-long reviews down to the
    # model's maximum input length instead of the pipeline raising on them.
    classifier_results = classifier(text, truncation=True)
    return classifier_results[0]['score']

In [9]:
# Run the model ONCE per review and keep its first prediction (a dict with
# 'label' and 'score'). Asking for the label and the score in two separate
# passes would run inference twice on every review.
# truncation=True lets the tokenizer shorten over-long reviews instead of the
# pipeline raising on them.
top_predictions = df['Text'].apply(
    lambda text: classifier(text, truncation=True)[0]
)

# Emotion label in title case (e.g. "Joy"), plus the score reported with it.
df['Emotion'] = top_predictions.apply(lambda prediction: prediction['label']).str.title()
df['Score'] = top_predictions.apply(lambda prediction: prediction['score'])
df

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend,Emotion,Score
0,140f3bb0-c1ce-4e0c-bb94-545d013fdfb9,Chris Crouse,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,great service,10,21,2022,4,Friday,0,Joy,0.878444
1,7aa9a3df-d26c-4c54-b35a-c64ec1320f20,Mike James,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,always on schedule.,10,21,2022,4,Friday,0,Neutral,0.895809
2,4e9db611-387a-4642-ba62-c5b0d3ac8c98,alizay rapholz,2022-10-21,1,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,horrible would not recommend to anyone! the ap...,10,21,2022,4,Friday,0,Fear,0.977089
3,e413f278-4edb-43cb-bb77-2f4c300e2dda,LySavion Jones,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,loving it!,10,21,2022,4,Friday,0,Joy,0.878260
4,e86463d3-1c8b-4791-8af1-b9401c958453,Johnny Shepard,2022-10-21,5,15.75.13,https://play-lh.googleusercontent.com/a/ALm5wu...,fud,10,21,2022,4,Friday,0,Anger,0.478379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,9d4effc2-2378-4df4-bfd7-7b19f9fa8ee9,Umang Malhotra,2022-10-14,2,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,too expensive,10,14,2022,4,Friday,0,Sadness,0.767708
996,998dfa0e-63e1-4656-9db3-44de656e4ec1,Charles Webber,2022-10-14,1,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,"service is absolute trash, i had a driver eith...",10,14,2022,4,Friday,0,Anger,0.782595
997,9e1da314-ad79-4fb5-ab85-35a03b18f86e,Rebecca Grigg,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a/ALm5wu...,easy to order food.,10,14,2022,4,Friday,0,Neutral,0.715969
998,00ae3afa-89a3-4048-b742-cf0bc8ce3b6f,Donna Hixson,2022-10-14,5,15.77.9,https://play-lh.googleusercontent.com/a-/ACNPE...,convenient home delivery of food and other items.,10,14,2022,4,Friday,0,Neutral,0.628952


In [10]:
# Show which operating system each review comes from: a URI that starts with
# the iTunes address is tagged 'Apple', everything else 'Android'.
# na=False makes a missing URI count as "not Apple"; without it the NaN result
# is treated as truthy by np.where and the row would be labelled 'Apple'.
is_apple = df['URI'].str.startswith('https://itunes.apple.com', na=False)
df['OS'] = np.where(is_apple, 'Apple', 'Android')
df.head()

Unnamed: 0,URI,Author Username,Date Review Submitted,Overall App Star Rating,App Release Version,Review Title,Text,Month,Day_of_Month,Year,Quarter,Day of Week,Is_Weekend,Emotion,Score,OS
0,140f3bb0-c1ce-4e0c-bb94-545d013fdfb9,Chris Crouse,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,great service,10,21,2022,4,Friday,0,Joy,0.878444,Android
1,7aa9a3df-d26c-4c54-b35a-c64ec1320f20,Mike James,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,always on schedule.,10,21,2022,4,Friday,0,Neutral,0.895809,Android
2,4e9db611-387a-4642-ba62-c5b0d3ac8c98,alizay rapholz,2022-10-21,1,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,horrible would not recommend to anyone! the ap...,10,21,2022,4,Friday,0,Fear,0.977089,Android
3,e413f278-4edb-43cb-bb77-2f4c300e2dda,LySavion Jones,2022-10-21,5,15.78.7,https://play-lh.googleusercontent.com/a/ALm5wu...,loving it!,10,21,2022,4,Friday,0,Joy,0.87826,Android
4,e86463d3-1c8b-4791-8af1-b9401c958453,Johnny Shepard,2022-10-21,5,15.75.13,https://play-lh.googleusercontent.com/a/ALm5wu...,fud,10,21,2022,4,Friday,0,Anger,0.478379,Android


# Change data types and drop duplicate rows before going back into domo

In [11]:
# Normalise the star rating before it goes back into Domo: cast to float
# first, then to a 64-bit integer.
rating_column = 'Overall App Star Rating'
df[rating_column] = df[rating_column].astype('float').astype('int64')

In [12]:
# Remove duplicate reviews, keeping the first copy of each fully identical row
# (a few app store reviews come through more than once).
df = df.drop_duplicates(keep='first')

In [13]:
# Display the dtype of every column as a final check before the upload.
df.dtypes

URI                                object
Author Username                    object
Date Review Submitted      datetime64[ns]
Overall App Star Rating             int64
App Release Version                object
Review Title                       object
Text                               object
Month                               int64
Day_of_Month                        int64
Year                                int64
Quarter                             int64
Day of Week                        object
Is_Weekend                          int64
Emotion                            object
Score                             float64
OS                                 object
dtype: object

# Write data back to domo

In [14]:
# Write the finished DataFrame back to Domo.
# Instructions on setting up a scheduled refresh: https://domopalooza2022.brandlive.com/home/en/session/10a98144-9651-11ec-95c3-9349e7013cba
# NOTE(review): the second argument is presumably the name of the output
# dataset configured for this notebook — confirm that 'DoorDash_Reviews.ipynb'
# is the intended name.
domo.write_dataframe(df, 'DoorDash_Reviews.ipynb')

