In [1]:
# Import the required libraries
import pandas as pd
import numpy as np
import scipy.stats as scs
import matplotlib.pyplot as plt
import matplotlib.colors
import seaborn as sns

%matplotlib inline

In [2]:
# Read in the collected and prepared data
cookpad_reviews_df = pd.read_csv('../data/cookpad_recent_reviews.csv')
cookpad_reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4276 entries, 0 to 4275
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   reviewId              4276 non-null   object
 1   userName              4276 non-null   object
 2   content               4276 non-null   object
 3   score                 4276 non-null   int64 
 4   reviewCreatedVersion  4276 non-null   object
 5   reviewTimestamp       4276 non-null   object
 6   replyContent          20 non-null     object
 7   repliedAt             20 non-null     object
 8   reviewYear            4276 non-null   int64 
 9   reviewDateString      4276 non-null   object
dtypes: int64(2), object(8)
memory usage: 334.2+ KB


In [3]:
# select the reviews within our desired timeframe 24 month timespan (1 January 2019 - 31 December 2021)
reviews_df = cookpad_reviews_df[(cookpad_reviews_df['reviewTimestamp'] >= '2019-01-01') & 
                                (cookpad_reviews_df['reviewTimestamp'] <=  '2020-12-31')]
reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3943 entries, 333 to 4275
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   reviewId              3943 non-null   object
 1   userName              3943 non-null   object
 2   content               3943 non-null   object
 3   score                 3943 non-null   int64 
 4   reviewCreatedVersion  3943 non-null   object
 5   reviewTimestamp       3943 non-null   object
 6   replyContent          19 non-null     object
 7   repliedAt             19 non-null     object
 8   reviewYear            3943 non-null   int64 
 9   reviewDateString      3943 non-null   object
dtypes: int64(2), object(8)
memory usage: 338.9+ KB


In [4]:
print('min review time')
print(reviews_df['reviewTimestamp'].min())

print('max review time')
print(reviews_df['reviewTimestamp'].max())

min review time
2019-01-01 04:27:45
max review time
2020-12-30 23:31:13


In [5]:
low_ratings_by_version = reviews_df[reviews_df['score'] < 5].groupby(['reviewCreatedVersion'])['reviewId'].count()
high_ratings_by_version = reviews_df[reviews_df['score'] == 5 ].groupby(['reviewCreatedVersion'])['reviewId'].count()

low_ratings_versions = low_ratings_by_version.index
high_ratings_versions = high_ratings_by_version.index

low_ratings_nparray = np.array(low_ratings_by_version.index)
high_ratings_nparray = np.array(high_ratings_by_version.index)

In [7]:
print('Number of Versions in Lower ratings(<5 stars): ' + str(len(low_ratings_by_version)))
print('Number of Versions in 5 star ratings: ' + str(len(high_ratings_by_version)))

only_in_low = np.setdiff1d(low_ratings_nparray, np.array(high_ratings_nparray))
print('Number of Versions found ONLY in Lower ratings (>3 stars): ' + str(len(only_in_low)))

only_in_high = np.setdiff1d(np.array(high_ratings_nparray), low_ratings_nparray)
print('Number of Versions found ONLY in 5 star ratings (>3 stars): ' + str(len(only_in_high)))

Number of Versions in Lower ratings(<5 stars): 128
Number of Versions in 5 star ratings: 189
Number of Versions found ONLY in Lower ratings (>3 stars): 17
Number of Versions found ONLY in 5 star ratings (>3 stars): 78


In [8]:
print(only_in_low)

['1.38.1.0-android' '1.43.0.0-android' '1.9.0.1-android'
 '2.109.0.0-android' '2.11' '2.112.0.0-android' '2.12.0.0-android'
 '2.130.0.0-android' '2.132.2.0-android' '2.148.0.0-android'
 '2.167.0.0-android' '2.2.0.0-android' '2.72.0.0-android' '3.25' '4.20'
 '6.47' '7.02']


In [9]:
print(only_in_high)

['2.100.0.0-android' '2.104.0.0-android' '2.104.1.0-android'
 '2.104.3.0-android' '2.105.0.0-android' '2.108.0.0-android'
 '2.108.1.0-android' '2.110.0.0-android' '2.111.1.0-android'
 '2.114.0.0-android' '2.116.1.0-android' '2.119.0.0-android'
 '2.122.0.0-android' '2.128.1.0-android' '2.129.0.0-android'
 '2.131.0.0-android' '2.132.1.0-android' '2.134.1.0-android'
 '2.135.1.0-android' '2.135.2.0-android' '2.137.0.0-android'
 '2.138.0.0-android' '2.138.1.0-android' '2.140.0.0-android'
 '2.147.0.0-android' '2.149.0.0-android' '2.151.0.0-android'
 '2.154.0.0-android' '2.155.1.0-android' '2.155.2.0-android'
 '2.156.1.0-android' '2.158.0.0-android' '2.162.2.0-android'
 '2.163.1.0-android' '2.163.2.0-android' '2.164.1.0-android'
 '2.166.0.0-android' '2.169.0.0-android' '2.17.0.0-android'
 '2.174.0.0-android' '2.176.1.0-android' '2.179.0.0-android'
 '2.180.0.0-android' '2.182.0.0-android' '2.22.2.0-android'
 '2.40.1.0-android' '2.42.0.0-android' '2.46.0.0-android'
 '2.48.2.0-android' '2.50.0.0