In [1]:
import pandas as pd

In [3]:
# Load the exported Vine review table into a DataFrame.
vine_df = pd.read_csv("vine_table_export.csv")
# Only the last expression of a cell is rendered, so this cell shows the
# summary statistics of the numeric columns. To preview rows, run
# `vine_df.head()` as the last line of its own cell.
vine_df.describe()

Unnamed: 0,star_rating,helpful_votes,total_votes
count,569999.0,569999.0,569999.0
mean,4.256569,1.63282,2.107658
std,1.218425,11.668592,12.424752
min,1.0,0.0,0.0
25%,4.0,0.0,0.0
50%,5.0,0.0,0.0
75%,5.0,1.0,2.0
max,5.0,3894.0,4038.0


In [6]:
# Keep only the reviews that received 20 or more total votes.
has_enough_votes = vine_df['total_votes'].ge(20)
at_least_20_total_votes = vine_df.loc[has_enough_votes]
at_least_20_total_votes.head()

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
28,R2243Y3OD8U6KQ,5.0,47.0,61.0,N,N
85,R2TGT0CDTCAAHW,5.0,21.0,23.0,N,Y
457,RX4D22YSXEF4P,1.0,37.0,38.0,N,Y
1028,R3FL2NTLFUSPTQ,5.0,33.0,37.0,N,N
1039,R3QTP3YNZXAPPF,3.0,23.0,24.0,N,Y


In [8]:
# From the reviews with 20+ total votes, keep those where helpful votes
# account for at least half of all votes cast.
helpful_ratio = (at_least_20_total_votes['helpful_votes']
                 / at_least_20_total_votes['total_votes'])
helpful_votes_pct = at_least_20_total_votes.loc[helpful_ratio >= 0.5]
helpful_votes_pct.head()

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
28,R2243Y3OD8U6KQ,5.0,47.0,61.0,N,N
85,R2TGT0CDTCAAHW,5.0,21.0,23.0,N,Y
457,RX4D22YSXEF4P,1.0,37.0,38.0,N,Y
1028,R3FL2NTLFUSPTQ,5.0,33.0,37.0,N,N
1039,R3QTP3YNZXAPPF,3.0,23.0,24.0,N,Y


In [9]:
# Paid reviews: rows of the helpful-vote subset whose `vine` flag is "Y",
# i.e. reviews written as part of the Vine program.
is_vine_review = helpful_votes_pct['vine'].eq("Y")
vine_yes = helpful_votes_pct.loc[is_vine_review]
vine_yes.head()

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
4229,R1R9RU7JW0MFR2,4.0,20.0,23.0,Y,N
9120,R19EFYNN3W8Q07,5.0,26.0,32.0,Y,N
34704,R34DJ1R8AEU0SG,5.0,29.0,35.0,Y,N
38510,R25P5CXK5L9RHF,5.0,146.0,161.0,Y,N
49330,R2E9VZB3I4LSN5,5.0,55.0,59.0,Y,N


In [36]:
# Unpaid reviews: rows of the helpful-vote subset whose `vine` flag is "N",
# i.e. reviews not written as part of the Vine program.
is_non_vine_review = helpful_votes_pct['vine'].eq("N")
vine_no = helpful_votes_pct.loc[is_non_vine_review]
vine_no.head()

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
28,R2243Y3OD8U6KQ,5.0,47.0,61.0,N,N
85,R2TGT0CDTCAAHW,5.0,21.0,23.0,N,Y
457,RX4D22YSXEF4P,1.0,37.0,38.0,N,Y
1028,R3FL2NTLFUSPTQ,5.0,33.0,37.0,N,N
1039,R3QTP3YNZXAPPF,3.0,23.0,24.0,N,Y


In [14]:
# Total number of reviews that passed both vote filters
# (one row of helpful_votes_pct per review).
total_reviews = helpful_votes_pct.shape[0]
total_reviews

7834

In [21]:
# Total number of 5-star reviews.
# Count the matching rows directly: the boolean sum is 0 when no review has
# a 5.0 rating, whereas looking the label up with value_counts()[5.0] raises
# KeyError for a rating that does not occur.
five_star_total = (helpful_votes_pct['star_rating'] == 5.0).sum()
print(f'The total number of five-star reviews is {five_star_total}.')

The total number of five-star reviews is 4499.


In [52]:
# Group sizes: reviews inside the Vine program (paid) vs. outside it (unpaid).
vine_yes_total, vine_no_total = len(vine_yes), len(vine_no)
print(f'Total number of reviews part of the Vine program (paid): {vine_yes_total}')
print(f'Total number of reviews not part of the Vine program (unpaid): {vine_no_total}')

Total number of reviews part of the Vine program (paid): 43
Total number of reviews not part of the Vine program (unpaid): 7791


In [48]:
# Total number of 5-star reviews for the two types of review (paid vs unpaid).
# Count matching rows instead of indexing value_counts() by label:
# value_counts()[5.0] raises KeyError when a group contains no 5-star review,
# while the boolean sum simply yields 0. The result is still a numpy integer,
# so it behaves like the value_counts() lookup in later arithmetic.
paid_five_star_total = (vine_yes['star_rating'] == 5.0).sum()
unpaid_five_star_total = (vine_no['star_rating'] == 5.0).sum()

print(f'Out of {vine_yes_total} paid reviews, {paid_five_star_total} received 5 stars.')
print(f'Out of {vine_no_total} unpaid reviews, {unpaid_five_star_total} received 5 stars.')


Out of 43 paid reviews, 24 received 5 stars.
Out of 7791 unpaid reviews, 4475 received 5 stars.


In [45]:
# Share of 5-star reviews within each group (paid vs unpaid), expressed as a
# percentage rounded to one decimal place.
paid_share = paid_five_star_total / vine_yes_total
unpaid_share = unpaid_five_star_total / vine_no_total
paid_pct = (paid_share * 100).round(1)
unpaid_pct = (unpaid_share * 100).round(1)

print(f'{paid_pct}% of all paid reviews received 5 stars.')
print(f'{unpaid_pct}% of all unpaid reviews received 5 stars.')

55.8% of all paid reviews received 5 stars.
57.4% of all unpaid reviews received 5 stars.
