### Load the Vine Table CSV file into a DataFrame

In [11]:
# Import your dependencies.
import pandas as pd

In [12]:
# Load the Vine review table from its CSV export into a pandas DataFrame.
vine_df = pd.read_csv(filepath_or_buffer="vine_table.csv")
# Leave the frame as the cell's last expression so the notebook renders it.
vine_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,R8EWA1OFT84NX,5,0,0,N,Y
1,R2JWY4YRQD4FOP,5,0,0,N,N
2,RL5ESX231LZ0B,5,0,0,N,Y
3,RRMS9ZWJ2KD08,5,0,0,N,Y
4,R14I3ZG5E6S7YM,5,0,0,N,Y
...,...,...,...,...,...,...
1752927,R7W6TPBOZ7WJZ,5,2,2,N,N
1752928,R1Y4I7KYF7Y5MH,5,0,0,N,N
1752929,R35M9WTWVWMLX9,5,27,27,N,N
1752930,R2LUF1DDNNRNAJ,5,18,18,N,N


In [13]:
# Inspect the dtype pandas inferred for each column. The rating and vote
# columns need to be numeric for the comparisons and the division performed
# in the filtering and five-star cells further down.
vine_df.dtypes

review_id            object
star_rating           int64
helpful_votes         int64
total_votes           int64
vine                 object
verified_purchase    object
dtype: object

### Filter by votes

In [14]:
# Keep only the reviews that received at least 20 votes in total.
has_min_votes = vine_df["total_votes"].ge(20)
df1 = vine_df.loc[has_min_votes]
# Display the filtered frame.
df1

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
591,ROI00QN8IS49I,5,83,91,N,N
1122,R13C5INE1RTZP6,2,26,26,N,Y
1311,RXVMMXCL67MZN,4,378,383,N,Y
1837,R33JJQWAUYBKD3,5,270,280,N,Y
1954,R3N0XV9267NOXV,5,41,47,Y,N
...,...,...,...,...,...,...
1752918,RYN307NME6KX0,5,22,22,N,N
1752919,R1TOGWW745RP5X,5,109,110,N,N
1752922,RWZESJ0UGDQDG,4,34,34,N,N
1752925,RTEMVUI94ORI0,5,19,20,N,N


In [15]:
# Narrow df1 down to the reviews where at least half of the votes were
# "helpful", i.e. helpful_votes / total_votes >= 0.5.
helpful_ratio = df1["helpful_votes"].div(df1["total_votes"])
df2 = df1.loc[helpful_ratio.ge(0.5)]
# Display the filtered frame.
df2

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
591,ROI00QN8IS49I,5,83,91,N,N
1122,R13C5INE1RTZP6,2,26,26,N,Y
1311,RXVMMXCL67MZN,4,378,383,N,Y
1837,R33JJQWAUYBKD3,5,270,280,N,Y
1954,R3N0XV9267NOXV,5,41,47,Y,N
...,...,...,...,...,...,...
1752918,RYN307NME6KX0,5,22,22,N,N
1752919,R1TOGWW745RP5X,5,109,110,N,N
1752922,RWZESJ0UGDQDG,4,34,34,N,N
1752925,RTEMVUI94ORI0,5,19,20,N,N


### Analyze the Vine Reviews

In [16]:
# Vine-program (paid) reviews: the rows of df2 whose `vine` flag is "Y".
is_vine_review = df2["vine"].eq("Y")
paid_df = df2.loc[is_vine_review]
# Display the Vine subset.
paid_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
1954,R3N0XV9267NOXV,5,41,47,Y,N
5736,RSA6JQ346JZHZ,5,55,64,Y,Y
7385,R1FXF4HRMCLG4C,5,69,84,Y,N
7883,RCTBWC3II42MG,4,113,117,Y,N
24464,RTMQM2CQ1XIZ0,5,21,25,Y,N
...,...,...,...,...,...,...
1636787,RFFBMMET74TBB,5,27,30,Y,N
1637905,R19G60JWE86BRS,3,31,35,Y,N
1638433,R36L3YS2VNQUAB,4,163,175,Y,N
1639054,R1PSNB760HL99L,3,205,216,Y,N


In [17]:
# Non-Vine (unpaid) reviews: the rows of df2 whose `vine` flag is "N".
vine_flag = df2["vine"]
unpaid_df = df2.loc[vine_flag.eq("N")]
# Display the non-Vine subset.
unpaid_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
591,ROI00QN8IS49I,5,83,91,N,N
1122,R13C5INE1RTZP6,2,26,26,N,Y
1311,RXVMMXCL67MZN,4,378,383,N,Y
1837,R33JJQWAUYBKD3,5,270,280,N,Y
2056,R31HQD6YXSQV1W,3,40,48,N,Y
...,...,...,...,...,...,...
1752918,RYN307NME6KX0,5,22,22,N,N
1752919,R1TOGWW745RP5X,5,109,110,N,N
1752922,RWZESJ0UGDQDG,4,34,34,N,N
1752925,RTEMVUI94ORI0,5,19,20,N,N


### Determine the percentage of five-star reviews among Vine reviews

In [18]:
# paid_df holds the Vine-program reviews (vine == "Y") that passed the vote
# filters above; the figures below describe that subset only.

# Total number of Vine reviews (count of non-null star ratings).
paid_number = paid_df["star_rating"].count()

# Number of Vine reviews that gave 5 stars. Summing the boolean comparison
# counts the matches directly, without first copying a filtered DataFrame.
paid_five_star_number = paid_df["star_rating"].eq(5).sum()

# Percentage of Vine reviews that are 5-star.
percentage_five_star_vine = paid_five_star_number / paid_number * 100

# Print, in order: total Vine reviews, 5-star Vine reviews, 5-star percentage.
print(paid_number)
print(paid_five_star_number)
print(percentage_five_star_vine)

463
202
43.628509719222464


### Determine the percentage of five-star reviews among non-Vine reviews

In [19]:
# unpaid_df holds the non-Vine reviews (vine == "N") that passed the vote
# filters above; the figures below describe that subset only.

# Total number of non-Vine reviews (count of non-null star ratings).
unpaid_number = unpaid_df["star_rating"].count()

# Number of non-Vine reviews that gave 5 stars. Summing the boolean
# comparison counts the matches directly, without first copying a filtered
# DataFrame.
unpaid_five_star_number = unpaid_df["star_rating"].eq(5).sum()

# Percentage of non-Vine reviews that are 5-star.
percentage_five_star_non_vine = unpaid_five_star_number / unpaid_number * 100

# Print, in order: total non-Vine reviews, 5-star non-Vine reviews,
# 5-star percentage.
print(unpaid_number)
print(unpaid_five_star_number)
print(percentage_five_star_non_vine)

25094
12033
47.95170160197657
