In [1]:
# Import dependencies
import pandas as pd

In [2]:
# Load the Vine review table from its CSV file into a DataFrame, then preview the first rows
vine_df = pd.read_csv(filepath_or_buffer="Resources/vine_table.csv")
vine_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,R3VR960AHLFKDV,4,0,0,N,Y
1,R16LGVMFKIUT0G,5,0,0,N,Y
2,R1AIMEEPYHMOE4,5,1,1,N,Y
3,R1892CCSZWZ9SR,3,0,0,N,Y
4,R285P679YWVKD1,3,0,0,N,N


In [3]:
# Keep only the reviews whose "total_votes" is at least 20
vine_votes_df = vine_df.loc[vine_df["total_votes"].ge(20)]
vine_votes_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
73,RL8D0KJ0J9L0O,5,152,165,N,Y
157,R1BEINAIQFBRJC,5,21,23,N,Y
190,R2L59KIJH302P9,4,26,26,N,Y
221,RR99CPG695T0I,5,215,248,N,N
237,R1XQNKKUPCMWVO,5,43,44,N,Y


In [4]:
# Narrow the 20+ vote reviews to those where helpful votes make up at least 50% of all votes
vine_helpful_df = vine_votes_df.loc[
    vine_votes_df["helpful_votes"].div(vine_votes_df["total_votes"]).ge(0.5)
]
vine_helpful_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
73,RL8D0KJ0J9L0O,5,152,165,N,Y
157,R1BEINAIQFBRJC,5,21,23,N,Y
190,R2L59KIJH302P9,4,26,26,N,Y
221,RR99CPG695T0I,5,215,248,N,N
237,R1XQNKKUPCMWVO,5,43,44,N,Y


In [9]:
# Select the helpful reviews whose "vine" flag is "Y" (written as part of the Vine program)
vine_yes_df = vine_helpful_df.loc[vine_helpful_df["vine"].eq("Y")]
vine_yes_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
5408,R2BQOD1R0228FN,3,17,26,Y,N
8246,RC31RUPFOHBHQ,4,102,117,Y,N
12219,REN3N1WITLF1Y,5,33,37,Y,N
13330,R71RZQ9UZVG47,4,38,47,Y,N
16942,R38NMQBH88HLM6,4,18,24,Y,N


In [10]:
# Select the helpful reviews whose "vine" flag is "N" (NOT written as part of the Vine program)
vine_no_df = vine_helpful_df.loc[vine_helpful_df["vine"].eq("N")]
vine_no_df.head(5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
73,RL8D0KJ0J9L0O,5,152,165,N,Y
157,R1BEINAIQFBRJC,5,21,23,N,Y
190,R2L59KIJH302P9,4,26,26,N,Y
221,RR99CPG695T0I,5,215,248,N,N
237,R1XQNKKUPCMWVO,5,43,44,N,Y


In [18]:
# Determine total number of reviews, number of 5-star reviews, and percentage of
# 5-star reviews among the reviews that were part of the Vine program.
# Both counts are taken over non-null "review_id" values.
vine_yes_total = vine_yes_df["review_id"].count()
vine_yes_5_star = vine_yes_df.loc[vine_yes_df["star_rating"] == 5, "review_id"].count()
# Guard the division: when no Vine reviews survive the earlier filters the share is
# undefined, so report NaN instead of raising ZeroDivisionError.
vine_yes_5_star_percentage = (
    float(vine_yes_5_star) / float(vine_yes_total) * 100
    if vine_yes_total
    else float("nan")
)

In [23]:
# Determine total number of reviews, number of 5-star reviews, and percentage of
# 5-star reviews among the reviews that were NOT part of the Vine program.
# Both counts are taken over non-null "review_id" values.
vine_no_total = vine_no_df["review_id"].count()
vine_no_5_star = vine_no_df.loc[vine_no_df["star_rating"] == 5, "review_id"].count()
# Guard the division: when no non-Vine reviews survive the earlier filters the share is
# undefined, so report NaN instead of raising ZeroDivisionError.
vine_no_5_star_percentage = (
    float(vine_no_5_star) / float(vine_no_total) * 100
    if vine_no_total
    else float("nan")
)

In [28]:
# Collect the Vine / non-Vine metrics into a column-oriented dictionary.
# Every list is ordered [Vine ("Y"), non-Vine ("N")].
vine_summary_dict = {}
vine_summary_dict["Vine"] = ["Y", "N"]
vine_summary_dict["Total_Reviews"] = [vine_yes_total, vine_no_total]
vine_summary_dict["5_Star_Reviews"] = [vine_yes_5_star, vine_no_5_star]
vine_summary_dict["Percentage_5_Star_Reviews"] = [
    vine_yes_5_star_percentage,
    vine_no_5_star_percentage,
]
print(vine_summary_dict)

{'Vine': ['Y', 'N'], 'Total_Reviews': [136, 18019], '5_Star_Reviews': [74, 8482], 'Percentage_5_Star_Reviews': [54.41176470588235, 47.072534546867196]}


In [30]:
# Turn the summary dictionary into a DataFrame indexed by the "Vine" flag and display it
vine_summary_df = (
    pd.DataFrame(data=vine_summary_dict)
    .set_index("Vine")
)
vine_summary_df

Unnamed: 0_level_0,Total_Reviews,5_Star_Reviews,Percentage_5_Star_Reviews
Vine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Y,136,74,54.411765
N,18019,8482,47.072535
