In [1]:
#import dependencies
import pandas as pd
import numpy as np
import os
import csv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 
analyzer = SentimentIntensityAnalyzer()


In [2]:
#read csv files into dataframes
csv_path = "google-amazon-facebook-employee-reviews/employee_reviews.csv"
company_reviews = pd.read_csv(csv_path)
clean_df = company_reviews.drop("Unnamed: 0", axis = 1)

#delete the unnamed column and set index
clean_df.reset_index()

#see if there are any missing columns
# clean_df.dtypes

#decided to remove the rows with missing columns
company_reviews_clean = clean_df.dropna(how="any")
company_reviews_clean.head()

Unnamed: 0,company,location,dates,job-title,summary,pros,cons,advice-to-mgmt,overall-ratings,work-balance-stars,culture-values-stars,carrer-opportunities-stars,comp-benefit-stars,senior-mangemnet-stars,helpful-count,link
0,google,none,"Dec 11, 2018",Current Employee - Anonymous Employee,Best Company to work for,People are smart and friendly,Bureaucracy is slowing things down,none,5.0,4.0,5.0,5.0,4.0,5.0,0,https://www.glassdoor.com/Reviews/Google-Revie...
1,google,"Mountain View, CA","Jun 21, 2013",Former Employee - Program Manager,"Moving at the speed of light, burn out is inev...","1) Food, food, food. 15+ cafes on main campus ...",1) Work/life balance. What balance? All those ...,1) Don't dismiss emotional intelligence and ad...,4.0,2.0,3.0,3.0,5.0,3.0,2094,https://www.glassdoor.com/Reviews/Google-Revie...
2,google,"New York, NY","May 10, 2014",Current Employee - Software Engineer III,Great balance between big-company security and...,"* If you're a software engineer, you're among ...","* It *is* becoming larger, and with it comes g...",Keep the focus on the user. Everything else wi...,5.0,5.0,4.0,5.0,5.0,4.0,949,https://www.glassdoor.com/Reviews/Google-Revie...
3,google,"Mountain View, CA","Feb 8, 2015",Current Employee - Anonymous Employee,The best place I've worked and also the most d...,You can't find a more well-regarded company th...,I live in SF so the commute can take between 1...,Keep on NOT micromanaging - that is a huge ben...,5.0,2.0,5.0,5.0,4.0,5.0,498,https://www.glassdoor.com/Reviews/Google-Revie...
4,google,"Los Angeles, CA","Jul 19, 2018",Former Employee - Software Engineer,"Unique, one of a kind dream job",Google is a world of its own. At every other c...,"If you don't work in MTV (HQ), you will be giv...",Promote managers into management for their man...,5.0,5.0,5.0,5.0,5.0,5.0,49,https://www.glassdoor.com/Reviews/Google-Revie...


In [3]:
# filter the data to just display one company at a time
google_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=='google',:]
google_reviews.head()

Unnamed: 0,company,location,dates,job-title,summary,pros,cons,advice-to-mgmt,overall-ratings,work-balance-stars,culture-values-stars,carrer-opportunities-stars,comp-benefit-stars,senior-mangemnet-stars,helpful-count,link
0,google,none,"Dec 11, 2018",Current Employee - Anonymous Employee,Best Company to work for,People are smart and friendly,Bureaucracy is slowing things down,none,5.0,4.0,5.0,5.0,4.0,5.0,0,https://www.glassdoor.com/Reviews/Google-Revie...
1,google,"Mountain View, CA","Jun 21, 2013",Former Employee - Program Manager,"Moving at the speed of light, burn out is inev...","1) Food, food, food. 15+ cafes on main campus ...",1) Work/life balance. What balance? All those ...,1) Don't dismiss emotional intelligence and ad...,4.0,2.0,3.0,3.0,5.0,3.0,2094,https://www.glassdoor.com/Reviews/Google-Revie...
2,google,"New York, NY","May 10, 2014",Current Employee - Software Engineer III,Great balance between big-company security and...,"* If you're a software engineer, you're among ...","* It *is* becoming larger, and with it comes g...",Keep the focus on the user. Everything else wi...,5.0,5.0,4.0,5.0,5.0,4.0,949,https://www.glassdoor.com/Reviews/Google-Revie...
3,google,"Mountain View, CA","Feb 8, 2015",Current Employee - Anonymous Employee,The best place I've worked and also the most d...,You can't find a more well-regarded company th...,I live in SF so the commute can take between 1...,Keep on NOT micromanaging - that is a huge ben...,5.0,2.0,5.0,5.0,4.0,5.0,498,https://www.glassdoor.com/Reviews/Google-Revie...
4,google,"Los Angeles, CA","Jul 19, 2018",Former Employee - Software Engineer,"Unique, one of a kind dream job",Google is a world of its own. At every other c...,"If you don't work in MTV (HQ), you will be giv...",Promote managers into management for their man...,5.0,5.0,5.0,5.0,5.0,5.0,49,https://www.glassdoor.com/Reviews/Google-Revie...


In [4]:
company_list = company_reviews_clean["company"].unique()
company_list

array(['google', 'amazon', 'facebook', 'netflix', 'apple', 'microsoft'],
      dtype=object)

In [5]:
#variables for holding each sentiment for each sentence
google_compound_list = []
google_positive_list = []
google_negative_list = []
google_neutral_list = []

for sentence in google_reviews["summary"]: 
    
     
#     caluculating the polarity scores on each sentence
    results = analyzer.polarity_scores(sentence)
    compound = results["compound"]
    pos = results['pos']
    neg = results['neg']
    neu = results['neu']

    #add each value to the appropriate array
    google_compound_list.append(compound)
    google_positive_list.append(pos)
    google_negative_list.append(neg)
    google_neutral_list.append(neu)

#store the average sentiments
sentiments = pd.DataFrame([{"Company":"Google",
             "Compound": np.mean(google_compound_list),
             "Positive":np.mean(google_positive_list),
             "Negative": np.mean(google_negative_list),
             "Neutral": np.mean(google_neutral_list),
             "Review Count": len(google_compound_list)}])
sentiments = sentiments.set_index("Company")

print(sentiments)
    
    


         Compound  Negative   Neutral  Positive  Review Count
Company                                                      
Google   0.309983  0.027724  0.616618  0.353858          7775


In [6]:
#Amazon reviews analyzer
amazon_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=="amazon",:]

#empty list to hold each sentiment
amazon_compound_list = []
amazon_positive_list = []
amazon_negative_list = []
amazon_neutral_list = []

#loop through the summary coloumn and pass it into the analyser
for sentence in amazon_reviews["summary"]:
    results = analyzer.polarity_scores(sentence)
    compound = results["compound"]
    pos = results["pos"]
    neg = results["neg"]
    neu = results["neu"]
    
    #add each of these into the individual list
    amazon_compound_list.append(compound)
    amazon_positive_list.append(pos)
    amazon_negative_list.append(neg)
    amazon_neutral_list.append(neu)
    
#store average sentiments
# sentiments = {"Company":"Amazon",
#              "Compound":np.mean(amazon_compound_list),
#              "Positive":np.mean(amazon_positive_list),
#              "Negative":np.mean(amazon_negative_list),
#              "Neutral":np.mean(amazon_neutral_list),
#              "Number of Reviews":len(amazon_compound_list)}

# print(sentiments)   

sentiments.loc["Amazon"] = [np.mean(amazon_compound_list),np.mean(amazon_positive_list),np.mean(amazon_negative_list),np.mean(amazon_neutral_list),len(amazon_compound_list)]


In [7]:
#analyzer for facebook
fb_compound_list = []
fb_positive_list = []
fb_negative_list = []
fb_neutral_list = []
facebook_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=="facebook"]
for each in facebook_reviews["summary"]:
    result = analyzer.polarity_scores(each)
    compound = result["compound"]
    pos = result["pos"]
    neg = result["neg"]
    neu = result["neu"]
    
    #store each sentiment in respective lists
    fb_compound_list.append(compound)
    fb_positive_list.append(pos)
    fb_negative_list.append(neg)
    fb_neutral_list.append(neu)
    
#average the sentiments
# sentiments = {"Company":"Facebook",
#              "Compound":np.mean(fb_compound_list),
#              "Positive":np.mean(fb_positive_list),
#              "Negative":np.mean(fb_negative_list),
#              "Neutral":np.mean(fb_neutral_list),
#              "Number of Reviews":len(fb_compound_list)}
sentiments.loc["Facebook"] = [np.mean(fb_compound_list),np.mean(fb_positive_list),np.mean(fb_negative_list),np.mean(fb_neutral_list),len(fb_compound_list)]
    

In [8]:
#Netflix reviews analyzer
nf_compound_list = []
nf_positive_list = []
nf_negative_list = []
nf_neutral_list = []
nf_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=="netflix"]


#loop through each sentence
for each in nf_reviews["summary"]:
    result = analyzer.polarity_scores(each)
    comp = result["compound"]
    pos = result["pos"]
    neg = result["neg"]
    neu = result["neu"]
    
    #append it to theie respective sentiment list
    nf_compound_list.append(comp)
    nf_positive_list.append(pos)
    nf_negative_list.append(neg)
    nf_neutral_list.append(neu)
    
#average the sentiments to get overall scores
# sentiments = {"Company":"Netflix",
#              "Compound":np.mean(nf_compound_list),
#              "Positive":np.mean(nf_positive_list),
#              "Negative":np.mean(nf_negative_list),
#              "Neutral":np.mean(nf_neutral_list),
#              "Number of Reviews":len(nf_compound_list)}
sentiments.loc["Netflix"] = [np.mean(nf_compound_list),np.mean(nf_positive_list),np.mean(nf_negative_list),np.mean(nf_neutral_list),len(nf_compound_list)]


In [9]:
#Apple reviews analyzer
apple_compound_list = []
apple_positive_list = []
apple_negative_list = []
apple_neutral_list = []
apple_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=="apple"]

# loop through each sentence
for each in apple_reviews["summary"]:
    result = analyzer.polarity_scores(each)
    comp = result["compound"]
    pos = result["pos"]
    neg = result["neg"]
    neu = result["neu"]
    
    #append it to theie respective sentiment list
    apple_compound_list.append(comp)
    apple_positive_list.append(pos)
    apple_negative_list.append(neg)
    apple_neutral_list.append(neu)
    
#average the sentiments to get overall scores
# sentiments = {"Company":"Apple",
#              "Compound":np.mean(apple_compound_list),
#              "Positive":np.mean(apple_positive_list),
#              "Negative":np.mean(apple_negative_list),
#              "Neutral":np.mean(apple_neutral_list),
#              "Number of Reviews":len(apple_compound_list)}
sentiments.loc["Apple"] = [np.mean(apple_compound_list),np.mean(apple_positive_list),np.mean(apple_negative_list),np.mean(apple_neutral_list),len(apple_compound_list)]


In [12]:
#Microsoft reviews analyzer
ms_compound_list = []
ms_positive_list = []
ms_negative_list = []
ms_neutral_list = []
ms_reviews = company_reviews_clean.loc[company_reviews_clean["company"]=="microsoft"]

# loop through each sentence
for each in ms_reviews["summary"]:
    result = analyzer.polarity_scores(each)
    comp = result["compound"]
    pos = result["pos"]
    neg = result["neg"]
    neu = result["neu"]
    
    #append it to theie respective sentiment list
    ms_compound_list.append(comp)
    ms_positive_list.append(pos)
    ms_negative_list.append(neg)
    ms_neutral_list.append(neu)
    
#average the sentiments to get overall scores
# sentiments = {"Company":"Microsoft",
#              "Compound":np.mean(ms_compound_list),
#              "Positive":np.mean(ms_positive_list),
#              "Negative":np.mean(ms_negative_list),
#              "Neutral":np.mean(ms_neutral_list),
#              "Number of Reviews":len(ms_compound_list)}
sentiments.loc["Microsoft"] = [np.mean(ms_compound_list),np.mean(ms_positive_list),np.mean(ms_negative_list),np.mean(ms_neutral_list),len(ms_compound_list)]
print(sentiments)

           Compound  Negative   Neutral  Positive  Review Count
Company                                                        
Google     0.309983  0.027724  0.616618  0.353858        7775.0
Amazon     0.180549  0.272013  0.062087  0.664874       26283.0
Facebook   0.367170  0.373890  0.029860  0.595619        1578.0
Netflix    0.231131  0.283866  0.081566  0.634566         798.0
Apple      0.264154  0.303672  0.039903  0.655492       12858.0
Microsoft  0.270566  0.301299  0.038654  0.659151       17830.0


######  Insights
Using Vader Sentiment Analysis, the following insights were found:
Amazon has the highest number of reviews, including highest scores for positive comments and comparatively low negative scores.
Microsoft and Apple have almost the same positive and negative scores making them almost same. 
Google has the least positive score and least negative scores.

This is a fun excercise on Vader Sentiment Analysis and conclusions on which company is best to work cannot be made due to limitations such as number of reviews available (i.e, not all companies have the same number of reviews and hence any conclusions could be biased), certain comments are in non-english languages and a detailed analysis would be required on how the scores where calculated.
    