In [5]:
# Load reviews.csv into a DataFrame, then collapse the free-text 'Position'
# values into a handful of job-function buckets.
# NOTE: Get_Reviews / Ogranize_By_Job_Function are defined in the `In [4]`
# cell further down; that cell has to be executed before this one.
df=Get_Reviews()
df=Ogranize_By_Job_Function(df)

In [6]:
# Pie chart: number of reviews per job-function bucket.
TotalReviewsPieChart(df)

In [7]:
# Grouped bar chart: mean rating per job function, current vs. former employees.
Former_vs_Current_Employees_BarChart(df)

In [8]:
# Line chart: yearly mean rating for each job function.
Reviews_Plot_Chart(df)

In [9]:
# Bar chart of the 10 most frequent words found in the 'Cons' column.
TopTopics(df,10,'Cons')

In [10]:
# Scatter (date vs. rating) of the reviews whose 'Cons' text contains one of
# the 10 most frequent 'Cons' words; hovering shows the review text.
TopReviewsScatter(df,10,'Cons')

In [11]:
# Bar chart of the 10 most frequent words found in the 'Pros' column.
TopTopics(df,10,'Pros')

In [12]:
# Scatter (date vs. rating) of the reviews whose 'Pros' text contains one of
# the 10 most frequent 'Pros' words; hovering shows the review text.
TopReviewsScatter(df,10,'Pros')

In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from plotly.offline import iplot,init_notebook_mode
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib notebook
init_notebook_mode(connected=True)

def Get_Reviews():
    """Read ``reviews.csv`` from the working directory and tidy it up.

    Steps, in order:
      1. move the index created by ``read_csv`` back into an ordinary column,
      2. discard the ``outlook`` column,
      3. rename the eleven remaining columns positionally,
      4. parse ``Date`` into datetimes,
      5. rescale ``Rating`` with ``/ 5 * 100``.

    Returns:
        pandas.DataFrame: the cleaned review table.
    """
    column_names = ['Headline', 'Rating', 'Position', 'Status', 'Date', 'Duration',
                    'Cons', 'Pros', 'Management Advice', 'Recommends', 'Outlook']

    reviews = pd.read_csv('reviews.csv').reset_index().drop('outlook', axis=1)
    # Positional rename: the frame must hold exactly len(column_names) columns here.
    reviews.columns = column_names
    reviews['Date'] = pd.to_datetime(reviews['Date'])
    # NOTE(review): presumably turns a 1-5 star score into a percentage - confirm.
    reviews['Rating'] = reviews['Rating'] / 5 * 100
    return reviews


def Ogranize_By_Job_Function(df1):
    """Collapse free-text job titles in ``df1['Position']`` into broad buckets.

    The frame is modified in place and also returned.  Rules are applied in
    order against the *current* value of ``Position``, so a title already
    rewritten by an earlier rule is matched by later rules under its bucket
    name (e.g. "Software Engineering Manager" becomes ``Technology`` and is
    then no longer seen by the ``Manager`` rule).  Anything that ends up in
    none of the buckets, including missing titles, becomes ``Other``.

    Args:
        df1: DataFrame with a string ``Position`` column.

    Returns:
        The same DataFrame object that was passed in.
    """
    ordered_rules = (
        ('intern|Intern', 'Intern'),
        ('Developer|Systems|Software|Programmer|User|Engineer', 'Technology'),
        ('Consulting|Consultant', 'Consultant'),
        ('Manager|Executive|Director', 'Manager'),
        ('Analyst|analyst', 'Analyst'),
        ('Technology|Specialist', 'Technology'),
    )
    for pattern, bucket in ordered_rules:
        # na=False keeps the mask strictly boolean when a title is missing.
        matches = df1['Position'].str.contains(pattern, na=False)
        df1.loc[matches, 'Position'] = bucket

    bucketed = df1['Position'].str.contains('Technology|Intern|Manager|Analyst|Consultant', na=False)
    df1.loc[~bucketed, 'Position'] = 'Other'

    # Return the argument itself; the original returned the notebook-global `df`.
    return df1


def _status_rating_summary(df1, status_word):
    """Return (positions, mean ratings, review counts) for rows whose Status contains ``status_word``."""
    subset = df1[df1['Status'].str.contains(status_word, na=False)]
    ratings_by_position = subset.groupby('Position')['Rating']
    means = ratings_by_position.mean()
    # size() and mean() share the same sorted Position index, so the three lists line up.
    sizes = ratings_by_position.size()
    return means.index.tolist(), means.tolist(), sizes.tolist()


def Former_vs_Current_Employees_BarChart(df1):
    """Draw grouped bars of the mean rating per job function, current vs. former staff.

    Rows are assigned to a group by whether ``Status`` contains the word
    "Current" or "Former".  ``Rating`` is divided by 100 before plotting so it
    fits the percent-formatted y axis.  Hovering over a bar shows how many
    reviews went into it.

    The hover counts are derived from the same groupby as the bar heights.
    Building them from ``df1['Position'].unique()`` (first-appearance order)
    while the bars follow the sorted groupby index attaches counts to the
    wrong bars, and the two lists differ in length whenever a job function is
    missing from one group.

    Args:
        df1: DataFrame with ``Status``, ``Position`` and ``Rating`` columns.
    """
    traces = []
    for status_word, trace_name, hover_prefix in (
            ('Current', 'Current Employees', "Number of Current Employees: <br>"),
            ('Former', 'Former Employees', "Number of Former Employees: <br>")):
        positions, mean_ratings, review_counts = _status_rating_summary(df1, status_word)
        traces.append(go.Bar(x=positions,
                             y=[rating / 100 for rating in mean_ratings],
                             name=trace_name,
                             text=[hover_prefix + str(count) for count in review_counts],
                             hoverinfo="text"))

    layout1 = go.Layout(
        barmode='group',
        yaxis=dict(tickformat='%', range=(0, 1), title='Rating'),
        title="<b>Former vs Current Employees Rating</b>",
        titlefont=dict(family='Courier New', size=20),
        annotations=[dict(xref='paper', yref='paper', x=0.5, y=-0.1,
                          xanchor='center', yanchor='top',
                          text='Source: Glassdoor.com Reviews',
                          font=dict(family='Arial', size=12, color='rgb(150,150,150)'),
                          showarrow=False)])
    iplot(dict(data=traces, layout=layout1))

    
def Average_Year_Rating(df1):
    """Return the mean ``Rating`` per (calendar year, ``Position``).

    Args:
        df1: DataFrame with a datetime ``Date`` column plus ``Position`` and
            ``Rating`` columns.

    Returns:
        pandas.DataFrame with columns ``Date`` (the year as an integer),
        ``Position`` and ``Avg``, sorted by year and then position.
    """
    # Group on df1's own columns; the original read Position from the
    # notebook-global `df`, which only worked when df1 happened to be that frame.
    yearly_mean = df1.groupby([df1['Date'].dt.year, df1['Position']])['Rating'].mean()
    # mean().rename() replaces agg({'Avg': np.mean}), a "nested renamer" that
    # current pandas rejects on a SeriesGroupBy.
    return yearly_mean.rename('Avg').reset_index()


def TotalReviewsPieChart(df1):
    """Draw a pie chart of how many reviews fall into each ``Position`` value.

    The title shows the total number of rows in ``df1``.  Slices are labelled
    with the position name and its share; hovering shows the raw count and share.

    Args:
        df1: DataFrame with a ``Position`` column.
    """
    positions = df1['Position']
    total = len(positions)
    labels = positions.unique().tolist()
    per_position = positions.value_counts()
    # Plain ints, in first-appearance order of the labels; a missing label counts as 0.
    values = [int(per_position.get(label, 0)) for label in labels]

    source_note = dict(xref='paper', yref='paper', x=0.5, y=-0.1,
                       xanchor='center', yanchor='top',
                       text='Source: Glassdoor.com Reviews',
                       font=dict(family='Arial', size=12, color='rgb(150,150,150)'),
                       showarrow=False)

    # Plain dict only.  The original `figs = go.Figure = {...}` also rebound
    # plotly.graph_objs.Figure to this dict for the rest of the session.
    figs = {
        'data': [{'labels': labels,
                  'values': values,
                  'textinfo': "label+percent",
                  'hoverinfo': "value+percent",
                  'type': 'pie'}],
        'layout': {'title': '<b>{} Total Reviews</b>'.format(total),
                   'titlefont': dict(family='Courier New', size=20),
                   'annotations': [source_note]},
    }
    iplot(figs)
    
    
    
def Reviews_Plot_Chart(df1, after_year=2011):
    """Draw one line per job function showing its yearly mean rating.

    Each line gets a marker at its first and last year, a left-hand label
    "<position>  <first value>%" and a right-hand label "<last value>%".

    Changes relative to the first version of this function:
      * x and y values come from the same year-filtered rows (previously only
        x was filtered, so the two lists could differ in length and be offset),
      * the last point is taken with ``[-1]`` instead of the hard-coded index
        ``5``, which required exactly six years of data,
      * colours are cycled instead of raising IndexError past six positions,
      * the hand-tuned label positions only ever touch label annotations.

    Args:
        df1: DataFrame accepted by ``Average_Year_Rating`` (``Date``,
            ``Position``, ``Rating`` columns).
        after_year: only years strictly greater than this value are plotted.
    """
    yearly = Average_Year_Rating(df1)
    yearly = yearly[yearly['Date'] > after_year]

    palette = ['red', 'blue', 'green', 'grey', 'orange', 'brown']
    labels = yearly['Position'].unique().tolist()

    series = []  # (label, colour, years, averages), one entry per job function
    for position_index, label in enumerate(labels):
        rows = yearly[yearly['Position'] == label]
        series.append((label,
                       palette[position_index % len(palette)],
                       rows['Date'].tolist(),
                       rows['Avg'].tolist()))

    traces = []
    for label, colour, years, averages in series:
        traces.append(go.Scatter(x=years, y=averages, mode='lines',
                                 line=dict(width=2, color=colour), name=label))
        # Emphasise the first and last point of every line.
        traces.append(go.Scatter(x=[years[0], years[-1]], y=[averages[0], averages[-1]],
                                 mode='markers', marker=dict(size=8, color=colour)))

    annotations = []
    for label, colour, _, averages in series:
        # Label on the left-hand side of the plot.
        annotations.append(dict(xref='paper', x=0.05, y=averages[0], xanchor='right', yanchor='middle',
                                text=label + '  {0:.0f}%'.format(averages[0]),
                                font=dict(family='Arial', size=12, color=colour),
                                showarrow=False))
        # Label on the right-hand side of the plot.
        annotations.append(dict(xref='paper', x=0.95, y=averages[-1],
                                xanchor='left', yanchor='middle',
                                text='{0:.0f}%'.format(averages[-1]),
                                font=dict(family='Arial', size=12, color=colour), showarrow=False))

    # Hand-tuned y positions for specific label annotations.
    # NOTE(review): presumably chosen to separate overlapping labels for one
    # particular dataset - confirm they still fit the current data.
    # Applied before the title/source annotations are appended so that an
    # index can never land on one of those.
    for annotation_index, y_position in ((2, 67.5), (3, 70), (10, 78), (9, 74)):
        if annotation_index < len(annotations):
            annotations[annotation_index]['y'] = y_position

    # Title
    annotations.append(dict(xref='paper', yref='paper', x=0.0, y=1.05,
                            xanchor='left', yanchor='bottom',
                            text='<b>Employees at Accenture Approval Rate Over Time</b>',
                            font=dict(family='Courier New', size=20, color='rgb(37,37,37)'), showarrow=False))
    # Source
    annotations.append(dict(xref='paper', yref='paper', x=0.5, y=-0.1,
                            xanchor='center', yanchor='top',
                            text='Source: Glassdoor.com Reviews',
                            font=dict(family='Arial', size=12, color='rgb(150,150,150)'), showarrow=False))

    layout = go.Layout(
        # tickmode='auto' stands in for the legacy `autotick=True` attribute.
        xaxis=dict(showline=True, showgrid=False, showticklabels=True, linecolor='rgb(204, 204, 204)', linewidth=2,
                   tickmode='auto', ticks='outside', tickcolor='rgb(204, 204, 204)', tickwidth=2, ticklen=5,
                   tickfont=dict(family='Arial', size=12, color='rgb(82, 82, 82)')),
        yaxis=dict(showgrid=False, zeroline=False, showline=False, showticklabels=False),
        autosize=True, margin=dict(autoexpand=True), showlegend=False,
        annotations=annotations,
    )
    iplot(dict(data=traces, layout=layout))
    
    
    
import re
from collections import Counter

def TopProsTopics(df,numberoftopics):
    """Count the most frequent words in ``df['Pros']``.

    Every entry is converted with ``str`` and stripped of the characters
    matched by ``['-.]``.  That class is a *range* from ``'`` to ``.``, i.e.
    it removes ``' ( ) * + , - .``.  The cleaned texts are joined and split on
    single spaces.  A word is counted when it is longer than four characters
    and its title-cased form is not one of Accenture / Great / Company.
    Counting itself is case-sensitive.

    Args:
        df: DataFrame with a ``Pros`` column.
        numberoftopics: how many of the most common words to return.

    Returns:
        dict mapping word -> occurrence count, most common first.
    """
    ignored = {'Accenture', 'Great', 'Company'}
    blob = " ".join(re.sub("['-.]", "", str(entry)) for entry in df['Pros'].tolist())
    tally = Counter(word for word in blob.split(' ')
                    if len(word) > 4 and word.title() not in ignored)
    return dict(tally.most_common(numberoftopics))


def TopConsTopics(df,numberoftopics):
    """Count the most frequent words in ``df['Cons']``.

    Each entry is passed through ``str`` and the characters ``' ( ) * + , - .``
    (the range covered by the class ``['-.]``) are removed.  The cleaned texts
    are joined and split on single spaces.  Words of five or more characters
    are counted case-sensitively unless their title-cased form appears in the
    ignore list below.

    Args:
        df: DataFrame with a ``Cons`` column.
        numberoftopics: how many of the most common words to return.

    Returns:
        dict mapping word -> occurrence count, most common first.
    """
    skip = ('Accenture', 'Company', 'Youre', 'Their', 'There', 'Little',
            'Which', 'Sometimes', 'Great', 'Being')
    cleaned = (re.sub("['-.]", "", str(review)) for review in df['Cons'].tolist())
    tokens = " ".join(cleaned).split(' ')
    kept = [token for token in tokens if len(token) > 4 and token.title() not in skip]
    return dict(Counter(kept).most_common(numberoftopics))

def TopTopics(df,count,mode):
    """Draw a bar chart of the ``count`` most frequent words in Pros or Cons.

    Args:
        df: DataFrame with ``Pros`` and ``Cons`` text columns.
        count: number of words to show.
        mode: ``'Cons'`` or ``'Pros'``; selects the column and the chart title.

    Raises:
        ValueError: if ``mode`` is neither ``'Cons'`` nor ``'Pros'``
            (previously this surfaced as an unbound-variable error).
    """
    # Validate first so a bad mode fails with a clear message before any work is done.
    if mode not in ('Cons', 'Pros'):
        raise ValueError("mode must be 'Cons' or 'Pros', got {!r}".format(mode))

    if mode == 'Cons':
        topics = TopConsTopics(df, count)
        text = '<b>Top {} Complains Topics</b>'.format(count)
    else:
        topics = TopProsTopics(df, count)
        text = '<b>Top {} Positive Reviews Topics</b>'.format(count)

    words = [word.title() for word in topics.keys()]
    occurrences = list(topics.values())
    # Bar heights are rescaled by a fixed factor; the y axis shows no tick
    # labels, so only relative height is visible.
    # NOTE(review): 82 is presumably the number of reviews in the dataset - confirm.
    heights = [n / 82 * 10 for n in occurrences]

    data1 = [go.Bar(x=words, y=heights,
                    text=occurrences, hoverinfo='x+text',
                    textposition='outside')]
    layout1 = go.Layout(yaxis=dict(showline=False, showgrid=False, showticklabels=False, type='linear'),
                        # Font size is a number; family was misspelled 'Ariel'.
                        xaxis=dict(tickfont=dict(family='Arial', size=15)),
                        title=text, titlefont=dict(family='Arial', size=22, color='rgb(150,150,150)'))
    iplot(dict(data=data1, layout=layout1))

def _wrap_words(text, every=12):
    """Append '<br>' to every ``every``-th space-separated word so hover text wraps."""
    words = text.split(' ')
    for position in range(every - 1, len(words), every):
        words[position] = words[position] + '<br>'
    return " ".join(words)


def TopReviews(df,numberoftopics,mode):
    """Collect, for each top keyword, the reviews that mention it.

    Only rows without any missing value are considered.  For every keyword
    returned by ``TopConsTopics`` / ``TopProsTopics`` the matching rows are
    selected, the ``Recommends``, ``Headline``, ``Outlook`` columns and the
    opposite text column are dropped, and the ``mode`` column is replaced by
    an HTML hover string combining the wrapped review text with the wrapped
    ``Management Advice``.

    Keywords are matched as literal substrings (``regex=False``).  A keyword
    taken from review text may contain characters such as ``?`` or ``[`` that
    would otherwise be read as a regular expression.

    Args:
        df: review DataFrame.
        numberoftopics: number of top keywords to use.
        mode: ``'Cons'`` selects the Cons column; any other value selects Pros
            keywords and is used as the column name.

    Returns:
        dict mapping keyword -> DataFrame of matching reviews.
    """
    if mode == 'Cons':
        topreviews = TopConsTopics(df, numberoftopics)
        opposite_column = 'Pros'
    else:
        topreviews = TopProsTopics(df, numberoftopics)
        opposite_column = 'Cons'

    complete_rows = df.dropna()
    df_reviews = {}
    for keyword in topreviews:
        matching = complete_rows[complete_rows[mode].str.contains(keyword, regex=False)]
        matching = matching.drop(['Recommends', 'Headline', 'Outlook', opposite_column], axis=1)
        matching[mode] = [
            '<b>Review:</b><br>' + _wrap_words(review)
            + '<br><br><b>Advice to Management :</b><br>' + _wrap_words(advice)
            for review, advice in zip(matching[mode].tolist(),
                                      matching['Management Advice'].tolist())
        ]
        df_reviews[keyword] = matching
    return df_reviews

def TopReviewsScatter(df,numberoftopics,mode):
    """Scatter-plot the reviews that mention each top keyword (date vs. rating).

    One trace is drawn per keyword; the legend entry shows the keyword and how
    often it occurs.  Hovering over a point shows the review text prepared by
    ``TopReviews``.  ``Rating`` is divided by 100 to suit the percent tick format.

    Args:
        df: review DataFrame.
        numberoftopics: number of top keywords to plot.
        mode: ``'Cons'`` or ``'Pros'``.
    """
    df_reviews = TopReviews(df, numberoftopics, mode)
    topreviews = TopConsTopics(df, numberoftopics) if mode == 'Cons' else TopProsTopics(df, numberoftopics)

    datas = []
    for keyword, occurrences in topreviews.items():
        matching = df_reviews[keyword]
        datas.append(go.Scatter(x=matching['Date'].tolist(),
                                y=(matching['Rating'] / 100).tolist(),
                                mode='markers', marker=dict(size=10), hoverinfo="text",
                                text=matching[mode].tolist(),
                                # Tags are closed in the reverse order they were opened.
                                name='<b><i>{}</i></b>'.format(keyword.title()) + ' {}'.format(str(occurrences))))

    layout1 = go.Layout(
        xaxis=dict(showline=True, showgrid=False, showticklabels=True, ticks='outside',
                   tickcolor='rgb(204, 204, 204)', tickwidth=2, ticklen=5),
        # y values are fractions (Rating/100), so the range is given on that scale.
        # NOTE(review): autorange=True presumably takes precedence over `range` - confirm.
        yaxis=dict(tickformat='%', showgrid=False, range=[0, 1.05], autorange=True, title='Rating',
                   titlefont=dict(size=18, family='Arial')),
        showlegend=True,
        hovermode='closest',
        title='<b>Top {} Popular </b>'.format(numberoftopics) + '<b> ' + mode + ' Reviews</b>',
        titlefont=dict(family='Courier New', size=18, color='Black'),
        legend=dict(font=dict(family="Raleway", size=14)),
        annotations=[dict(xref='paper', yref='paper', x=0.5, y=-0.1,
                          xanchor='center', yanchor='top',
                          text='Source: Glassdoor.com Reviews',
                          font=dict(family='Arial', size=12, color='rgb(150,150,150)'), showarrow=False)]
    )
    iplot(dict(data=datas, layout=layout1))