## *US*: Insights into Hiring and Rejections for '*Analyst*' Roles in *2019*

**Lemme know what you think about this analysis! Also, which results do you wanna see in the next version? Feedback is very much appreciated!**

And a **BIG**, **FAT** *Thank You* for looking at my work! 

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# We run this to suppress various deprecation warnings from plotnine - keeps our notebook cleaner
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from plotnine import *
import plotly.express as px
import plotly.graph_objects as go

%matplotlib inline

In [None]:
# Read the 2019 analyst-job records into the frame that every later cell
# slices or copies from.
df = pd.read_csv(
    filepath_or_buffer='../input/2019-data-analyst-jobcsv/2019_data_analyst_job.csv',
)

# Bare last expression: shows the frame as the cell output.
df

## How Many *Analyst* Jobs were Opened in 2019 in Different Cities of US?

In [None]:
# Number of rows per (job title, location) pair, flattened into a tidy frame.
# The count is named 'Count' before reset_index() so it does not collide with
# the 'job_title' group key that reset_index() turns back into a column.
location_wise = (
    df.groupby(['job_title', 'job_location'])['job_title']
    .count()
    .rename('Count')
    .reset_index()
    .rename(columns={'job_title': 'Job Title', 'job_location': 'Job Location'})
)

# One bubble per (title, location) pair: x position, colour and bubble size
# all encode the same count; the location is only visible on hover.
fig = px.scatter(
    location_wise,
    x="Count",
    y="Job Title",
    color="Count",
    size='Count',
    hover_data=['Job Location'],
)
fig.update_layout(
    title='Job Openings in Different Locations (Hover over to see the Locations!)',
    height=800,
)
fig.show()

## How Many People were Hired in each *Analyst* Role in 2019?

In [None]:
# Rows whose `hired` flag compares equal to True.
hired = df.loc[df['hired'] == True]

# Rows per job title among the hired, as a two-column frame.
job_title = (
    hired['job_title']
    .value_counts()
    .rename_axis('Job Title')
    .reset_index(name='Count')
)

# Horizontal bars, one per job title.
fig = px.bar(
    job_title,
    x="Count",
    y="Job Title",
    height=800,
    orientation='h',
)
fig.update_traces(
    marker_color='rgb(158,202,225)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)
fig.update_layout(title='Total Hired for each Position in 2019')
fig.show()

## How Many People were Rejected? 

In [None]:
# Rows whose `hired` flag compares equal to False.
not_hired_mask = df['hired'] == False
n_hired = df[not_hired_mask]

# Rows per job title among the rejected, as a two-column frame.
title_counts = n_hired['job_title'].value_counts()
job_title = title_counts.rename_axis('Job Title').reset_index(name='Count')

# Horizontal bars, one per job title.
fig = px.bar(job_title, x="Count", y="Job Title", orientation='h', height=800)
fig.update_traces(
    opacity=0.9,
    marker_color='rgb(225,202,225)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
)
fig.update_layout(title='Total NOT Hired for each Position in 2019')
fig.show()

In [None]:
# These cell-level imports are no longer needed by this cell; they are kept so
# that any other cell relying on the names still finds them.
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Work on a copy so the raw `df` stays untouched for the other cells.
location_wise_2 = df.copy()

# 0/1 indicator columns built straight from `hired`, using the same
# `== True` / `== False` tests as the per-role bar charts. Summing them in a
# groupby gives hire / rejection counts.
#
# This replaces a LabelEncoder -> OneHotEncoder(sparse=False) round trip:
# the `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# later removed, and indexing column 1 of the encoded matrix fails when
# `hired` contains only one distinct value.
location_wise_2['Hired'] = (location_wise_2['hired'] == True).astype(int)
location_wise_2['Not Hired'] = (location_wise_2['hired'] == False).astype(int)

# Independent copy for the per-date analysis; the per-city cells re-bind
# `location_wise_2` to an aggregated frame.
date_wise = location_wise_2.copy()

## Comparison of Hires and Rejections (Count) in Different Cities

In [None]:
# Total hires and rejections per city.
# The column subset is a list (double brackets): the tuple-style form
# `['Hired','Not Hired']` on a groupby was deprecated and raises in pandas 2.0+.
location_wise_2 = (
    location_wise_2.groupby(['job_location'])[['Hired', 'Not Hired']]
    .sum()
    .reset_index()
    .sort_values(by='job_location', ascending=False)
)

y = location_wise_2.job_location

# Two horizontal bar traces per city; barmode="relative" stacks them.
fig = go.Figure()
fig.add_bar(y=y, x=location_wise_2['Not Hired'], name='Count of Rejection', orientation='h')
fig.add_bar(y=y, x=location_wise_2['Hired'], name='Count of Hires', orientation='h')
fig.update_layout(
    barmode="relative",
    height=1000,
    title='Count of Hires and Rejections in Different Locations (Hover over to see the count!)',
)
fig.show()

## Which Cities Hired the Most and What's the Ratio?

In [None]:
# Per-city totals, ranked by number of hires (most hires first).
# - The column subset is a list: tuple-style `['Hired','Not Hired']` on a
#   groupby raises in pandas 2.0+.
# - A single two-key sort replaces "sort by location, then sort by Hired":
#   the second sort is not guaranteed stable, so ties had no defined order.
#   Ties on 'Hired' now fall back to job_location (descending).
location_wise_2 = (
    location_wise_2.groupby(['job_location'])[['Hired', 'Not Hired']]
    .sum()
    .reset_index()
    .sort_values(by=['Hired', 'job_location'], ascending=[False, False])
)

# The 20 cities with the most hires.
location_wise_3 = location_wise_2.head(20)
location_wise_3

In [None]:
# Stacked horizontal bars (rejections, then hires) for the rows of
# `location_wise_3`, one bar group per city.
y = location_wise_3['job_location']

fig = go.Figure()
fig.add_bar(
    x=location_wise_3['Not Hired'],
    y=y,
    orientation='h',
    name='Count of Rejection',
)
fig.add_bar(
    x=location_wise_3['Hired'],
    y=y,
    orientation='h',
    name='Count of Hires',
)
fig.update_layout(
    title='Count of Hires and Rejections in Top 20 Hiring Cities',
    barmode="relative",
    height=600,
)

fig.show()

## Which Cities Rejected the Most and What's the Ratio?

In [None]:
# The 20 cities with the MOST rejections: rank by 'Not Hired' descending.
# Taking tail(20) of a frame sorted by 'Hired' would instead pick the cities
# with the fewest hires, which is not what the "Top 20 Rejecting Cities" plot
# claims to show. Ties fall back to job_location so the selection is
# deterministic.
location_wise_4 = (
    location_wise_2
    .sort_values(by=['Not Hired', 'job_location'], ascending=[False, False])
    .head(20)
)
location_wise_4

In [None]:
# Stacked horizontal bars (rejections, then hires) for the rows of
# `location_wise_4`, one bar group per city.
y = location_wise_4['job_location']

fig = go.Figure()
fig.add_bar(
    x=location_wise_4['Not Hired'],
    y=y,
    orientation='h',
    name='Count of Rejection',
)
fig.add_bar(
    x=location_wise_4['Hired'],
    y=y,
    orientation='h',
    name='Count of Hires',
)
fig.update_layout(
    title='Count of Hires and Rejections in Top 20 Rejecting Cities',
    barmode="relative",
    height=600,
)

fig.show()

## When Did the Most Hires Take Place during the Year? (Jan, May, September?)

In [None]:
# Hires and rejections summed per calendar date, newest date first.
# - `date` is parsed to datetime so grouping and sorting are chronological
#   rather than lexical.
# - The column subset is a list: tuple-style `['Hired','Not Hired']` on a
#   groupby raises in pandas 2.0+.
date_wise = (
    date_wise
    .assign(date=pd.to_datetime(date_wise["date"]))
    .groupby(['date'])[['Hired', 'Not Hired']]
    .sum()
    .reset_index()
    .sort_values(by='date', ascending=False)
)

yy = date_wise.date

# Two horizontal bar traces per date; barmode="relative" stacks them.
fig = go.Figure()
fig.add_bar(y=yy, x=date_wise['Not Hired'], name='Count of Rejection', orientation='h')
fig.add_bar(y=yy, x=date_wise['Hired'], name='Count of Hires', orientation='h')
fig.update_layout(
    barmode="relative",
    height=1200,
    title='Count of Hires and Rejections over the Dates (Hover over to see the count!)',
)
fig.show()

## What is the Percentage of Hires and Rejections for each *Analyst* Role?

In [None]:
import matplotlib.pyplot as plt

# Distinct job titles to chart. Missing titles are dropped: `job_title == NaN`
# never matches any row, so a NaN entry would only produce an empty chart.
title = list(df.job_title.dropna().unique())

# Slice colours, shared by every chart (loop-invariant, so defined once).
irises_colors = ['rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
                 'rgb(175, 49, 35)', 'rgb(36, 73, 147)']

# One donut chart per job title showing the share of each `hired` value.
# The already-loaded `df` is reused; re-reading the CSV and re-importing
# plotly on every iteration was redundant work.
for t in title:
    # Counts of each `hired` value among the rows for this title.
    df_ = df.loc[df.job_title == t, 'hired'].value_counts().to_frame().reset_index()
    df_.columns = ['Hired', 'Count']

    labels = df_.Hired
    values = df_.Count

    # Use `hole` to create a donut-like pie chart
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3, name='Hired?',
                                 marker_colors=irises_colors, opacity=0.7)])
    fig.update_layout(title='Percentage of Hires of {}'.format(t), width=700)

    fig.show()