**Work in progress...**

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import plotly.io as pio


import missingno as msno
from itertools import zip_longest
from itertools import compress
import warnings

# Notebook-wide output settings: hide every warning and print NumPy arrays
# with three decimals and no scientific notation.
warnings.filterwarnings('ignore')
np.set_printoptions(suppress=True, precision=3)

In [None]:
# Default plotly template applied to every figure created afterwards.
pio.templates.default = 'simple_white'

# Shared palette, from the accent colour down to near-white.
colors = [
    "#7c7cff",
    "#c2c2da",
    "#d4d4e0",
    "#eeeef7",
    "#fefeff",
]

# Config handed to fig.show() when a figure should render as a static plot.
static = dict(staticPlot=True)

In [None]:
# Paths to the two CSV splits of the dataset.
test_filepath = '../input/hr-analytics-job-change-of-data-scientists/aug_test.csv'
train_filepath = '../input/hr-analytics-job-change-of-data-scientists/aug_train.csv'
test = pd.read_csv(test_filepath)
train = pd.read_csv(train_filepath)

# Stack both splits (test rows first) into one frame for exploration.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the row
# labels of the two splits overlap and label-based lookups become ambiguous.
hr = pd.concat([test, train], axis=0, ignore_index=True)

# The identifier column is dropped before any analysis.
hr.drop(columns='enrollee_id', inplace=True)

In [None]:
# Missing values: one bar per column of the combined frame.
msno.bar(hr, figsize=(12, 5), fontsize=12, color="#7c7cff")

In [None]:
#@title
# Preview the first five rows of the combined frame.
hr.head(5)

In [None]:
# Headline numbers for the banner: number of rows and number of distinct cities.
entries_count = len(hr)
city_count = hr['city'].nunique()

fig = go.Figure()

# The banner is made of two text-only annotations that share their anchoring
# (paper coordinates, left edge, no arrow) and differ in height, text and font.
banner_lines = (
    dict(
        y=3,
        text=f'<b>{entries_count:,}</b> respondents',
        font={'family': 'Arial', 'size': 40, 'color': colors[0]},
    ),
    dict(
        y=-1,
        text=f'from <b>{city_count}</b> cities',
        font={'family': 'Arial', 'size': 30},
    ),
)
for banner_line in banner_lines:
    fig.add_annotation(
        xref='paper',
        yref='paper',
        x=0,
        showarrow=False,
        **banner_line
    )

# No data is plotted, so hide both axes and fix the canvas size and colour.
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)
fig.update_layout(
    autosize=False,
    width=500,
    height=200,
    paper_bgcolor=colors[3],
    plot_bgcolor=colors[3],
)

fig.show(config=static)

In [None]:
# Respondents per city, most frequent first; labels get dashes instead of
# underscores and are upper-cased for display.
cities_values = hr['city'].value_counts()
cities_values.index = cities_values.index.str.replace('_', '-').str.upper()
city_plot = cities_values.head(20)

# One bar per city with its count printed above the bar.
top_cities_bar = go.Bar(
    x=city_plot.index,
    y=city_plot.values,
    text=city_plot.values,
    textposition='outside',
    marker_color=colors[0],
)
fig = go.Figure(data=[top_cities_bar])

# The counts are already written on the bars, so the y axis is hidden.
fig.update_yaxes(visible=False)
fig.update_layout(
    title='<b>Top 20 Cities</b> : Count',
    font={'family': 'Arial', 'size': 10},
    xaxis={'tickangle': -90},
)
fig.show()

In [None]:
# Preparing Dataframe
# Split respondents by reported gender; rows with a missing gender match none
# of the three comparisons.
male_respondents = hr[hr.gender == 'Male']
female_respondents = hr[hr.gender == 'Female']
other_respondents = hr[hr.gender == 'Other']

# Education-level counts within each gender.
male_plot = male_respondents.education_level.value_counts()
female_plot = female_respondents.education_level.value_counts()
other_plot = other_respondents.education_level.value_counts()
all_gender_respondents = [male_plot, female_plot, other_plot]

# Share of each gender, in percent, among respondents who reported one
# (value_counts leaves missing values out by default).
gender_percentage = hr.gender.value_counts(normalize=True) * 100
# Share of all rows, in percent, where gender is missing.
gender_null = round(hr.gender.isnull().sum() / len(hr) * 100, 2)


# Creating Figure
fig = go.Figure()

# (gender label, education counts, colour, annotation height, annotation font size)
gender_specs = [
    ('Male', male_plot, colors[0], .8, 35),
    ('Female', female_plot, colors[1], .62, 28),
    ('Other', other_plot, colors[2], .52, 20),
]

# One bar series per gender, with the raw count printed above each bar.
for gender_label, counts, bar_color, _, _ in gender_specs:
    fig.add_trace(go.Bar(
        x=counts.index,
        y=counts.values,
        name=gender_label,
        marker_color=bar_color,
        text=counts.values
    ))

fig.update_traces(textposition='outside')

# Percentage call-outs. Each share is looked up by its gender label, so the
# text always matches the number regardless of how the counts are ordered;
# a gender that does not occur in the data is shown as 0.
for gender_label, _, text_color, y_position, font_size in gender_specs:
    share = round(gender_percentage.get(gender_label, 0.0), 2)
    fig.add_annotation(
        xref='paper',
        yref='paper',
        x=.8,
        y=y_position,
        text="<b>{} {}%</b>".format(gender_label, share),
        font={'family': 'Arial', 'color': text_color, 'size': font_size},
        showarrow=False
    )

# Small note above the plot with the share of rows lacking a gender.
fig.add_annotation(
    xref='paper',
    yref='paper',
    x=1,
    y=1.1,
    text="<b>gender null percentage = {}%</b> <br>".format(gender_null),
    font={'family': 'Arial', 'color': colors[2], 'size': 12},
    showarrow=False
)

fig.update_traces(textfont=dict(family='Arial'))
fig.update_yaxes(visible=False)
fig.update_layout(title='<b>Education Level, Gender Counts</b>', font=dict(family='Arial', size=10))
fig.show()

In [None]:
# Preparing the Dataframe
# Split respondents on the two values of the relevent_experience column.
yes_experience = hr.query('relevent_experience == "Has relevent experience"')
no_experience = hr.query('relevent_experience == "No relevent experience"')

# City development index of each group, in the same order as the labels.
data1 = yes_experience.city_development_index
data2 = no_experience.city_development_index
histdata = [data1, data2]
label = ['Experienced', 'No Relevant Experience']

# Create Distribution Plot
fig = ff.create_distplot(
    histdata,
    label,
    bin_size=.1,
    colors=colors
)

# Create Annotation
# The text (including its leading line break and indentation) is kept exactly
# as it was; it is spelled with escapes so the whitespace is visible.
fig.add_annotation(
    xref='paper',
    yref='paper',
    y=1,
    x=0.8,
    text=(
        "\n    A <b>denser</b> distribution of experienced individuals in <br> "
        "\n    cities with higher development index."
    ),
    font={'family': 'Arial', 'color': 'gray'},
    showarrow=False
)

# The bold tag in the title is now closed; it was previously left open.
fig.update_layout(title="<b>Respondents' Experience</b>", font=dict(family='Arial', size=10))
fig.show()

In [None]:
# Get company type as dataframes

# Distinct company types in order of first appearance (may include NaN).
company_type = hr.company_type.unique()

# One frame per distinct value, in that same order. A missing value never
# compares equal to anything, so the frame built for NaN is empty.
companies_df = [hr[hr.company_type == label] for label in company_type]

# Labels of the company types that get a named frame below.
# NOTE(review): only 'Pvt Ltd' is confirmed elsewhere in this notebook; the
# other spellings are assumed from the dataset -- the check below fails loudly
# if any of them does not occur in the data.
_company_labels = [
    'Pvt Ltd',
    'Funded Startup',
    'Other',
    'Public Sector',
    'Early Stage Startup',
    'NGO',
]
_unknown_labels = [label for label in _company_labels if label not in set(company_type)]
if _unknown_labels:
    raise KeyError('company_type values not found in data: {}'.format(_unknown_labels))

# Select every frame by its label rather than by its position in
# `company_type`: that order follows the row order of the data, so positional
# unpacking would silently mislabel the frames whenever the rows are reordered.
nan = hr[hr.company_type.isna()]  # respondents with no company type recorded
private = hr[hr.company_type == 'Pvt Ltd']
funded_startup = hr[hr.company_type == 'Funded Startup']
other = hr[hr.company_type == 'Other']
public = hr[hr.company_type == 'Public Sector']
early_startup = hr[hr.company_type == 'Early Stage Startup']
NGO = hr[hr.company_type == 'NGO']

companies = [private, funded_startup, other, public, early_startup, NGO]

In [None]:
# Preparing the Dataframe
def _split_by_experience(frame):
    """Return (experienced, not experienced) subsets of frame."""
    has_experience = frame['relevent_experience'] == 'Has relevent experience'
    return frame[has_experience], frame[~has_experience]


private_experience, private_noexperience = _split_by_experience(private)
public_experience, public_noexperience = _split_by_experience(public)
funded_startup_experience, funded_startup_noexperience = _split_by_experience(funded_startup)
early_startup_experience, early_startup_noexperience = _split_by_experience(early_startup)
# Computed alongside the others; the four-row grid below does not plot them.
other_experience, other_noexperience = _split_by_experience(other)

# Create subplot
# Four rows (one per plotted company type) by two columns (experienced / not).
# Only the two top cells receive a title, so the titles act as column headers.
fig = make_subplots(rows=4, cols=2,
    subplot_titles=('Experience', 'No-relevant Experience'),
    vertical_spacing=0.10,
    horizontal_spacing=0.10,
)

plot_experience = [private_experience, public_experience, early_startup_experience, funded_startup_experience]
plot_noexperience = [private_noexperience, public_noexperience, early_startup_noexperience, funded_startup_noexperience]
plot_names1 = ['Private(Experience)', 'Public(Experience)', 'Early Start-up(Experience)', 'Funded Start-up(Experience)']
plot_names2 = ['Private(No-experience)', 'Public(No-experience)', 'Early Start-up(No-experience)', 'Funded Start-up(No-experience)']

# Create Figure
# Each trace takes its legend name straight from the matching names list.
# The names used to be written onto `frame.training_hours.name` and read back
# later, which only works while pandas hands out the same cached column object
# on both accesses.
# NOTE(review): histfunc aggregates y values and no y is passed here, so the
# bars are presumably plain counts -- confirm whether 'avg' is intended.
subplot_columns = [
    (plot_experience, plot_names1, colors[0], 1),
    (plot_noexperience, plot_names2, colors[1], 2),
]
for frames, trace_names, bar_color, column in subplot_columns:
    for row, (frame, trace_name) in enumerate(zip(frames, trace_names), start=1):
        fig.add_trace(go.Histogram(
            x=frame.training_hours,
            marker_color=bar_color,
            histfunc='avg',
            name=trace_name
        ), row=row, col=column)

fig.update_layout(
    height=800,
    template='plotly_white',
    title='<b>Training Hours per Company Type</b>',
    font=dict(family='Arial', size=10),
    margin=dict(l=100, t=200)
)

fig.update_yaxes(automargin=True)
fig.show()

In [None]:
# Number of respondents for every (company type, city development index) pair.
company_type_cdi = hr.groupby(['company_type', 'city_development_index']).size()
unique_companies = hr.company_type.unique()

fig = go.Figure()

# Legend name for each company type, keyed by the value stored in the data so
# the pairing does not depend on the order in which the types first appear.
# NOTE(review): only 'Pvt Ltd' is confirmed elsewhere in this notebook; the
# other keys are assumed from the dataset. A type without an entry here falls
# back to its raw label.
display_names = {
    'Pvt Ltd': 'Private',
    'Funded Startup': 'Funded Start-up',
    'Other': 'Other',
    'Public Sector': 'Public',
    'Early Stage Startup': 'Early Start-up',
    'NGO': 'NGO',
}
# Kept with its previous contents for any code that still reads it.
plot_names = ['nan', *display_names.values()]

# Company types that actually have counts. Missing values are not among them,
# so they are skipped explicitly instead of by swallowing the lookup error.
plotted_types = set(company_type_cdi.index.get_level_values(0))

for company in unique_companies:
    if company not in plotted_types:
        continue
    counts_by_cdi = company_type_cdi[company]
    # 'Pvt Ltd' is highlighted in the accent colour with markers; every other
    # type is drawn as a plain gray line.
    highlighted = company == 'Pvt Ltd'
    fig.add_trace(go.Scatter(
        y=counts_by_cdi.values,
        x=counts_by_cdi.index,
        mode='lines+markers' if highlighted else 'lines',
        marker_color=colors[0] if highlighted else 'gray',
        name=display_names.get(company, company)
    ))

#fig.add_hline(x=0.86, line_width=3, line_dash="dash", line_color="yellow")

fig.update_traces(line_width=0.5, marker_size=3)
fig.update_yaxes(visible=False)
fig.update_layout(title='<b>Company Type and City Development Index</b>',
    autosize=True,
    font=dict(family='Arial', size=10)
)

fig.show()

In [None]:
def percentage(s):
    """
    Convert a series of numbers to rounded percentage strings.

    Each value is expressed as its share of the series total, rounded to two
    decimals and suffixed with '%' (e.g. 1 out of a total of 4 -> '25.0%').

    Parameters
    ----------
    s : pandas.Series
        Numeric values, typically counts.

    Returns
    -------
    pandas.Series
        Strings with the same index as ``s``.
    """
    # Sum once up front; it used to be recomputed for every element.
    total = s.sum()
    return s.apply(lambda value: str(round(value / total * 100, 2)) + '%')

In [None]:
# Number of respondents for every (relevent_experience, target) pair.
experience = hr.groupby(['relevent_experience', 'target']).size()

# Shared x-axis settings: tick positions 0 and 1 are shown as 'No' and 'Yes'.
job_change = {
    'tickmode': 'array',
    'tickvals': [0, 1],
    'ticktext': ['No', 'Yes'],
}

fig = go.Figure()

# (group key in the data, legend label, bar colour)
experience_bars = (
    ('Has relevent experience', 'Has relevant experience', colors[0]),
    ('No relevent experience', 'Has no relevant experience', colors[1]),
)
for group_key, legend_label, bar_color in experience_bars:
    group_counts = experience[group_key]
    # Bar height is the raw count; the text on the bar is the share within the group.
    fig.add_trace(go.Bar(
        y=group_counts,
        name=legend_label,
        text=percentage(group_counts).values,
        marker_color=bar_color
    ))

fig.update_traces(textposition='inside')
fig.update_layout(
    xaxis=job_change,
    autosize=True,
    title='<b>Relevent Experience and Job Change</b>',
    font={'family': 'Arial', 'size': 10}
)

fig.update_xaxes(title='Job Change')

fig.show()

In [None]:
# Number of respondents for every (gender, target) pair.
gender_target = hr.groupby(['gender', 'target']).size()
fig = go.Figure()

# One bar series per gender, coloured in palette order. Bar height is the raw
# count; the text above the bar is the share within that gender.
for gender_label, bar_color in zip(('Male', 'Female', 'Other'), colors):
    gender_counts = gender_target[gender_label]
    fig.add_trace(go.Bar(
        y=gender_counts,
        name=gender_label,
        marker_color=bar_color,
        text=percentage(gender_counts).values
    ))

fig.update_layout(
    xaxis=job_change,
    autosize=True,
    title='<b>Gender and Job Change</b>',
    font={'family': 'Arial', 'size': 10}
)

fig.update_traces(textposition='outside')

fig.update_xaxes(title='Job Change')

fig.show()

In [None]:
#@title
# Number of respondents for every (enrolled_university, target) pair.
enrollment = hr.groupby(['enrolled_university', 'target']).size()

fig = go.Figure()

# (group key in the data, legend label, bar colour)
enrollment_bars = (
    ('Full time course', 'Full time course', colors[0]),
    ('Part time course', 'Part time course', colors[1]),
    ('no_enrollment', 'no_enrollment ', colors[2]),
)
for group_key, legend_label, bar_color in enrollment_bars:
    group_counts = enrollment[group_key]
    # Bar height is the raw count; the text above the bar is the share within the group.
    fig.add_trace(go.Bar(
        y=group_counts,
        name=legend_label,
        marker_color=bar_color,
        text=percentage(group_counts).values
    ))

fig.update_layout(
    xaxis=job_change,
    autosize=True,
    title='<b>Enrollment and Job Change</b>',
    font={'family': 'Arial', 'size': 10}
)

fig.update_traces(textposition='outside')

fig.update_xaxes(title='Job Change')

fig.show()