In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns 

import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Load Dataset

In [None]:
# Read the survey responses and normalise the salary header, which is
# stored in the CSV with a leading and a trailing space.
df = (
    pd.read_csv('Data_Professional_Salary_Survey_Responses.csv')
    .rename(columns={' SalaryUSD ': 'SalaryUSD'})
)
df

# Count of NaN values in each column

In [None]:
# Number of missing (NaN) entries per column, summed down the rows.
df.isna().sum(axis=0)

# Data Cleaning:

#### Replacing missing values with np.nan

In [None]:
# Survey placeholder strings that should be treated as missing data.
missing_val = ['Not Asked']

# Swap every placeholder for NaN directly on the existing frame.
df.replace(to_replace=missing_val, value=np.nan, inplace=True)

### Count of NaNs after replacement

In [None]:
# Number of missing (NaN) entries per column, summed down the rows.
df.isna().sum(axis=0)

#### Remove columns that contain more than 30% NaN values

In [None]:
# The columns to discard are listed by hand; the list is not derived from
# the NaN counts at run time.
df = df.drop(
    columns=[
        'PostalCode',
        'HowManyCompanies',
        'CompanyEmployeesOverall',
        'Education',
        'EducationIsComputerRelated',
        'Certifications',
        'HoursWorkedPerWeek',
        'TelecommuteDaysPerWeek',
        'NewestVersionInProduction',
        'OldestVersionInProduction',
        'PopulationOfLargestCityWithin20Miles',
        'OtherJobDuties',
        'KindsOfTasksPerformed',
        'LookingForAnotherJob',
    ]
)

#### Fill null values

In [None]:
# Categorical columns whose gaps are filled with the most frequent value.
for col in ('OtherDatabases', 'DatabaseServers', 'CareerPlansThisYear'):
    df[col] = df[col].fillna(df[col].mode()[0])

# Gender is not imputed with the mode: both missing entries and the literal
# answer 'None' are recorded as 'Unknown'.
df['Gender'] = df['Gender'].fillna('Unknown').replace(['None'], 'Unknown')

#### Convert to numeric

In [None]:
# Convert SalaryUSD to float64.
#
# The raw values are text with thousands separators (e.g. "200,000").  The
# cleaning step is skipped when the column is already numeric, so re-running
# this cell does not fail on the `.str` accessor.  Only real strings are
# edited; non-string entries (NaN, or numbers in a mixed-type column) are
# passed through unchanged instead of being turned into NaN.
salary = df["SalaryUSD"]
if not pd.api.types.is_numeric_dtype(salary):
    salary = salary.map(
        lambda value: value.replace(",", "").strip() if isinstance(value, str) else value
    )

# to_numeric raises ValueError on text that is still not a number; the final
# astype keeps the result float64 even when every value is integral.
df["SalaryUSD"] = pd.to_numeric(salary).astype(float)

# Country Selection Options list

In [None]:
# One row per distinct country; the country names end up in the index.
countries = df[['Country']].groupby(['Country']).count()

# Options for the multi-select dropdown: a "Select All" entry (value -1)
# followed by one entry per country, labelled and valued by its name.
optionsCountry = [{'label': "Select All", 'value': -1}] + [
    {'label': name, 'value': name} for name in countries.index
]

countries

In [None]:
def update_figure(val_country):
    """Build a bar chart of the summed SalaryUSD per country.

    Parameters
    ----------
    val_country : int, str, list or None
        Current value of the country dropdown.  ``-1`` is the value of the
        "Select All" option.  Because the dropdown is multi-select the value
        may also be a list, possibly containing ``-1``, or ``None``.

    Returns
    -------
    The figure produced by ``px.bar`` with ``Country`` on the x axis and
    the summed ``SalaryUSD`` on the y axis.

    Notes
    -----
    * Any selection that includes ``-1`` shows every country.
    * ``None`` is treated like an empty selection (no rows, empty chart)
      instead of raising inside ``isin``.
    """
    # Normalise the dropdown value to a plain list.
    if val_country is None:
        selected = []
    elif isinstance(val_country, (list, tuple, set)):
        selected = list(val_country)
    else:
        selected = [val_country]

    data = df

    # Filter only when "Select All" is not part of the selection.
    if -1 not in selected:
        data = data[data["Country"].isin(selected)]

    # Aggregate the salary column only; summing the whole frame would also
    # try to "sum" the text columns.
    data = data.groupby("Country", as_index=False)["SalaryUSD"].sum()

    return px.bar(
        data,
        x="Country",
        y="SalaryUSD",
    )

# Dashboard App

In [None]:
app = JupyterDash(__name__)

# Page layout: a label, the multi-select country picker, then the bar chart.
app.layout = html.Div(
    children=[
        html.Label('Country'),
        dcc.Dropdown(
            id="country",
            options=optionsCountry,
            multi=True,
            value=-1,  # initial selection is the "Select All" option's value
        ),
        dcc.Graph(id="graphCountry"),
    ]
)


@app.callback(
    Output('graphCountry', 'figure'),
    [Input("country", "value")],
)
def update_figure(val_country):
    """Dash callback: redraw the salary-per-country bar chart.

    Parameters
    ----------
    val_country : int, str, list or None
        Current value of the ``country`` dropdown.  ``-1`` is the value of
        the "Select All" option.  Because the dropdown is multi-select the
        value may also be a list, possibly containing ``-1``, or ``None``.

    Returns
    -------
    The figure produced by ``px.bar`` with ``Country`` on the x axis and
    the summed ``SalaryUSD`` on the y axis; it is written to the
    ``figure`` property of ``graphCountry``.

    Notes
    -----
    * Any selection that includes ``-1`` shows every country.
    * ``None`` is treated like an empty selection (no rows, empty chart)
      instead of raising inside ``isin``.
    """
    # Normalise the dropdown value to a plain list.
    if val_country is None:
        selected = []
    elif isinstance(val_country, (list, tuple, set)):
        selected = list(val_country)
    else:
        selected = [val_country]

    data = df

    # Filter only when "Select All" is not part of the selection.
    if -1 not in selected:
        data = data[data["Country"].isin(selected)]

    # Aggregate the salary column only; summing the whole frame would also
    # try to "sum" the text columns.
    data = data.groupby("Country", as_index=False)["SalaryUSD"].sum()

    return px.bar(
        data,
        x="Country",
        y="SalaryUSD",
    )
   


# Start the Dash server in "inline" mode with debugging switched off.
if __name__ == '__main__':
    app.run_server(debug=False, mode="inline")