In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns 

import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Load Dataset

In [None]:
# Read the survey export. The salary header is padded with spaces in the CSV
# (' SalaryUSD '), so it is normalised immediately after loading.
df = pd.read_csv('Data_Professional_Salary_Survey_Responses.csv').rename(
    columns={' SalaryUSD ': 'SalaryUSD'}
)
df

# Show the Count of Missing Values in Each Column

In [None]:
# Number of missing (NaN) entries per column, before any cleaning.
df.isnull().sum()

# Data Cleaning:

#### Replacing missing values with np.nan

In [None]:
# Placeholder answers that should be treated as missing data.
missing_val = ['Not Asked']

# Swap every placeholder for NaN so pandas' missing-value tooling sees it.
df = df.replace(to_replace=missing_val, value=np.nan)

#### Count of NaNs after replacing

In [None]:
# Missing entries per column now that 'Not Asked' counts as NaN.
df.isnull().sum()

#### Remove columns that contain more than 30% NaN values

In [None]:
# Columns to discard. The list is hard-coded rather than computed;
# presumably these are the columns exceeding the 30% NaN threshold named in
# the heading -- verify against the counts shown above.
sparse_columns = [
    'PostalCode',
    'HowManyCompanies',
    'CompanyEmployeesOverall',
    'Education',
    'EducationIsComputerRelated',
    'Certifications',
    'HoursWorkedPerWeek',
    'TelecommuteDaysPerWeek',
    'NewestVersionInProduction',
    'OldestVersionInProduction',
    'PopulationOfLargestCityWithin20Miles',
    'OtherJobDuties',
    'KindsOfTasksPerformed',
    'LookingForAnotherJob',
]
df = df.drop(columns=sparse_columns)

In [None]:
# Missing entries per column for the columns that remain.
df.isnull().sum()

#### Fill null values

In [None]:
# Categorical columns whose gaps are filled with the most frequent value.
for column in ('OtherDatabases', 'DatabaseServers', 'CareerPlansThisYear'):
    most_common = df[column].mode()[0]
    df[column] = df[column].fillna(most_common)

# Gender is not imputed from the mode: missing answers and the literal
# string 'None' are both folded into an explicit 'Unknown' category.
df['Gender'] = df['Gender'].fillna('Unknown').replace(['None'], 'Unknown')

#### Replace outliers with nulls

In [None]:
# Inspect the distinct values to spot implausible entries.
print(df['YearsWithThisTypeOfJob'].unique())

# More than 45 years in one type of job is treated as an outlier.
# Only the offending cell is blanked: indexing .loc with the row mask alone
# (no column label) would overwrite EVERY column of those rows with NaN and
# destroy their country, salary, job title, etc.
df.loc[df['YearsWithThisTypeOfJob'] > 45, 'YearsWithThisTypeOfJob'] = np.nan

#### Fill the replaced values with the mean

In [None]:
# Impute the blanked experience values with the column mean, rounded to a
# whole number of years. NaNs are ignored when computing the mean.
years_in_role = df['YearsWithThisTypeOfJob']
m = years_in_role.mean(skipna=True)
df['YearsWithThisTypeOfJob'] = years_in_role.fillna(round(m))

#### Convert to numeric

In [None]:
# Convert SalaryUSD from text with thousands separators (e.g. "115,000")
# to float.
# Going through astype(str) first keeps the cell re-runnable: once the column
# is already float, the `.str` accessor would raise on a second execution.
# NaN round-trips as the string 'nan', which float conversion maps back to NaN.
# The trailing pd.to_numeric call was dropped: astype(float) already yields a
# numeric column.
df['SalaryUSD'] = (
    df['SalaryUSD']
    .astype(str)
    .str.replace(",", "")
    .astype(float)
)
df.head()

# Country Selection Options List

In [None]:
# One row per distinct country; the index holds the names in sorted order.
countries = df[['Country']].groupby(['Country']).count()

# Options for the country dropdowns: a "Select All" sentinel (-1) first,
# followed by one entry per country.
optionsCountry = [{'label': "Select All", 'value': -1}]
optionsCountry.extend(
    {'label': country_name, 'value': country_name}
    for country_name in countries.index
)

countries

# Years Selection

In [None]:
# One row per distinct survey year, with the index forced to integers.
years = df[['Survey Year']].groupby(['Survey Year']).count()
years.index = years.index.astype(int)

# Options for the year dropdown: a "Select All" sentinel (-1) first,
# followed by one entry per survey year.
optionsYears = [{'label': "Select All", 'value': -1}]
optionsYears += [
    {'label': years.index[pos], 'value': years.index[pos]}
    for pos in range(len(years.index))
]

years

### Job Title and Gender selection lists

In [None]:
# One row per distinct gender answer; the index holds the sorted values.
gender = df[['Gender']].groupby(['Gender']).count()

# Options for the gender radio buttons: "Unknown" first, then every other
# answer. "Unknown" is excluded by value rather than by assuming it is the
# last entry of the sorted index. If any answer sorts after it (e.g. a
# lowercase free-text value), a positional skip would list "Unknown" twice
# and drop the real last answer.
optionsGender = [{'label': "Unknown", 'value': "Unknown"}]
optionsGender.extend(
    {'label': answer, 'value': answer}
    for answer in gender.index
    if answer != "Unknown"
)

optionsGender

In [None]:
# One row per distinct job title; the index holds the sorted titles.
jobTitle = df[['JobTitle']].groupby(['JobTitle']).count()

# Options for the job-title dropdown. The featured title stays first, as
# before, and every other title follows. The featured title is excluded by
# value rather than by skipping position 0 of the sorted index: skipping by
# position drops whichever title actually sorts first and lists the featured
# title twice whenever it is not that first entry.
featured_title = "Developer: Business Intelligence (SSRS, PowerBI, etc)"
optionsJobs = [{'label': featured_title, 'value': featured_title}]
optionsJobs.extend(
    {'label': title, 'value': title}
    for title in jobTitle.index
    if title != featured_title
)

optionsJobs

# Bootstrap Dashboard App

In [None]:
import dash
import dash_bootstrap_components as dbc
#from dash.dependencies import Input, Output
from dash import Input, Output, dcc, html

# Dash application styled with the stock Bootstrap theme.
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Salary Survey"
# Expose the app's server object under a module-level name.
server = app.server

# Inline CSS for a fixed-position, fixed-width sidebar.
# NOTE(review): nothing in the layout below references this dict -- confirm
# whether it is still needed.
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
}

# Inline CSS for the page body: horizontal margins plus inner padding.
CONTENT_STYLE = {
    "margin-left": "2rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}

# Top navigation bar with one link per dashboard page, given as (label, path).
navbar = dbc.NavbarSimple(
    children=[
        dbc.NavItem(dbc.NavLink(page_label, href=page_path, active="exact"))
        for page_label, page_path in (
            ("Salary", "/"),
            ("Job Experience", "/page-1"),
            ("Primary Database", "/page-2"),
        )
    ],
    brand="Data Professional Salary Survey Responses",
    brand_href="#",
    color="primary",
    dark=True,
)


# Controls of the Salary page: grouping column, aggregation, country filter
# and plot type.
controls_page0 = dbc.Card(
    [
        # Column used to group the salary data; (label, column name) pairs.
        html.Label('Group By'),
        dcc.Dropdown(
            id="groupby",
            options=[
                {'label': field_label, 'value': field_name}
                for field_label, field_name in (
                    ('Country', 'Country'),
                    ('Survey Year', 'Survey Year'),
                    ('Employment Sector', 'EmploymentSector'),
                    ('Employment Status', 'EmploymentStatus'),
                    ('Manage Staff', 'ManageStaff'),
                    ('Gender', 'Gender'),
                    ('Career Plans This Year', 'CareerPlansThisYear'),
                )
            ],
            value='Country',
        ),
        html.Br(),

        # Aggregation applied to each group.
        html.Label('Operation'),
        dcc.Dropdown(
            id="operation",
            options=[
                {'label': 'sum()', 'value': 'sum'},
                {'label': 'count()', 'value': 'count'},
            ],
            value='sum',
        ),
        html.Br(),

        # Multi-select country filter; -1 is the "Select All" sentinel.
        html.Label('Country'),
        dcc.Dropdown(
            id="country",
            options=optionsCountry,
            value=-1,
            multi=True,
        ),
        html.Br(),

        # Chart type: '1' selects a bar chart, '2' a line chart.
        html.Label('Plot Type'),
        dbc.Card(
            [
                dbc.RadioItems(
                    id='plot_radio_items',
                    value="1",
                    options=[
                        {'label': 'Bar', 'value': '1'},
                        {'label': 'Line', 'value': '2'},
                    ],
                )
            ]
        ),
    ],
    body=True,
)

# Controls of the Job Experience page: a gender radio group and a
# single-select job-title dropdown.
controls_page1 = dbc.Card(
    children=[
        html.Label('Gender'),
        dbc.Card(
            [
                dcc.RadioItems(
                    id="gender-slider",
                    options=optionsGender,
                    value='Female',
                    # Render each radio option on its own line.
                    labelStyle={'display': 'block'},
                )
            ]
        ),
        html.Br(),
        html.Label('Job Title'),
        dcc.Dropdown(
            id="slct_job",
            options=optionsJobs,
            value='Manager',
            multi=False,
            optionHeight=60,
        ),
    ],
    body=True,
)

# Controls of the Primary Database page: survey-year and country dropdowns.
controls_page2 = dbc.Card(
    children=[
        html.Label("Year"),
        dcc.Dropdown(
            id='yrs',
            options=optionsYears,
            value=2021,  # year selected when the page first loads
        ),
        html.Br(),
        html.Label("Country"),
        dcc.Dropdown(
            id='c',
            options=optionsCountry,
            value="United States",  # country selected when the page first loads
            clearable=True,
        ),
    ],
    body=True,
)

# Empty container that the routing callback fills with the current page.
content = html.Div(style=CONTENT_STYLE, id="page-content")

# Root layout: URL tracker, navigation bar, then the page container.
app.layout = html.Div(children=[dcc.Location(id="url"), navbar, content])


def _page_layout(controls, graph_id):
    """Build a page body: a controls card (4 columns) beside a graph card (8 columns)."""
    controls_column = dbc.Col(
        width=4,
        children=dbc.Card(
            [dbc.CardHeader("Controls"), dbc.CardBody(controls)]
        ),
    )
    graph_column = dbc.Col(
        width=8,
        children=dbc.Card(
            [
                dbc.CardHeader("The Visualization"),
                dbc.CardBody(dcc.Graph(id=graph_id), style={"height": "100%"}),
            ],
            style={"height": "80vh"},
        ),
    )
    return [dbc.Row([controls_column, graph_column])]


@app.callback(Output("page-content", "children"), [Input("url", "pathname")])
def render_page_content(pathname):
    """Return the children of the page container for the current URL path.

    Parameters
    ----------
    pathname : str or None
        Path reported by the ``dcc.Location`` component.

    Returns
    -------
    list or dash component
        The two-column layout of the matching page ("/" Salary,
        "/page-1" Job Experience, "/page-2" Primary Database), or a
        "404: Not found" panel for any other path.
    """
    # Path -> (controls card, id of the graph the page's callback fills).
    # Built on each call so the module-level control cards are looked up at
    # request time.
    pages = {
        "/": (controls_page0, "graphCountry"),
        "/page-1": (controls_page1, "Experience"),
        "/page-2": (controls_page2, "graph2"),
    }
    if pathname in pages:
        controls, graph_id = pages[pathname]
        return _page_layout(controls, graph_id)

    # Unknown path: show a 404 panel. dbc.Jumbotron no longer exists in
    # dash-bootstrap-components >= 1.0, so the panel is an html.Div styled
    # with Bootstrap utility classes, which works on old and new releases.
    return html.Div(
        [
            html.H1("404: Not found", className="text-danger"),
            html.Hr(),
            html.P(f"The pathname {pathname} was not recognised..."),
        ],
        className="p-3 bg-light rounded-3",
    )

############# Salary ###############
# NOTE(review): a second callback further down this file is also named
# `update_figure`. Dash registers callbacks through the decorator, so both
# still run, but the later definition shadows this module-level name.
@app.callback(Output('graphCountry', 'figure'), [Input("country", "value"),Input("groupby", "value"),Input("operation", "value"),Input("plot_radio_items","value"),])
def update_figure(val_country, val_groupby, val_operation, plot_radio_items):
    """Build the Salary page figure.

    Parameters
    ----------
    val_country : int, str, list or None
        Country filter. The dropdown is multi-select, so after any user
        interaction the value is a list; the initial value is the bare
        sentinel ``-1``. A selection that is empty, ``None``, or contains
        ``-1`` ("Select All") applies no filter.
    val_groupby : str
        Column of ``df`` to group by.
    val_operation : str
        ``'count'`` counts non-null salaries per group; any other value sums
        them.
    plot_radio_items : str
        ``'2'`` draws a line chart; any other value draws a bar chart.

    Returns
    -------
    plotly figure
        Aggregated ``SalaryUSD`` per group, with a centred title.
    """
    # Normalise the dropdown value to a list: it may arrive as the bare
    # sentinel (initial load), a list (after interaction) or None (cleared).
    if val_country is None:
        selected = []
    elif isinstance(val_country, (list, tuple)):
        selected = list(val_country)
    else:
        selected = [val_country]

    data = df
    if selected and -1 not in selected:
        data = data[data["Country"].isin(selected)]

    # Aggregate only the plotted column. Summing the whole frame would also
    # "sum" every text column, which is wasteful and can fail on mixed types.
    salary_by_group = data.groupby(val_groupby)["SalaryUSD"]
    if val_operation == 'count':
        data = salary_by_group.count().reset_index()
    else:
        data = salary_by_group.sum().reset_index()

    # Bar is the fallback so an unexpected radio value never leaves the
    # figure undefined.
    if plot_radio_items == '2':
        plot = px.line(data, x=val_groupby, y="SalaryUSD", markers=True)
    else:
        plot = px.bar(data, x=val_groupby, y="SalaryUSD")

    plot.update_layout(title={
        'text': "The Amount of Salary Based on "+val_groupby,
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
    return plot
############ End Salary ##############

############# Job Experience ###############
@app.callback(
    Output('Experience', 'figure'),
    [Input("gender-slider", "value"), Input("slct_job", "value")],
)
def update_Experince(slctdGender, slctdJob):
    """Box plot of years of experience for one gender and one job title.

    Parameters
    ----------
    slctdGender : value chosen in the gender radio group.
    slctdJob : value chosen in the job-title dropdown.

    Returns
    -------
    plotly figure
        Box plot of ``YearsWithThisTypeOfJob`` over the rows of ``df`` that
        match both selections, with a centred title.
    """
    # Keep only respondents matching both the gender and the job title.
    matches_gender = df["Gender"] == slctdGender
    matches_job = df["JobTitle"] == slctdJob
    respondents = df[matches_gender & matches_job]

    fig = px.box(
        respondents,
        y="YearsWithThisTypeOfJob",
        labels={"YearsWithThisTypeOfJob": "Years"},
    )
    fig.update_layout(
        title=dict(
            text="Experience Years for a Specific Job Title",
            y=0.95,
            x=0.5,
            xanchor='center',
            yanchor='top',
        )
    )
    return fig
############ End Job Experience ##############
    
############# Primary Database ###############
# NOTE(review): this reuses the name `update_figure` from the Salary callback
# earlier in the file. Dash registers callbacks through the decorator, so
# both still run, but this definition shadows the earlier module-level name.
@app.callback(
    Output('graph2', 'figure'),
    [Input("yrs", "value"),
    Input("c","value")]

)
def update_figure(val,c):
    """Bar chart of primary-database usage for a survey year and country.

    Parameters
    ----------
    val : int or None
        Selected survey year. ``-1`` ("Select All") or ``None`` (dropdown
        cleared) applies no year filter.
    c : str, int or None
        Selected country. ``-1`` ("Select All") or ``None`` (dropdown
        cleared) applies no country filter.

    Returns
    -------
    plotly figure
        Number of respondents per ``PrimaryDatabase`` value, with a centred
        title.
    """
    # Both dropdowns offer a "Select All" entry whose value is -1 and can be
    # cleared to None. Comparing those literally against the data would match
    # no rows, so each one disables its filter instead.
    test = df
    if val is not None and val != -1:
        test = test[test['Survey Year'] == val]
    if c is not None and c != -1:
        test = test[test['Country'] == c]

    value_counts = test['PrimaryDatabase'].value_counts()#value counts for primary database

    # Plain list/array inputs are used so Plotly Express names the axes "x"
    # and "y", which is what the `labels` mapping below is keyed on; named
    # pandas objects would be labelled by their own name instead.
    fig =px.bar(
        x=list(value_counts.index),
        y=value_counts.values,
        labels={"x":" Primary Databases","y":"No. of Used Database"},
    )#bar graph
    fig.update_layout(
    title={
        'text': "The Most Used Databases",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

    return fig

############ End Primary Database ##############

# Start the dashboard server when this code runs as the main module.
if __name__ == "__main__":
    dashboard_port = 8888
    app.run_server(port=dashboard_port)
