In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

# Raise pandas' display limits to 500 rows and 500 columns.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found below /kaggle/input.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the survey responses, relative to the notebook's working directory.
SURVEY_CSV = '../input/kaggle-survey-2020/kaggle_survey_2020_responses.csv'

# Load the raw responses and show (rows, columns).
df = pd.read_csv(SURVEY_CSV)
df.shape

In [None]:
# Summary statistics of the raw frame; shown because it is the cell's last expression.
df.describe()

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Drop the first row of the raw frame and keep every column.
# NOTE(review): presumably that row holds the question text rather than an answer — confirm.
df_fin = df.iloc[1:]

In [None]:
# Preview df_fin, i.e. the raw frame without its first row.
df_fin.head()

In [None]:
# Fraction of missing answers per column (0.0 = fully answered, 1.0 = entirely missing).
# Taking the mean of the boolean null mask divides by the number of rows in
# df_fin itself, so the ratio is not diluted by the extra row that only exists
# in the unfiltered df.
df_fin.isnull().mean()

In [None]:
# Lookup from question number to that question's data; filled in by a later cell.
Questions = {}

# Collect the text before the first underscore of every column name,
# keeping each prefix once and in first-seen order.
qnums = []
for column_name in df_fin.columns:
    prefix = column_name.split('_')[0]
    if prefix not in qnums:
        qnums.append(prefix)

qnums

In [None]:
# Add the data for each question to the Questions dictionary:
#   * Q1, Q2 and Q3 are stored as a Series (their single answer column);
#   * every other question is stored as a DataFrame of all of its columns.
# Columns are matched on the exact text before the first underscore. A bare
# startswith() test would also hand "Q10".."Q19" to "Q1", which is why the
# comparison is done on the prefix instead.
single_column_questions = ('Q1', 'Q2', 'Q3')
for i in qnums:
    if i in single_column_questions:
        Questions[i] = df_fin[i]
    else:
        question_columns = [q for q in df_fin.columns if q.split('_')[0] == i]
        Questions[i] = df_fin[question_columns]

In [None]:
# Show the Q1 answer column on its own.
df_fin.Q1

In [None]:
# Histogram of the Q1 answers (plotly express is already imported as px in the
# first cell, so it is not imported again here).
# Without an explicit order the bars follow the order in which the answers first
# appear in the data; sorting the labels puts the brackets in ascending order.
q1_sorted = sorted(df_fin['Q1'].dropna().unique())

fig = px.histogram(df_fin, x='Q1',
                   category_orders={'Q1': q1_sorted},
                   title='Respondents per Q1 answer')
fig.show()

In [None]:
# Density heatmap of Q1 (x) against Q6 (y) with both axes in their natural order.

# '35-39' has to be listed explicitly: labels missing from category_orders are
# appended after the listed ones, which would put that bracket after '70+'.
q1_order = ['18-21', '22-24', '25-29', '30-34', '35-39',
            '40-44', '45-49', '50-54', '55-59', '60-69', '70+']

# NOTE(review): the under-one-year label is believed to be spelled '< 1 years'
# (with a space) in the data; both spellings are listed and only those that
# actually occur in Q6 are kept, so no empty row is drawn for the other one.
q6_candidates = ['I have never written code', '< 1 years', '<1 years', '1-2 years',
                 '3-5 years', '5-10 years', '10-20 years', '20+ years']
q6_present = set(df_fin['Q6'].dropna().unique())
q6_order = [label for label in q6_candidates if label in q6_present]

fig = px.density_heatmap(df_fin, x='Q1', y='Q6',
                         category_orders={'Q1': q1_order, 'Q6': q6_order})
fig.show()

In [None]:
# Show the Q7 columns collected in Questions.
Questions['Q7']

In [None]:
# Rename every Q7 column to its most frequent (mode) value.
# NOTE(review): assumes each Q7 column holds one distinct answer label, so the
# mode is that label — confirm against the data.
# Only the first mode row is used, as a flat list: passing the 2-D mode array
# wrapped in list() hands pandas a list of arrays, which it turns into a
# MultiIndex and which breaks the two-column rename below when there is more
# than one mode row.
q7_labels = Questions['Q7'].mode().iloc[0].tolist()
Questions['Q7'].columns = q7_labels

# Non-null answers per column, most selected first.
q7 = Questions['Q7'].count().reset_index()
q7.columns = ['language', 'Count']
q7 = q7.sort_values('Count', ascending=False)

fig = go.Figure([go.Bar(x=q7.language, y=q7.Count)])
fig.update_layout(xaxis_title='language', yaxis_title='Count')
fig.show()

In [None]:
# Create a dictionary that maps every distinct Q5 value to the rows of df_fin
# carrying that value. Keys keep the order in which unique() returns them.
Roles = {}
for role in df_fin.Q5.unique():
    if pd.isna(role):
        # unique() also yields NaN, and `Q5 == NaN` is never true; select the
        # rows with a missing Q5 explicitly so this key is not an empty frame.
        role_mask = df_fin.Q5.isna()
    else:
        role_mask = df_fin.Q5 == role
    Roles[role] = df_fin[role_mask]

In [None]:
# Keys of Roles, i.e. the distinct Q5 values found in df_fin.
Roles.keys()

In [None]:
# Rows of df_fin whose Q5 value is 'Student'.
Roles['Student']

In [None]:
# Education level variation: how many respondents gave each Q4 answer.
edu = df_fin['Q4'].value_counts()
edu

In [None]:
# Bar chart of the Q4 counts over all respondents.
fig = go.Figure()
fig.add_trace(go.Bar(x=edu.index, y=edu.values))
fig.show()

In [None]:
# Education for just data scientists: Q4 counts within the 'Data Scientist' rows.
ds_edu = Roles['Data Scientist']['Q4'].value_counts()

fig = go.Figure()
fig.add_trace(go.Bar(x=ds_edu.index, y=ds_edu.values))
fig.show()

In [None]:
# Plotting graphs of different career tracks

# The figure starts with the Q4 counts of all respondents as its only trace.
fig = go.Figure()
fig.add_trace(go.Bar(x=edu.index, y=edu.values))

# One dropdown entry per role; selecting it restyles trace 0 with that role's
# Q4 counts.
buttons = []
for role in ('Data Scientist', 'Student', 'Data Analyst'):
    role_counts = Roles[role].Q4.value_counts()
    buttons.append(dict(method='restyle',
                        label=role,
                        visible=True,
                        args=[{'y': [role_counts.values],
                               'x': [role_counts.index],
                               'type': 'bar'}, [0]]))

# A single dropdown menu that opens downwards and highlights the active entry.
your_menu = {'buttons': buttons, 'direction': 'down', 'showactive': True}
updatemenu = [your_menu]

fig.update_layout(showlegend=False, updatemenus=updatemenu)
fig.show()

In [None]:
# Two bar traces with one dropdown each, so the education mix of any two roles
# can be compared side by side.
fig = go.Figure(layout=go.Layout(title=go.layout.Title(text="Comparing Education by Position")))

# Both traces start out as the Q4 share over all respondents.
edu_share = edu.values / edu.values.sum()
fig.add_trace(go.Bar(name='Role Selection', x=edu.index, y=edu_share))
fig.add_trace(go.Bar(name='All Data', x=edu.index, y=edu_share))

# Offer every real role. Slicing the key list with [1:] silently dropped
# whichever role happened to come first, and a NaN key (missing Q5) would
# otherwise show up as a button.
role_names = [role for role in Roles if pd.notna(role)]

buttons = []   # dropdown that restyles trace 0
buttons2 = []  # dropdown that restyles trace 1
for role in role_names:
    # Share of each Q4 answer within this role.
    role_share = Roles[role].Q4.value_counts(normalize=True)
    for trace_index, menu_buttons in enumerate((buttons, buttons2)):
        menu_buttons.append(dict(method='restyle',
                                 label=role,
                                 visible=True,
                                 # the trailing [trace_index] selects the trace to restyle
                                 args=[{'y': [role_share.values],
                                        'x': [role_share.index],
                                        'type': 'bar'}, [trace_index]]))

# Dropdown placement; updatemenus takes one dict per dropdown.
# https://plotly.com/python/dropdowns/
button_layer_1_height = 1.23
updatemenus = [
    dict(buttons=menu_buttons,
         direction="down",
         pad={"r": 10, "t": 10},
         showactive=True,
         x=menu_x,
         xanchor="left",
         y=button_layer_1_height,
         yanchor="top")
    for menu_buttons, menu_x in ((buttons, 0.1), (buttons2, 0.5))
]

fig.update_layout(updatemenus=updatemenus)

# Fixed x-axis order so the bars do not jump around when a selection changes.
education_order = ["Doctoral degree",
                   'Master’s degree',
                   'Bachelor’s degree',
                   'Some college/university study without earning a bachelor’s degree',
                   "Professional degree",
                   "No formal education past high school",
                   "I prefer not to answer"]
fig.update_xaxes(categoryorder='array', categoryarray=education_order)
fig.show()

In [None]:
# Two bar traces ("Selection 1" / "Selection 2") with one dropdown each. Every
# dropdown offers the share over all samples plus the share within each role.
fig = go.Figure(layout=go.Layout(title=go.layout.Title(text="Comparing Education by Position")))

# Share of each Q4 answer over all respondents; initial data of both traces
# and the data behind the 'All Samples' entry.
all_share = df_fin.Q4.value_counts(normalize=True)
fig.add_trace(go.Bar(name='Selection 1', x=all_share.index, y=all_share.values))
fig.add_trace(go.Bar(name='Selection 2', x=all_share.index, y=all_share.values))

# 'All Samples' first, then every real role. Slicing the key list with [1:]
# silently dropped whichever role happened to come first, and a NaN key
# (missing Q5) would otherwise show up as a button.
selections = [('All Samples', all_share)]
selections += [(role, Roles[role].Q4.value_counts(normalize=True))
               for role in Roles if pd.notna(role)]

buttons = []   # dropdown that restyles trace 0 (Selection 1)
buttons2 = []  # dropdown that restyles trace 1 (Selection 2)
for label, share in selections:
    for trace_index, menu_buttons in enumerate((buttons, buttons2)):
        menu_buttons.append(dict(method='restyle',
                                 label=label,
                                 visible=True,
                                 # the trailing [trace_index] selects the trace to restyle
                                 args=[{'y': [share.values],
                                        'x': [share.index],
                                        'type': 'bar'}, [trace_index]]))

# Defined at cell level (not inside a loop) so it always exists when the
# menus are built.
button_layer_1_height = 1.23
updatemenus = [
    dict(buttons=menu_buttons,
         direction="down",
         pad={"r": 10, "t": 10},
         showactive=True,
         x=menu_x,
         xanchor="left",
         y=button_layer_1_height,
         yanchor="top")
    for menu_buttons, menu_x in ((buttons, 0.11), (buttons2, 0.71))
]

fig.update_layout(updatemenus=updatemenus)

# Text labels next to the two dropdowns.
fig.update_layout(
    annotations=[
        dict(text="Selection 1", x=0, xref="paper", y=1.15, yref="paper",
             align="left", showarrow=False),
        dict(text="Selection 2", x=0.65, xref="paper", y=1.15,
             yref="paper", showarrow=False)
    ])

# Fixed x-axis order so the bars do not jump around when a selection changes.
education_order = ["Doctoral degree",
                   'Master’s degree',
                   'Bachelor’s degree',
                   'Some college/university study without earning a bachelor’s degree',
                   "Professional degree",
                   "No formal education past high school",
                   "I prefer not to answer"]
fig.update_xaxes(categoryorder='array', categoryarray=education_order)
fig.show()