In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
import plotly.graph_objects as go
from plotly import subplots

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found anywhere under the Kaggle
# input directory, then print one path per line.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Locations of the two input datasets, relative to the notebook's working directory.
kaggle_filepath = '../input/kaggle-survey-2020/kaggle_survey_2020_responses.csv'
continents_filepath = '../input/continent/Continent.csv'

# Read every survey column as text. Malformed lines are skipped with a warning
# rather than aborting the load: `on_bad_lines='warn'` is the replacement for
# `error_bad_lines=False` (deprecated in pandas 1.3, removed in pandas 2.0) and
# keeps the same "skip and warn" behaviour the old flag had by default.
kaggle_df = pd.read_csv(kaggle_filepath, on_bad_lines='warn', index_col=False, dtype='unicode')
# Lookup table whose 'Country' and 'Continent' columns are used later to tag each respondent.
continent_df = pd.read_csv(continents_filepath)

In [None]:
# Preview the first rows of the survey responses exactly as they were loaded.
kaggle_df.head()

In [None]:
# Remove the first row of the file (index label 0), which is not a respondent
# (presumably it holds the question text — confirm against the CSV).
# Dropping by label with errors='ignore' keeps this cell idempotent: re-running it
# no longer deletes an additional, real response each time, which the previous
# positional in-place drop did.
kaggle_df = kaggle_df.drop(index=0, errors='ignore')

Let's take a look at the countries

In [None]:
# Map the long official country names used in column 'Q3' to their common short forms.
convert_country_name = {
    'United States of America': 'United States',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'Iran, Islamic Republic of...': 'Iran',
    'Viet Nam': 'Vietnam',
}
kaggle_df['Q3'] = kaggle_df['Q3'].replace(convert_country_name)

# Build a country -> continent lookup from the continent table and use it to add
# a 'Continent' column; countries absent from the lookup end up as NaN.
continents_dictionary = dict(zip(continent_df['Country'], continent_df['Continent']))
kaggle_df['Continent'] = kaggle_df['Q3'].map(continents_dictionary)

In [None]:
# Preview again to check the renamed 'Q3' values and the new 'Continent' column.
kaggle_df.head()

# Demography

In [None]:
# Sunburst chart (plotly express) of respondents by Continent, Country, Sex and Age.
# Only two levels are drawn at a time (maxdepth=2); click a sector to drill down.

# px.sunburst rejects rows where a parent level is missing but a child is present,
# so countries that have no entry in the continent lookup are grouped under
# 'Unknown' instead of making the whole chart fail. A copy is used so that
# kaggle_df itself is left untouched.
sunburst_df = kaggle_df.assign(Continent=kaggle_df['Continent'].fillna('Unknown'))

fig_sunburst = px.sunburst(
    sunburst_df,
    path=['Continent', 'Q3', 'Q2', 'Q1'],  # [Continent, Country, Sex, Age]
    template='seaborn',
    maxdepth=2,
    title='Demography',
)
fig_sunburst.update_traces(textinfo='label+percent entry')  # show each sector's label and its share
# Last expression of the cell: the returned figure is what gets displayed.
fig_sunburst.update_layout(margin=dict(t=25, l=0, r=0, b=0))

In [None]:
# One bar chart per continent showing how many respondents fall in each age band ('Q1').
continents = ['Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America']
for continent in continents:
    df_ages = kaggle_df[kaggle_df['Continent'] == continent]
    # value_counts() ignores missing answers, whereas np.unique() raises a TypeError
    # when an object column mixes strings with NaN. sort_index() keeps the age bands
    # in the same ascending order np.unique() produced.
    age_counts = df_ages['Q1'].value_counts().sort_index()
    unique_ages = age_counts.index.to_numpy()
    count_by_age = age_counts.to_numpy()
    fig_bar = px.bar(x=unique_ages, y=count_by_age, labels={'x': 'Age', 'y': 'Users'}, title='Users in {} by age'.format(continent))
    fig_bar.show()

# Graphic Libraries

In [None]:
# Columns that belong to question 14 (presumably one column per selectable
# visualisation library — confirm against the survey schema).
Q14_col = [col for col in kaggle_df.columns if 'Q14' in col]
# Distinct values found in each of those columns, in the same order as Q14_col.
Q14_ans = [kaggle_df[col].unique() for col in Q14_col]

# Build exactly one label per column so that labels and counts always line up.
# Previously every distinct string of every column was flattened into one list,
# which went out of step with the per-column counts whenever a column had no
# answers at all or more than one distinct string.
Q14_r = []
for col, answers in zip(Q14_col, Q14_ans):
    labels = [answer for answer in answers if isinstance(answer, str)]
    # A single distinct answer is the library name; otherwise fall back to the column name.
    Q14_r.append(labels[0] if len(labels) == 1 else col)

# Number of non-missing answers per column, i.e. how many respondents chose each option.
count_Q14 = kaggle_df[Q14_col].count()
fig_Q14 = px.bar(x=Q14_r, y=count_Q14, labels={'x':'Graphic Library', 'y':'Users'}, title='Q_14_Graphic_Libraries', template='seaborn')
fig_Q14.show()

# Machine Learning Frameworks

In [None]:
# Columns that belong to question 16 (presumably one column per selectable
# machine learning framework — confirm against the survey schema).
Q16_col = [col for col in kaggle_df.columns if 'Q16' in col]
# Distinct values found in each of those columns, in the same order as Q16_col.
Q16_ans = [kaggle_df[col].unique() for col in Q16_col]

# Build exactly one label per column so that labels and counts always line up.
# Previously every distinct string of every column was flattened into one list,
# which went out of step with the per-column counts whenever a column had no
# answers at all or more than one distinct string.
Q16_r = []
for col, answers in zip(Q16_col, Q16_ans):
    labels = [answer for answer in answers if isinstance(answer, str)]
    # A single distinct answer is the framework name; otherwise fall back to the column name.
    Q16_r.append(labels[0] if len(labels) == 1 else col)

# Number of non-missing answers per column, i.e. how many respondents chose each option.
count_Q16 = kaggle_df[Q16_col].count()
# Last expression of the cell: the returned figure is what gets displayed.
px.bar(x=Q16_r, y=count_Q16, labels={'x': 'ML Framework', 'y': 'Users'}, title='Q16_Machine_Learning_Frameworks', template='gridon')

# Job positions and education

In [None]:
# Distinct job positions ('Q5') in order of first appearance; may include NaN.
x_q5 = kaggle_df['Q5'].unique()
# Drop the NaN entry so unanswered rows do not become a bogus "position".
positions = [position for position in x_q5 if pd.notna(position)]

# Count respondents per education level ('Q4', rows) and job position ('Q5', columns).
# crosstab keeps every education level that occurs anywhere in the data. Filling the
# table column by column instead aligned each later column to the index created by
# the first position, silently discarding education levels that position lacked.
# reindex restores the first-appearance column order used for the chart.
df_position_educations = (
    pd.crosstab(kaggle_df['Q4'], kaggle_df['Q5'])
    .reindex(columns=positions, fill_value=0)
)

# Stacked horizontal bars: one trace per education level.
# Each pair is (row label in the table, name shown in the legend); the row labels
# must match the survey's answer text exactly, including the curly apostrophes.
degree_traces = [
    ('Doctoral degree', 'Doctoral degree'),
    ("Bachelor’s degree", "Bachelor's degree"),
    ("Some college/university study without earning a bachelor’s degree", 'Study without degree'),
    ("Master’s degree", "Master's degree"),
    ("Professional degree", "Professional degree"),
]

fig_bar_2 = go.Figure(data=[
    go.Bar(
        x=df_position_educations.loc[degree],
        y=df_position_educations.columns,
        orientation='h',
        name=legend_name)
    for degree, legend_name in degree_traces
])
fig_bar_2.update_layout(barmode='stack',
                       margin=dict(t=50, l=50, r=0, b=100),
                       template='ggplot2',
                       legend = dict(orientation='h',yanchor='top',xanchor='center',font=dict(size= 12), traceorder='normal'),
                       title='Job positions by degree')
fig_bar_2.show()