In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt

import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
pio.templates.default = "none"
# import plotly.offline as py
# py.offline.init_notebook_mode()

import os
# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the multiple-choice responses of the 2017-2021 Kaggle surveys, one frame per year.
# Only the 2017 file is read with an explicit ISO-8859-1 encoding.
df17 = pd.read_csv(
    "/kaggle/input/kaggle-survey-2017/multipleChoiceResponses.csv",
    encoding="ISO-8859-1",
)
df18 = pd.read_csv("/kaggle/input/kaggle-survey-2018/multipleChoiceResponses.csv")
df19 = pd.read_csv("/kaggle/input/kaggle-survey-2019/multiple_choice_responses.csv")
df20 = pd.read_csv("/kaggle/input/kaggle-survey-2020/kaggle_survey_2020_responses.csv")
df21 = pd.read_csv("/kaggle/input/kaggle-survey-2021/kaggle_survey_2021_responses.csv")

In [None]:
### Helper functions
## horizontal bar graphs 
def plotly_hBar(df, q, title, height=400,l=250,r=50,b=50,t=100,):
    """Show a horizontal, percent-normalised histogram of one column, split by region.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain column ``q`` and a ``region`` column. The first row is
        skipped (``df.iloc[1:]``) -- presumably the question-text row of the
        survey CSV; confirm against the data.
    q : str
        Name of the column plotted on the y axis.
    title : str
        Figure title.
    height : int
        Figure height in pixels (the width is fixed at 700).
    l, r, b, t : int
        Left / right / bottom / top margins in pixels.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = px.histogram(
        df.iloc[1:],
        y=q,
        orientation='h',
        width=700,
        height=height,
        histnorm='percent',
        color='region',
        color_discrete_map={"Asia": "gold", "World": "salmon"},
        opacity=0.6,
    )

    fig.update_layout(
        # The title font is nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=title, font=dict(size=28)),
        font_family="San Serif",
        bargap=0.2,
        barmode='group',
        paper_bgcolor='#E6E6E6',
        plot_bgcolor='#E6E6E6',
        legend=dict(orientation="v", y=1, yanchor="top", x=1.250, xanchor="right"),
        yaxis_title=None,
        yaxis_linewidth=2.5,
        autosize=False,
        margin=dict(l=l, r=r, b=b, t=t),
    )
    fig.update_traces(marker_line_color='black', marker_line_width=1.5)
    fig.update_xaxes(showgrid=False)
    # Largest total at the top of the chart.
    fig.update_yaxes(categoryorder='total ascending', showgrid=False)
    fig.show()
    

###################
## vertical bar graphs##########################
def plotly_vBar(df, q, title, l=50,r=50,b=50,t=100):
    """Show a vertical, percent-normalised histogram of one column, split by region.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain column ``q`` and a ``region`` column. The first row is
        skipped (``df.iloc[1:]``) -- presumably the question-text row of the
        survey CSV; confirm against the data.
    q : str
        Name of the column plotted on the x axis.
    title : str
        Figure title.
    l, r, b, t : int
        Left / right / bottom / top margins in pixels.

    Returns
    -------
    None
        The figure (700 x 450 px) is displayed with ``fig.show()``.
    """
    fig = px.histogram(
        df.iloc[1:],
        x=q,
        orientation='v',
        width=700,
        height=450,
        histnorm='percent',
        color='region',
        color_discrete_map={"Asia": "gold", "World": "salmon"},
        opacity=0.6,
    )

    fig.update_layout(
        # The title font is nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=title, font=dict(size=28)),
        font_family="San Serif",
        bargap=0.2,
        barmode='group',
        paper_bgcolor='#E6E6E6',
        plot_bgcolor='#E6E6E6',
        legend=dict(orientation="v", y=1, yanchor="top", x=1.250, xanchor="right"),
        xaxis_title=None,
        xaxis_linewidth=2.5,
        autosize=False,
        margin=dict(l=l, r=r, b=b, t=t),
    )
    fig.update_traces(marker_line_color='black', marker_line_width=1.5)
    # Largest total first along the x axis.
    fig.update_xaxes(categoryorder='total descending', showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()

## head_count function copied from https://www.kaggle.com/ruchi798/kaggle-ml-ds-survey-analysis
def head_count(df, question_num, parts):
    """Count the most frequent answer of every column of a multi-part question.

    The columns inspected are ``Q<question_num>_Part_1`` ...
    ``Q<question_num>_Part_<parts - 1>`` followed by ``Q<question_num>_OTHER``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the question columns.
    question_num : int or str
        Question number used to build the column names.
    parts : int
        One more than the number of ``_Part_`` columns (``range(1, parts)``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Category`` (most frequent non-null value of each column) and
        ``Value`` (its count), one row per column, in column order. A column
        without any non-null value is reported under its own column name with
        a count of 0 instead of raising ``IndexError``.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from ``df``.
    """
    questions = ['Q' + str(question_num) + '_Part_' + str(j) for j in range(1, parts)]
    questions.append('Q' + str(question_num) + '_OTHER')

    categories = []
    values = []
    for column in questions:
        counts = df[column].value_counts()
        if counts.empty:
            # Nobody in this subset picked the option: keep the row so the
            # result has the same shape for every subset.
            categories.append(column)
            values.append(0)
            continue
        categories.append(counts.index[0])
        # `.iloc[0]` is explicitly positional; `counts[0]` on a label index is
        # deprecated positional fallback in pandas 2.x.
        values.append(counts.iloc[0])

    return pd.DataFrame({'Category': categories, 'Value': values})

###########################################
## make a dataframe with percentages included 
def df_with_percentages(df, q, n, region):
    """Return the ``head_count`` table for question ``q`` with two extra columns.

    ``Percentage`` is each count divided by ``len(df)``, times 100, rounded to
    two decimals; ``Region`` holds the ``region`` label on every row.
    """
    df_perc = head_count(df, q, n)
    n_rows = len(df)

    df_perc['Percentage'] = [np.round(count / n_rows * 100, 2) for count in df_perc.Value]
    df_perc['Region'] = region
    return df_perc
    

###############################################################
## bargraphs for multipart questions 
def plot_barH_percent(df1, df2, title, l=150, r=50, b=50, t=100):
    """Show grouped horizontal bars comparing two percentage tables.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Table with ``Category`` and ``Percentage`` columns; drawn as the
        salmon "World" trace.
    df2 : pandas.DataFrame
        Table with the same columns; drawn as the gold "Asia" trace.
    title : str
        Figure title.
    l, r, b, t : int
        Left / right / bottom / top margins in pixels.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()
    for table, trace_name, bar_color in ((df1, "World", 'salmon'), (df2, "Asia", 'gold')):
        fig.add_trace(
            go.Bar(
                y=table['Category'],
                x=table['Percentage'],
                orientation='h',
                name=trace_name,
                marker_color=bar_color,
                opacity=0.6,
            )
        )
    fig.update_traces(marker_line_color='black', marker_line_width=1.5)

    fig.update_layout(
        # The title font is nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=title, font=dict(size=28)),
        font_family="San Serif",
        bargap=0.2,
        barmode='group',
        paper_bgcolor='#E6E6E6',
        plot_bgcolor='#E6E6E6',
        legend=dict(orientation="v", y=1, yanchor="top", x=1.250, xanchor="right"),
        xaxis_title='percentage',
        yaxis_linewidth=2.5,
        autosize=False,
        margin=dict(l=l, r=r, b=b, t=t),
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(categoryorder='total ascending', showgrid=False)
    fig.show()
    
    
###################################################   
def annotated_heatmap(df_w, df_a, title, width=850):
    """Show a two-row annotated heatmap (World, Asia) of category percentages.

    Parameters
    ----------
    df_w, df_a : pandas.DataFrame
        Tables with ``Category`` and ``Percentage`` columns for the World and
        Asia rows. The column labels of the heatmap come from ``df_w``.
    title : str
        Figure title (centred).
    width : int
        Figure width in pixels (the height is fixed at 350).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    x = list(df_w['Category'])
    y = ['World', 'Asia']

    world_pct = list(df_w['Percentage'])
    asia_pct = list(df_a['Percentage'])
    asia_cats = list(df_a['Category'])

    # The two tables may arrive in different row orders (callers sort each one
    # by its own counts), which would pair Asia values with the wrong category
    # label. Re-order the Asia row by category when both tables hold exactly
    # the same, unique categories; otherwise keep the positional pairing.
    if len(set(x)) == len(x) == len(asia_cats) and set(asia_cats) == set(x):
        pct_by_category = dict(zip(asia_cats, asia_pct))
        asia_pct = [pct_by_category[category] for category in x]

    z = [world_pct, asia_pct]

    fig = ff.create_annotated_heatmap(z, x=x, y=y, annotation_text=z, colorscale='orrd', xgap=3, ygap=3)

    fig.update_layout(
        # Title text, position and font are nested under `title`; the flat
        # `titlefont` attribute is deprecated in plotly.
        title=dict(text=title, x=0.5, font={'size': 24, 'family': 'San Serif'}),
        width=width, height=350,
        xaxis_showgrid=False,
        xaxis={'side': 'bottom'},
        yaxis_showgrid=False,
        yaxis_autorange='reversed',
        paper_bgcolor='#E6E6E6',
    )
    fig.show()
    
####################################################################   
def categorical_scatter(df1, df2, title, l=150, r=50, b=50, t=100):
    """Show a marker-only scatter comparing two percentage tables per category.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Table with ``Category`` and ``Percentage`` columns; drawn as the
        salmon "World" markers.
    df2 : pandas.DataFrame
        Table with the same columns; drawn as the gold "Asia" markers.
    title : str
        Figure title.
    l, r, b, t : int
        Left / right / bottom / top margins in pixels.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()
    for table, trace_name, marker_color in ((df1, "World", 'salmon'), (df2, "Asia", 'gold')):
        fig.add_trace(
            go.Scatter(
                y=table['Category'],
                x=table['Percentage'],
                orientation='h',
                name=trace_name,
                mode='markers',
                marker_color=marker_color,
            )
        )
    fig.update_traces(marker_line_color='black', marker_line_width=1.5)

    fig.update_layout(
        # The title font is nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=title, font=dict(size=28)),
        font_family="San Serif",
        bargap=0.2,
        barmode='group',
        paper_bgcolor='#E6E6E6',
        plot_bgcolor='#E6E6E6',
        legend=dict(orientation="v", y=1, yanchor="top", x=1.250, xanchor="right"),
        xaxis_title='percentage',
        autosize=False,
        margin=dict(l=l, r=r, b=b, t=t),
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(categoryorder='total ascending', showgrid=False)
    fig.show()
    
    
##############################################################################
def annotated_heatmap_Trans(df_w, df_a, title, width=850, height=750, l=150):
    """Show a transposed annotated heatmap: one row per category, columns World / Asia.

    Parameters
    ----------
    df_w, df_a : pandas.DataFrame
        Tables with ``Category`` and ``Percentage`` columns for the World and
        Asia columns. The row labels of the heatmap come from ``df_w``.
    title : str
        Figure title (centred).
    width, height : int
        Figure size in pixels.
    l : int
        Left margin in pixels (room for the category labels).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    x = list(df_w['Category'])
    y = ['World', 'Asia']

    world_pct = list(df_w['Percentage'])
    asia_pct = list(df_a['Percentage'])
    asia_cats = list(df_a['Category'])

    # The two tables may arrive in different row orders (callers sort each one
    # by its own counts), which would pair Asia values with the wrong category
    # label. Re-order the Asia values by category when both tables hold exactly
    # the same, unique categories; otherwise keep the positional pairing.
    if len(set(x)) == len(x) == len(asia_cats) and set(asia_cats) == set(x):
        pct_by_category = dict(zip(asia_cats, asia_pct))
        asia_pct = [pct_by_category[category] for category in x]

    # Transpose so categories run down the y axis.
    z = np.array([world_pct, asia_pct]).T

    fig = ff.create_annotated_heatmap(z, x=y, y=x, annotation_text=z, colorscale='orrd', xgap=3, ygap=3)

    fig.update_layout(
        # Title text, position and font are nested under `title`; the flat
        # `titlefont` attribute is deprecated in plotly.
        title=dict(text=title, x=0.5, font={'size': 24, 'family': 'San Serif'}),
        width=width, height=height,
        xaxis_showgrid=False,
        xaxis={'side': 'top'},
        yaxis_showgrid=False,
        yaxis_autorange='reversed',
        paper_bgcolor='#E6E6E6',
    )
    fig.update_layout(
        xaxis_title=None,
        autosize=False,
        margin=dict(l=l, r=50, b=50, t=75),
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()
    
    
###############################################    
def head_count_suf(df, question_num, part, n):
    """Count the most frequent answer of every column of a suffixed multi-part question.

    The columns inspected are ``Q<question_num><part>_Part_1`` ...
    ``Q<question_num><part>_Part_<n - 1>`` followed by
    ``Q<question_num><part>_OTHER`` (e.g. ``part='_A'`` gives ``Q27_A_Part_1``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the question columns.
    question_num : int or str
        Question number used to build the column names.
    part : str
        Suffix inserted right after the question number.
    n : int
        One more than the number of ``_Part_`` columns (``range(1, n)``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Category`` (most frequent non-null value of each column) and
        ``Value`` (its count), one row per column, in column order. A column
        without any non-null value is reported under its own column name with
        a count of 0 instead of raising ``IndexError``.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from ``df``.
    """
    prefix = 'Q' + str(question_num) + str(part)
    questions = [prefix + '_Part_' + str(j) for j in range(1, n)]
    questions.append(prefix + '_OTHER')

    categories = []
    values = []
    for column in questions:
        counts = df[column].value_counts()
        if counts.empty:
            # Nobody in this subset picked the option: keep the row so the
            # result has the same shape for every subset.
            categories.append(column)
            values.append(0)
            continue
        categories.append(counts.index[0])
        # `.iloc[0]` is explicitly positional; `counts[0]` on a label index is
        # deprecated positional fallback in pandas 2.x.
        values.append(counts.iloc[0])

    return pd.DataFrame({'Category': categories, 'Value': values})

def df_with_percentages_suf(df, q, part, n, region):
    """Return the ``head_count_suf`` table for question ``q``/``part`` with two extra columns.

    ``Percentage`` is each count divided by ``len(df)``, times 100, rounded to
    two decimals; ``Region`` holds the ``region`` label on every row.
    """
    df_perc = head_count_suf(df, q, part, n)
    n_rows = len(df)

    df_perc['Percentage'] = [np.round(count / n_rows * 100, 2) for count in df_perc.Value]
    df_perc['Region'] = region
    return df_perc

In [None]:
# Group the countries treated as "Asia" in this notebook. The `africa*` /
# `*_africa` names are kept because later cells refer to them.
# NOTE(review): 'South Africa' is on this list although it is not an Asian
# country, and the spellings must match the survey's country column exactly
# -- please confirm both against the data.
africa17 = ['China','India', 'South Africa', 'Indonesia']
africa18 = africa17 + ['Pakistan', 'Iran']
africa19 = africa18 + ['Saudi Arabia']
africa20 = africa19 + ['South Korea']
africa21 = africa20 + ['Turkey']

africa = africa21 + ['Nepal']

# For each survey year: split the rows into the Asia group and everyone else,
# and tag every row of the full frame with its region.
in_asia21 = df21['Q3'].isin(africa)
df21_africa = df21[in_asia21]
df21_world = df21[~in_asia21]
df21['region'] = np.where(in_asia21, "Asia", "World")

in_asia20 = df20['Q3'].isin(africa)
df20_africa = df20[in_asia20]
df20_world = df20[~in_asia20]
df20['region'] = np.where(in_asia20, "Asia", "World")

in_asia19 = df19['Q3'].isin(africa)
df19_africa = df19[in_asia19]
df19_world = df19[~in_asia19]
df19['region'] = np.where(in_asia19, "Asia", "World")

in_asia18 = df18['Q3'].isin(africa)
df18_africa = df18[in_asia18]
df18_world = df18[~in_asia18]
df18['region'] = np.where(in_asia18, "Asia", "World")

# The 2017 survey stores the country in a 'Country' column instead of 'Q3'.
in_asia17 = df17['Country'].isin(africa)
df17_africa = df17[in_asia17]
df17_world = df17[~in_asia17]
df17['region'] = np.where(in_asia17, "Asia", "World")

In [None]:

# Head-counts per survey year: the Asia group (`afroNN`) and everyone else (`rowNN`).
# NOTE(review): len(dfNN) presumably still includes the question-text row, so
# each `rowNN` may be one too high -- confirm against the data.
afro21 = len(df21_africa)
row21 = len(df21) - afro21

afro20 = len(df20_africa)
row20 = len(df20) - afro20

afro19 = len(df19_africa)
row19 = len(df19) - afro19

afro18 = len(df18_africa)
row18 = len(df18) - afro18

afro17 = len(df17_africa)
row17 = len(df17) - afro17


# 2021 only: absolute counts as bar lengths, shares as bar labels.
region = ['Asia', 'Rest of the World']
value = [afro21, row21]
percent = [afro21/(afro21 + row21)*100, row21/(afro21 + row21)*100]

fig = go.Figure(data=[go.Bar(
    x=value, y=region,
    text=(np.round(percent, 1)),
    textposition=['outside', 'inside'],
    texttemplate=["<b style='color: #f'>%{text}%</b>"]*2,
    textfont=dict(family="sans serif", size=16, color="black"),
    orientation='h',
    marker_color=['gold', 'salmon'],
    opacity=0.6,
)])
fig.update_traces(marker_line_color='black', marker_line_width=2.5)
fig.update_layout(
    # The title font is nested under `title`; the flat `titlefont` attribute
    # is deprecated in plotly.
    title=dict(text='<b>Number of respondents: Asia vs Rest of the world (2021)<b>',
               font=dict(size=24)),
    font_family="San Serif",
    yaxis_linewidth=2.5,
    bargap=0.2,
    barmode='group',
    paper_bgcolor='#00FF00',
    plot_bgcolor='#0000FF',
    autosize=False,
    margin=dict(l=150, r=50, b=50, t=100),
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [None]:

## historical data, all gender
# Bar height: number of Asia-group respondents per survey year.
# Bar label: their share of all respondents of that year.
year = ['2017', '2018', '2019', '2020', '2021']
value = [afro17, afro18, afro19, afro20, afro21]

percent = [
    afro17/(afro17 + row17)*100,
    afro18/(afro18 + row18)*100,
    afro19/(afro19 + row19)*100,
    afro20/(afro20 + row20)*100,
    afro21/(afro21 + row21)*100,
]

# Highlight the most recent year.
color = 5 * ['salmon']
color[4] = 'gold'

fig = go.Figure(data=[go.Bar(
    y=value, x=year,
    text=np.round(percent, 1),
    textposition='outside',
    texttemplate=["<b style='color: #f'>%{text}%</b>"]*5,
    textfont=dict(family="sans serif", size=16, color="black"),
    orientation='v',
    marker_color=color,
    opacity=0.6,
)])
fig.update_traces(marker_line_color='black', marker_line_width=2.5)
fig.update_layout(
    # The title font is nested under `title`; the flat `titlefont` attribute
    # is deprecated in plotly.
    title=dict(text='<b>The rise of Asia kagglers<b>', font=dict(size=28)),
    font_family="San Serif",
    xaxis_linewidth=2.5,
    bargap=0.2,
    barmode='group',
    paper_bgcolor='#008000',
    plot_bgcolor='#FF00FF',
    yaxis_title='Number of Respondents',
    xaxis_title='Year',
    autosize=False,
    margin=dict(l=100, r=50, b=50, t=70, pad=0),
)

fig.show()

In [None]:
# code copied and adapted from: https://www.kaggle.com/datafan07/kaggle-survey-a-general-picture/notebook
def plotly_choroplethMap(locations,counts,title):
    """Show a choropleth map coloured by participant count per country.

    Parameters
    ----------
    locations : sequence of str
        Country names (matched with ``locationmode='country names'``).
    counts : sequence of numbers
        One value per entry of ``locations``; drives the colour.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is rendered with ``iplot``. The figure is a plain dict
        passed with ``validate=False``, so plotly does not check attribute
        names here.
    """
    data = [dict(
        type='choropleth',
        locations=locations,
        z=counts,
        locationmode='country names',
        autocolorscale=True,
        marker=dict(line=dict(color='#999000', width=1.5)),
        # The former `autotick` and (misspelled) `legth` keys are not colorbar
        # attributes and were silently ignored; only valid keys are kept.
        colorbar=dict(len=0.75, title=dict(text='Nr. of Participants')),
    )]
    layout = dict(
        # Title text and font are nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=title, font={'size': 28, 'family': 'san serif'}),
        width=750,
        height=475,
        paper_bgcolor='#00FFFF',
        geo=dict(
            showframe=True,
            showcoastlines=True,
            # Zoom the map to the countries that are actually plotted.
            fitbounds="locations",
        ),
        annotations=[dict(
            x=0.55,
            y=1.1,
            xref='paper',
            yref='paper',
            text='<b> Central Asia - kaggle is missing you!',
            font=dict(family='san serif', size=20, color='#00FF00'),
            showarrow=False,
        )],
    )

    fig = dict(data=data, layout=layout)
    iplot(fig, validate=False, filename='world-map')
    
# Respondents per country within the Asia group of the 2021 survey.
z = df21_africa['Q3'].value_counts()
plotly_choroplethMap(
    locations=z.index,
    counts=z.values,
    title='<b> Asian Countries (2021 survey) <b>',
)

In [None]:
#### Heatmap
# Hard-coded respondent counts: one row per survey year, one column per country.
# Every row must have exactly len(heatmap_countries) entries (the 2017 row used
# to carry an eleventh value).
heatmap_countries = ['China', 'India', 'South Africa', 'Indonesia', 'Pakistan', 'Iran',
                     'Saudi Arabia', 'South Korea', 'Turkey', 'Nepal']
heatmap_years = ['2017', '2018', '2019', '2020', '2021']
heatmap_counts = [
    [73, 59, 127, 66, 0, 0, 0, 0, 0, 0],
    [208, 85, 147, 96, 74, 71, 0, 0, 0, 0],
    [395, 114, 120, 122, 68, 123, 58, 0, 0, 0],
    [476, 153, 141, 179, 99, 133, 0, 52, 0, 0],
    [702, 248, 146, 482, 109, 140, 44, 99, 47, 43],
]
assert all(len(row) == len(heatmap_countries) for row in heatmap_counts)

fig = go.Figure(data=go.Heatmap(
    z=heatmap_counts,
    x=heatmap_countries,
    y=heatmap_years,
    hoverongaps=False,
    opacity=1.0, xgap=2.5, ygap=2.5, colorscale='orrd'),
)
fig.update_layout(
    # Title text and font are nested under `title`; the flat `titlefont`
    # attribute is deprecated in plotly.
    title=dict(text='<b> More countries appearing on the picture <b>',
               font={'size': 24, 'family': "San Serif"}),
    height=400, width=700,
    template='simple_white',
    paper_bgcolor='#E6E6E6',
    plot_bgcolor='#E6E6E6',
    autosize=False,
    margin=dict(l=50, r=50, b=50, t=150),
)
annotations = []
annotations.append(dict(xref='paper', yref='paper',
                        x=-0.01, y=1.3,
                        text='> <b>China and India are rising quickly. China kagglers grew almost 10 folds since 2017!',
                        font=dict(family='Arial', size=12, color='#800000'),
                        showarrow=False))
# Saudi Arabia is the column that is non-zero in 2019 and 2021 but 0 in 2020
# (the note previously named Algeria, which is not among the columns).
annotations.append(dict(xref='paper', yref='paper',
                        x=-0.01, y=1.2,
                        text="> <b>Saudi Arabia</b> had a 'gap' year in 2020. Unusual.",
                        font=dict(family='Arial', size=12, color='#cc5500'),
                        showarrow=False))

fig.update_layout(annotations=annotations)
fig.show()

In [None]:
#### Pie-Chart

# Slice palette, one entry per country of the Asia group. (A stray trailing
# comma used to turn this list into a one-element tuple.)
color = ["seagreen", 'red', 'gold', 'black', "salmon", "cyan", 'blue', "gold", 'white', 'purple']

# Aggregate first so that labels, values and colours line up one-to-one;
# the slice sizes are the same counts plotly derived from the raw label list.
country_counts = df21_africa['Q3'].value_counts()

fig = go.Figure()

fig.add_trace(
    go.Pie(
        labels=country_counts.index,
        values=country_counts.values,
        hole=.4,
        # Title text and font are nested under `title`; the flat `titlefont`
        # attribute is deprecated in plotly.
        title=dict(text=" Asia ('21)", font=dict(size=24)),
        opacity=0.6,
    ),
)
fig.update_traces(
    hoverinfo='label+value',
    textinfo='label+percent',
    textfont_size=12,
    marker=dict(
        # 'orrd' is a colorscale name, not a colour, so it had no effect as a
        # slice colour; use the palette defined above instead.
        colors=color,
        line=dict(color='#000000', width=2),
    ),
)

fig.layout.update(
    title=dict(text="<b>  Asia Countries In The 2021 Survey<b>",
               font={'size': 24, 'family': 'San-Serif'}),
    showlegend=False,
    height=600,
    width=600,
)
fig.show()


In [None]:
# Q1: age distribution, Asia vs. World
title = "<b> Age Distribution <b>"
plotly_vBar(df=df21, q="Q1", title=title, l=100)

# Q2: gender distribution, Asia vs. World
title = "<b> Gender Distribution <b>"
plotly_hBar(df=df21, q="Q2", title=title, l=200)

In [None]:
## historical data, women
# The gender column and the label used for women differ between survey years.
df21_afrowomen = df21_africa[df21_africa['Q2'] == 'Woman']
df20_afrowomen = df20_africa[df20_africa['Q2'] == 'Woman']

df19_afrowomen = df19_africa[df19_africa['Q2'] == 'Female']
df18_afrowomen = df18_africa[df18_africa['Q1'] == 'Female']
df17_afrowomen = df17_africa[df17_africa['GenderSelect'] == 'Female']

afrowomen21 = len(df21_afrowomen)
afrowomen20 = len(df20_afrowomen)
afrowomen19 = len(df19_afrowomen)
afrowomen18 = len(df18_afrowomen)
afrowomen17 = len(df17_afrowomen)


# Bar height: number of women in the Asia group per year.
# Bar label: their share of that year's Asia group.
year = ['2017', '2018', '2019', '2020', '2021']
value = [afrowomen17, afrowomen18, afrowomen19, afrowomen20, afrowomen21]

percent = [
    afrowomen17/len(df17_africa)*100,
    afrowomen18/len(df18_africa)*100,
    afrowomen19/len(df19_africa)*100,
    afrowomen20/len(df20_africa)*100,
    afrowomen21/len(df21_africa)*100,
]

color = 5 * ['#808000']
color[0] = 'gray'
fig = go.Figure(data=[go.Bar(
    y=value, x=year,
    text=np.round(percent, 1),
    textposition='outside',
    texttemplate=["<b style='color: #f'>%{text}%</b>"]*5,
    textfont=dict(family="sans serif", size=16, color="black"),
    orientation='v',
    marker_color=color,
    opacity=0.6,
)])
fig.update_traces(marker_line_color='black', marker_line_width=2.5)

fig.update_layout(
    # The data plotted is the Asia group, so the title says so (it used to
    # read "African"). The title font is nested under `title`; the flat
    # `titlefont` attribute is deprecated in plotly.
    title=dict(text='<b>Asian women kagglers (2017-2021)<b>', font=dict(size=28)),
    font_family="San Serif",
    bargap=0.2,
    barmode='group',
    template='simple_white',
    paper_bgcolor='#E6E6E6',
    plot_bgcolor='#E6E6E6',
    yaxis_title='Number of Respondents',
    xaxis_title='Year',
    xaxis_linewidth=2.5,
    autosize=False,
    margin=dict(l=100, r=50, b=50, t=50),
)

# The two placeholder annotations with empty text were removed; the figure
# carries no annotations.
annotations = []
fig.update_layout(annotations=annotations)
fig.show()


In [None]:
# Q4: highest level of formal education
title = "<b>Highest level of formal education <b>"
plotly_hBar(df=df21, q="Q4", title=title, l=350)

# Q5: current role / job title
title = "<b>Current role/Job title <b>"
plotly_hBar(df=df21, q="Q5", title=title, l=200)

# Q6: coding experience
title = "<b>Coding Experience <b>"
plotly_hBar(df=df21, q="Q6", title=title, l=200)

In [None]:

# Q7 (columns Q7_Part_1..Q7_Part_12 and Q7_OTHER): World vs. Asia percentages,
# each table sorted by its own counts, largest first.
title = '<b> Programing Language Preference <b>'

df_w = df_with_percentages(df21_world, 7, 13, 'World')
df_w = df_w.sort_values(by='Value', ascending=False)
df_a = df_with_percentages(df21_africa, 7, 13, 'Asia')
df_a = df_a.sort_values(by='Value', ascending=False)

#plot_barH_percent(df_w, df_a, title)
annotated_heatmap(df_w, df_a, title)

In [None]:
# Q9 (columns Q9_Part_1..Q9_Part_12 and Q9_OTHER): IDE usage, World vs. Asia.
df_w = df_with_percentages(df21_world, 9, 13, 'World').sort_values(['Value'],ascending=False)
df_a = df_with_percentages(df21_africa, 9, 13, 'Asia').sort_values(['Value'],ascending=False)

# Merge the two Jupyter rows (index labels 0 and 10) into row 10. Only the
# numeric columns are summed: adding whole rows would also concatenate the
# Category and Region strings.
# NOTE(review): summing the percentages counts anyone who ticked both Jupyter
# options twice -- confirm this is acceptable.
for frame in (df_w, df_a):
    frame.loc[10, 'Value'] += frame.loc[0, 'Value']
    frame.loc[10, 'Percentage'] += frame.loc[0, 'Percentage']

# Drop the row that was folded into row 10.
df_w = df_w.drop(index=0)
df_a = df_a.drop(index=0)

title = "<b> Integrated Developments Environment (IDE's) Usage <b>"
plot_barH_percent(df_w, df_a, title, l=350)

In [None]:
# Q8: recommended programming language
title = "<b>Programming Language Recommendation <b>"
plotly_hBar(df=df21, q="Q8", title=title, l=150)

In [None]:
# Q15: machine learning experience
title = "<b>Machine Learning Experience <b>"
plotly_hBar(df=df21, q="Q15", title=title)

In [None]:

# Q16 (columns Q16_Part_1..Q16_Part_17 and Q16_OTHER): World vs. Asia.
# The Asia table is labelled 'Asia' like in the other cells (it used to carry
# a leftover 'Africa' label).
df_w = df_with_percentages(df21_world, 16, 18, 'World').sort_values(['Value'],ascending=False)
df_a = df_with_percentages(df21_africa, 16, 18, 'Asia').sort_values(['Value'],ascending=False)
title = "<b> Machine Learning Platform Usage <b>"
plot_barH_percent(df_w, df_a, title, l=150)

In [None]:
# Q17 (columns Q17_Part_1..Q17_Part_11 and Q17_OTHER): World vs. Asia,
# each table sorted by its own counts, largest first.
title = "<b> Machine Learning Algorithms Usage <b>"

df_w = df_with_percentages(df21_world, 17, 12, 'World')
df_w = df_w.sort_values(by='Value', ascending=False)
df_a = df_with_percentages(df21_africa, 17, 12, 'Asia')
df_a = df_a.sort_values(by='Value', ascending=False)

plot_barH_percent(df_w, df_a, title, l=300)

In [None]:
# Q18 (columns Q18_Part_1..Q18_Part_6 and Q18_OTHER): World vs. Asia.
df_w = df_with_percentages(df21_world, 18, 7, 'World').sort_values(['Value'],ascending=False)
df_a = df_with_percentages(df21_africa, 18, 7, 'Asia').sort_values(['Value'],ascending=False)

title= "<b> Computer Vision Methods Used <b>"

annotated_heatmap_Trans(df_w, df_a, title, width=950, height=550, l=700)

# Q19 (columns Q19_Part_1..Q19_Part_5 and Q19_OTHER): World vs. Asia, unsorted.
# The Asia table is labelled 'Asia' like in the other cells (it used to carry
# a leftover 'Africa' label).
df_w = df_with_percentages(df21_world, 19, 6, 'World')
df_a = df_with_percentages(df21_africa, 19, 6, 'Asia')

title= "<b> Natural Language Processing Methods Used <b>"

annotated_heatmap_Trans(df_w, df_a, title, width=700, height=550, l=400)

In [None]:
# Q14 (columns Q14_Part_1..Q14_Part_11 and Q14_OTHER): World vs. Asia,
# each table sorted by its own counts, largest first.
title = '<b> Data Visualizations Library<b>'

df_w = df_with_percentages(df21_world, 14, 12, 'World')
df_w = df_w.sort_values(by='Value', ascending=False)
df_a = df_with_percentages(df21_africa, 14, 12, 'Asia')
df_a = df_a.sort_values(by='Value', ascending=False)

annotated_heatmap(df_w, df_a, title)

In [None]:
# Q11: computing platform usage
title = "<b> Computing platform usage <b>"
plotly_hBar(df=df21, q="Q11", title=title, l=350)

In [None]:
# Q27_A (columns Q27_A_Part_1..Q27_A_Part_11 and Q27_A_OTHER): World vs. Asia.
# The Asia table is labelled 'Asia' like in the other cells (it used to carry
# a leftover 'Africa' label). A first `title` assignment that was overwritten
# before use has been removed.
#plotly_hBar(df21, "Q27", title=title, l=300)
df_w = df_with_percentages_suf(df21_world, 27, '_A', 12, 'World').sort_values(['Value'],ascending=False)
df_a = df_with_percentages_suf(df21_africa, 27, '_A', 12, 'Asia').sort_values(['Value'],ascending=False)

title = "<b> Frequently Used Cloud Computing Platforms<b>"
plot_barH_percent(df_w, df_a, title, l=250)

In [None]:
# Q25: respondents per bracket, Asia vs. rest of the world.
# * The question-text row is removed by label, not by position: `.iloc[1:]` on
#   the Asia subset discarded its first genuine respondent.
#   NOTE(review): assumes index label 0 of df21 is the question-text row -- confirm.
# * The counts are named explicitly; renaming a "Q25" column is a no-op on
#   pandas >= 2.0, where value_counts() names its result "count".
dfa = df21_africa.drop(index=0, errors='ignore')['Q25'].value_counts().rename('Asia').to_frame()
dfw = df21_world.drop(index=0, errors='ignore')['Q25'].value_counts().rename('World').to_frame()

DF = pd.concat([dfa, dfw], axis=1)
af_sum = DF['Asia'].sum()
rw_sum = DF['World'].sum()

# Share of each region's answers per bracket, in percent.
DF['per_af'] = DF['Asia']/af_sum*100
DF['per_rw'] = DF['World']/rw_sum*100

# Brackets from lowest to highest; used as the category order of the y axis.
order = ['$0-999', '1,000-1,999', '2,000-2,999',  '3,000-3,999','4,000-4,999','5,000-7,499',
         '7,500-9,999','10,000-14,999', '15,000-19,999','20,000-24,999', '25,000-29,999',
         '30,000-39,999', '40,000-49,999', '50,000-59,999','60,000-69,999','70,000-79,999', '80,000-89,999', '90,000-99,999',
       '100,000-124,999', '125,000-149,999','150,000-199,999', '200,000-249,999', '250,000-299,999', '300,000-499,999',
       '$500,000-999,999','>$1,000,000',]

# The shaded band below spans y = -0.5 .. 6.5, i.e. the seven lowest brackets.
# The two figures printed inside it are computed from the data instead of
# being hard-coded.
# NOTE(review): interpreted as "share of each region in the shaded brackets"
# -- confirm that this is what the former fixed 42.7% / 77.5% stood for.
low_brackets = order[:7]
world_low_share = DF['per_rw'].reindex(low_brackets).sum()
asia_low_share = DF['per_af'].reindex(low_brackets).sum()

title = '<b> The Payment Pyramid <b>'


fig = go.Figure()
# World bars grow to the left (negative x), Asia bars to the right.
fig.add_trace(go.Bar(y=DF.index,
                     x=-DF['per_rw'],
                     orientation='h',
                     name="World",
                     marker_color='salmon',
                     opacity=0.6,
                     ))
fig.add_trace(go.Bar(y=DF.index,
                     x=DF['per_af'],
                     orientation='h',
                     name="Asia",
                     marker_color='gold',
                     opacity=0.6,
                     ))
fig.update_traces(marker_line_color='black', marker_line_width=1.5)
fig.update_layout(
    # The title font is nested under `title`; the flat `titlefont` attribute
    # is deprecated in plotly.
    title=dict(text=title, font=dict(size=28)),
    height=700,
    width=700,
    font_family="San Serif",
    bargap=0.2,
    barmode='group',
    template='simple_white',
    paper_bgcolor='#E6E6E6',
    plot_bgcolor='#E6E6E6',
    legend=dict(orientation="v", y=1, yanchor="top", x=1.250, xanchor="right"),
)
# With xref="paper" the plot area spans x = 0..1, so the band ends at 1
# (it used to extend to 6, far beyond the plot area).
fig.add_shape(type="rect",
              xref="paper", yref="y",
              x0=0, x1=1, y0=-0.5, y1=6.5,
              fillcolor="lightseagreen",
              opacity=0.2,
              )

fig.update_yaxes(categoryarray=order)
fig.update_layout(
    xaxis_title='percentage',
    autosize=False,
    margin=dict(l=150, r=50, b=150, t=50),
)


annotations = []
# Left (World) and right (Asia) shares of the shaded low brackets.
annotations.append(dict(xref='paper', yref='paper',
                        x=0.1, y=0.15,
                        text=f'<b> {world_low_share:.1f}%',
                        font=dict(family='Arial', size=20, color='black'),
                        showarrow=False))
annotations.append(dict(xref='paper', yref='paper',
                        x=0.6, y=0.15,
                        text=f"<b> {asia_low_share:.1f}%",
                        font=dict(family='Arial', size=20, color='black'),
                        showarrow=False))

annotations.append(dict(xref='paper', yref='paper',
                        x=-0.2, y=-0.2,
                        text="<i>Note: This payment scale does not account for cost of living!",
                        font=dict(family='Arial', size=16, color='black'),
                        showarrow=False))

fig.update_layout(annotations=annotations)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [None]:
# Q40 (columns Q40_Part_1..Q40_Part_11 and Q40_OTHER): World vs. Asia,
# each table sorted by its own counts, largest first.
title = "<b> Data Science Learning Platform<b>"

df_w = df_with_percentages(df21_world, 40, 12, 'World')
df_w = df_w.sort_values(by='Value', ascending=False)
df_a = df_with_percentages(df21_africa, 40, 12, 'Asia')
df_a = df_a.sort_values(by='Value', ascending=False)

plot_barH_percent(df_w, df_a, title, l=350)

In [None]:
# Q42 (columns Q42_Part_1..Q42_Part_11 and Q42_OTHER): World vs. Asia,
# each table sorted by its own counts, largest first.
title = "<b> Favourite Media Source for DS Topics  <b>"

df_w = df_with_percentages(df21_world, 42, 12, 'World')
df_w = df_w.sort_values(by='Value', ascending=False)
df_a = df_with_percentages(df21_africa, 42, 12, 'Asia')
df_a = df_a.sort_values(by='Value', ascending=False)

annotated_heatmap_Trans(df_w, df_a, title, width=850, height=550, l=500)

References:


[1]. https://www.kaggle.com/

[2]. https://www.kaggle.com/mhajabri/africai

[3]. https://www.worldometers.info/world-population/africa-population/

[4]. https://www.statista.com/statistics/1218173/life-expectancy-in-african-countries/

[5]. https://www.kaggle.com/desalegngeb/how-popular-is-kaggle-in-africa/notebook