In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In this notebook, we are looking into every question of the survey to help us get started with further analysis on it. So, if you are also getting started this might be a good starting point for you.

In this notebook, analysis is done for <b>Pakistan</b> but if you want to analyze trends in your country, feel free to change the code.

If you like this notebook or it helps you in any way, do remember to give an <b>upvote</b>.

### Importing data

In [None]:
# Load the 2021 survey responses. low_memory=False makes pandas parse the file
# in one pass instead of chunk by chunk (avoids mixed-dtype inference).
df = pd.read_csv(
    "../input/kaggle-survey-2021/kaggle_survey_2021_responses.csv",
    low_memory=False,
)

In [None]:
# (number of rows, number of columns) of the loaded 2021 responses
df.shape

In [None]:
# Peek at the first five rows to see how the columns are laid out
df.head()

<img src="https://media.giphy.com/media/xUOxeYzt1Ei7AMnsFq/giphy.gif" width="750" align="center">

Learning from Kevin, we are going to do most of our talking through plots. So, let's get started.

<b>Note</b>: We are using Plotly for our analysis, which provides interactive plots, so feel free to play with them a little bit.


## Responses Analysis

As we are only going to focus on responses from Pakistani Kagglers, let's first take an overview of the total respondents that we'll be missing out on.

In [None]:
# Q3 is the country column (it is the column handed to getCountries for this survey).
# head(10) keeps exactly the ten most frequent answers; the previous [10::-1] slice
# selected eleven rows. iloc[::-1] keeps the chart ordered from smallest to largest.
top_countries = df.Q3.value_counts().head(10).iloc[::-1]

# The counts are already aggregated, so draw them directly as bars.
fig = px.bar(
    x=top_countries.index,
    y=top_countries.values,
    title='Top 10 Country with most responses',
)
fig.update_layout(
    xaxis_title="Country",  # the x axis lists countries, not responses
    yaxis_title="Count"
)
fig.show()

In [None]:
def getCountries(df, country, column):
    """
    Splits the responses into one country and everyone else.

    As a side effect a "country" column is written onto ``df`` itself: it holds
    ``country`` for the matching rows and "Rest of the World" for all other rows
    (including rows where ``column`` is missing).

    Parameters
    ----------
    df : dataframe
        Survey responses; modified in place (the "country" column is added or overwritten)
    country : str
        Value of ``column`` that identifies the country of interest
    column : str
        Name of the column that holds the respondent's country

    Returns
    -------
    tuple of dataframe
        (rows where ``column`` equals ``country``, all remaining rows).
        Both are selected before the "country" column is written.
    """
    isCountry = df[column] == country
    countryData = df[isCountry]
    restOfTheWorld = df[~isCountry]

    df["country"] = "Rest of the World"
    # Label-based assignment through a boolean mask: works for any index and
    # avoids chained assignment (df["country"].iloc[...] = ...), which writes to
    # a temporary object and mixes index labels with positional indexing.
    df.loc[isCountry, "country"] = country

    return countryData, restOfTheWorld

If you want to analyze the data for a different country, change the second argument passed to the getCountries function.

In [None]:
# Country that every chart below compares against the rest of the respondents.
selectedCountry = "Pakistan"
# "Q3" is the column of the 2021 file that is matched against selectedCountry.
country, restOfTheWorld = getCountries(df=df, country=selectedCountry, column="Q3")

In [None]:
# Number of responses per group, read from the "country" column that getCountries wrote onto df.
lt = df.country.value_counts()
# Share of ALL responses for each bar. Deriving it from lt keeps every label aligned
# with its bar and uses the same denominator (total responses) for both groups.
percent = lt / lt.sum() * 100
fig = px.bar(x=lt.index, y = lt.values,title='Propotion of ' +str(selectedCountry)+ ' responses compared to rest of the world',
                  text=(np.round(percent.values,2)))
fig.update_layout(
    xaxis_title="Country",
    yaxis_title="Responses count"
)
fig.show()

Pakistan does sit among the top 10 countries with the most responses, but its share of all responses is only about 2%. Has it improved from previous years? To check that, let's compare it with the previous Kaggle ML and DS surveys.

In [None]:
# Responses of the four previous surveys. Only the 2017 file is read with an
# explicit ISO-8859-1 encoding; the others use the pandas default.
df_2017 = pd.read_csv(
    "../input/kaggle-survey-2017/multipleChoiceResponses.csv",
    encoding="ISO-8859-1",
    low_memory=False,
)
df_2018 = pd.read_csv(
    "../input/kaggle-survey-2018/multipleChoiceResponses.csv",
    low_memory=False,
)
df_2019 = pd.read_csv(
    "../input/kaggle-survey-2019/multiple_choice_responses.csv",
    low_memory=False,
)
df_2020 = pd.read_csv(
    "../input/kaggle-survey-2020/kaggle_survey_2020_responses.csv",
    low_memory=False,
)

In [None]:
# The 2017 file names its country column "Country"; the later files use "Q3".
country_2017, restOfTheWorld_2017 = getCountries(df_2017, selectedCountry, "Country")
country_2018, restOfTheWorld_2018 = getCountries(df_2018, selectedCountry, "Q3")
country_2019, restOfTheWorld_2019 = getCountries(df_2019, selectedCountry, "Q3")
country_2020, restOfTheWorld_2020 = getCountries(df_2020, selectedCountry, "Q3")

# (year, all responses, selected country's responses, everyone else's responses)
yearly = [
    (2017, df_2017, country_2017, restOfTheWorld_2017),
    (2018, df_2018, country_2018, restOfTheWorld_2018),
    (2019, df_2019, country_2019, restOfTheWorld_2019),
    (2020, df_2020, country_2020, restOfTheWorld_2020),
    (2021, df, country, restOfTheWorld),
]

responses = pd.DataFrame({
    "year": [year for year, _, _, _ in yearly],
    "country": [len(selected) for _, _, selected, _ in yearly],
    # Despite its name, "total" holds the rest-of-the-world count (it is plotted as such).
    "total": [len(rest) for _, _, _, rest in yearly],
    # Both shares use the year's full response count as denominator, so they add up to 100.
    "percent": [len(selected) / len(everyone) * 100 for _, everyone, selected, _ in yearly],
    "percentRest": [len(rest) / len(everyone) * 100 for _, everyone, _, rest in yearly],
})

In [None]:
# One row per survey year: response counts and shares for the selected country and the rest
responses

In [None]:
# Grouped bars per survey year; the number above each bar is the matching
# percentage column of the responses table, rounded to two decimals.
fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry,
    x=responses["year"],
    y=responses["country"],
    text=np.round(responses["percent"], 2),
    textposition='outside',
))
fig.add_trace(go.Bar(
    name='Rest of the world',
    x=responses["year"],
    y=responses["total"],
    text=np.round(responses["percentRest"], 2),
    textposition='outside',
))
fig.update_layout(barmode='group', title=f"{selectedCountry} responses throughout the years")
fig.show()

We can see a little bit of improvement but not to a great extent. So, let's hope for next year.

## General Information

#### Age

In [None]:
# Q1 answer counts for the selected country and for everyone else.
# .iloc[1:] skips the first row of restOfTheWorld (presumably the question-text row - TODO confirm).
c_age = country["Q1"].value_counts()
rest_age = restOfTheWorld.iloc[1:]["Q1"].value_counts()

# Two donut charts side by side: selected country on the left, the rest on the right.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=c_age.index, values=c_age.values),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest_age.index, values=rest_age.values),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

# Captions placed in the hole of each donut.
fig.update_layout(
    title_text=f"Age groups of Kagglers ({selectedCountry} vs Rest of the World)",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
)
fig.show()

Most of the Kagglers in Pakistan are youngsters between 18 and 30 years of age, and none of the responses came from anyone above 70 years of age.

#### Employment status

In [None]:
# Share of each Q5 answer within each group (normalize=True), so the two groups
# can be compared even though their sizes differ.
emp = country["Q5"].value_counts(normalize=True)
rest_emp = restOfTheWorld.iloc[1:]["Q5"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=emp.index, y=emp.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_emp.index, y=rest_emp.values))
fig.update_layout(barmode='group', title="Employment Status")
fig.show()

We are using proportions instead of counts because comparing raw counts won't tell us much: there is a huge difference between the number of responses from Pakistan and from the rest of the world.

We can see that most of the Kagglers from Pakistan are students, which is basically the same as in the rest of the world.

#### Education

In [None]:
# Share of each Q4 answer within the selected country and within everyone else.
edu = country["Q4"].value_counts(normalize=True)
rest_edu = restOfTheWorld.iloc[1:]["Q4"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=edu.index, y=edu.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_edu.index, y=rest_edu.values))
fig.update_layout(barmode='group', title="Education")
fig.show()

Around the world, most Kagglers have a Master's degree, but in Pakistan the highest degree for most Kagglers is a Bachelor's.

#### Experience

In [None]:
# Share of each Q6 answer within the selected country and within everyone else.
exp = country["Q6"].value_counts(normalize=True)
rest_exp = restOfTheWorld.iloc[1:]["Q6"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=exp.index, y=exp.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_exp.index, y=rest_exp.values))
fig.update_layout(barmode='group', title="Years of Experience")
fig.show()

Surprisingly, there are more people in Pakistan who have never written code in their life than individuals who have more than 5 years of experience.

## Technical 

In [None]:
def getMultipleChoice(data,question_number,choices,part=None):
    """
    Creates a single dataframe for multiple choice questions as they are spread in multiple columns.

    For every answer column that exists in ``data`` and has at least one non-null
    entry, the most frequent value of that column and its count are kept.
    Columns that are missing or entirely empty are skipped.

    Parameters
    ----------
    data : dataframe
    question_number : int
        The question number we want to extract
    choices : int
        Exclusive upper bound of the part numbers: the columns ``_Part_1`` up to
        ``_Part_<choices - 1>`` are read, followed by the ``_OTHER`` column.
        A column named ``_Part_<choices>`` is therefore NOT read.
    part : str, optional
        applicable if it has subparts (e.g. 'A' gives columns like ``Q27_A_Part_1``)

    Returns
    -------
    dataframe
        Dataframe with category, value, and percent, sorted by value (largest first).
        percent is each value's share of the sum of all kept values, so it adds up to 100.
    """

    prefix = 'Q' + str(question_number)
    if part is not None:
        prefix += '_' + str(part)

    questions = [prefix + '_Part_' + str(i) for i in range(1, choices)]
    questions.append(prefix + '_OTHER')

    category = []
    values = []
    for question in questions:
        # Explicit checks instead of a bare except: only a missing column or a
        # column without any answers is skipped; real errors are no longer hidden.
        if question not in data.columns:
            continue
        counts = data[question].value_counts()
        if counts.empty:
            continue

        # value_counts is sorted by frequency, so position 0 is the most common answer.
        category.append(counts.index[0])
        values.append(counts.iloc[0])

    single_df = pd.DataFrame({'category': category, 'value': values})

    single_df = single_df.sort_values(['value'],ascending=False)

    single_df['percent'] = (single_df['value'] / single_df['value'].sum()) * 100

    return single_df

### Languages

In [None]:
# Q7 is spread over several columns; getMultipleChoice folds them into one table per group.
rest_languages = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=7, choices=12)
languages = getMultipleChoice(country, question_number=7, choices=12)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=languages["category"], y=languages["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_languages["category"], y=rest_languages["percent"]))
fig.update_layout(barmode='group', title="Commonly used languages")
fig.show()

As expected, Python is the favourite language, but R is not as popular in Pakistan as it is in the rest of the world.

In [None]:
# Q8 answer counts for the selected country and for everyone else.
rec = country["Q8"].value_counts()
rest_rec = restOfTheWorld.iloc[1:]["Q8"].value_counts()

# Two donut charts side by side: selected country on the left, the rest on the right.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=rec.index, values=rec.values),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest_rec.index, values=rest_rec.values),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

fig.update_layout(
    title_text="Recommendation by language",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
)
fig.show()

People from all over the world are recommending Python, so if you don't know it yet, this might be a good time to learn it. Also, it seems like Bash is equally hated throughout the world.

### Development tools

In [None]:
# Q9 answers per group (the variables keep the names used for the language chart).
rest_languages = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=9, choices=12)
languages = getMultipleChoice(country, question_number=9, choices=12)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=languages["category"], y=languages["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_languages["category"], y=rest_languages["percent"]))
fig.update_layout(barmode='group', title="Integrated development tool")
fig.show()

In [None]:
# Q10 answers per group (the variables keep the names used for the language chart).
rest_languages = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=10, choices=16)
languages = getMultipleChoice(country, question_number=10, choices=16)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=languages["category"], y=languages["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_languages["category"], y=rest_languages["percent"]))
fig.update_layout(barmode='group', title="Hosted notebook environment")
fig.show()

Colab is the most popular, which suggests that people love free things. And where more people use Kaggle Notebooks rather than Colab, maybe they are simply unaware that Colab is free ;-).

In [None]:
# Q11 answer counts for the selected country and for everyone else.
comp = country["Q11"].value_counts()
rest_comp = restOfTheWorld.iloc[1:]["Q11"].value_counts()

fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=comp.index, values=comp.values),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest_comp.index, values=rest_comp.values),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

# Horizontal legend anchored above the plot area (y=1.02) at the right edge.
fig.update_layout(
    title_text="Computing platform",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)
fig.show()

In [None]:
# Q12 answers per group; the radar charts plot the raw counts ("value"), not percentages.
rest_hardware = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=12, choices=5)
hardware = getMultipleChoice(country, question_number=12, choices=5)

# Rest of the world on the left, selected country on the right.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='polar'), dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest_hardware["category"],
        r=rest_hardware["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=hardware["category"],
        r=hardware["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=2,
)

fig.update_layout(title_text="Specialized Hardware")
fig.show()

In [None]:
# Q13 answer counts for the selected country and for everyone else.
tpu = country["Q13"].value_counts()
rest_tpu = restOfTheWorld.iloc[1:]["Q13"].value_counts()

fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=tpu.index, values=tpu.values),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest_tpu.index, values=rest_tpu.values),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

fig.update_layout(
    title_text="Tensor Processing Unit Usage",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
)
fig.show()

## Libraries & Algorithms

In [None]:
# Q14 answers per group, shown as each answer's share of all selections.
rest_lib = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=14, choices=11)
lib = getMultipleChoice(country, question_number=14, choices=11)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=lib["category"], y=lib["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_lib["category"], y=rest_lib["percent"]))
fig.update_layout(barmode='group', title="Visualization Libraries")
fig.show()

We can assume one more vote for Plotly because when I was filling in the survey, I didn't know Plotly.

In [None]:
# Share of each Q15 answer within the selected country and within everyone else.
ml = country["Q15"].value_counts(normalize=True)
rest_ml = restOfTheWorld.iloc[1:]["Q15"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=ml.index, y=ml.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_ml.index, y=rest_ml.values))
fig.update_layout(barmode='group', title="ML experience")
fig.show()

In Pakistan, there are more people who don't use ML models than people who have more than 1 year of experience in ML.

In [None]:
# Q16 answers per group, shown as each answer's share of all selections.
rest_ml_lib = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=16, choices=17)
ml_lib = getMultipleChoice(country, question_number=16, choices=17)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=ml_lib["category"], y=ml_lib["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_ml_lib["category"], y=rest_ml_lib["percent"]))
fig.update_layout(barmode='group', title="ML Libraries")
fig.show()

In [None]:
# Q17 answers per group; horizontal bars, so the categories go on the y axis.
rest_ml_algo = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=17, choices=11)
ml_algo = getMultipleChoice(country, question_number=17, choices=11)

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=ml_algo["category"], x=ml_algo["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest_ml_algo["category"], x=rest_ml_algo["percent"],
))
fig.update_layout(barmode='group', title="ML Algorithms")
fig.show()

<img src="https://i.ibb.co/Vtp6z27/meme.jpg" align="center">

In [None]:
# Q18 answers per group.
rest_cv_algo = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=18, choices=6)
cv_algo = getMultipleChoice(country, question_number=18, choices=6)

# Shorten every label to the text in front of its first "(".
rest_cv_cat = rest_cv_algo["category"].str.split('(').str[0].tolist()
cv_cat = cv_algo["category"].str.split('(').str[0].tolist()

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=cv_cat, x=cv_algo["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest_cv_cat, x=rest_cv_algo["percent"],
))
fig.update_layout(barmode='group', title="CV Algorithms")
fig.show()

In [None]:
# Q19 answers per group; horizontal bars, so the categories go on the y axis.
rest_nlp_algo = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=19, choices=5)
nlp_algo = getMultipleChoice(country, question_number=19, choices=5)

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=nlp_algo["category"], x=nlp_algo["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest_nlp_algo["category"], x=rest_nlp_algo["percent"],
))
fig.update_layout(barmode='group', title="NLP Algorithms")
fig.show()

In [None]:
# Share of each Q20 answer within the selected country and within everyone else.
emp = country["Q20"].value_counts(normalize=True)
rest_emp = restOfTheWorld.iloc[1:]["Q20"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, orientation='h', y=emp.index, x=emp.values))
fig.add_trace(go.Bar(name='Rest of the world', orientation='h', y=rest_emp.index, x=rest_emp.values))
fig.update_layout(barmode='group', title="Current Employer")
fig.show()

In [None]:
# Share of each Q21 answer within the selected country and within everyone else.
size = country["Q21"].value_counts(normalize=True)
rest_size = restOfTheWorld.iloc[1:]["Q21"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, orientation='h', y=size.index, x=size.values))
fig.add_trace(go.Bar(name='Rest of the world', orientation='h', y=rest_size.index, x=rest_size.values))
fig.update_layout(barmode='group', title="Size of company")
fig.show()

In [None]:
# Radar-chart view of the Q21 shares (the same data as the "Size of company" bar chart).
size = country.Q21.value_counts(normalize=True)
rest_size = restOfTheWorld[1:].Q21.value_counts(normalize=True)

# Rest of the world on the left, selected country on the right.
fig = make_subplots(rows=1, cols=2,specs=[[{'type': 'polar'},{"type": "polar"}]])

fig.add_trace(go.Scatterpolar(
                     r=rest_size.values,
                     theta=rest_size.index,
                     fill='toself',
                     hovertemplate='<b>%{theta}</b>' + f'<b>: '+'%{r}',
                     name='Rest of the world'),
                     row=1, col=1)

fig.add_trace(go.Scatterpolar(
                     r=size.values,
                     theta=size.index,
                     fill='toself',
                     hovertemplate='<b>%{theta}</b>' + f'<b>: '+'%{r}',
                     name=selectedCountry),
                     row=1, col=2)


# The title now matches the plotted Q21 data; it used to read "Cloud Platform",
# which belongs to a different chart. The call stays the last expression of the
# cell so the notebook renders the returned figure.
fig.update_layout(title_text="Size of company")

In [None]:
# Share of each Q22 answer within the selected country and within everyone else.
work = country.Q22.value_counts(normalize=True)
rest_work = restOfTheWorld[1:].Q22.value_counts(normalize=True)

fig = go.Figure(data=[
    go.Bar(name=selectedCountry, y=work.index, x=work.values, orientation='h'),
    go.Bar(name='Rest of the world', y=rest_work.index, x=rest_work.values, orientation='h')])
# NOTE(review): this chart used the same title as the Q21 chart ("Size of company").
# Q22 is assumed to be the question about how many people handle data science
# workloads at the respondent's workplace - confirm against the survey schema.
fig.update_layout(barmode='group',title = "Size of data science team")
fig.show()

In [None]:
# Share of each Q23 answer within the selected country and within everyone else.
use = country["Q23"].value_counts(normalize=True)
rest_use = restOfTheWorld.iloc[1:]["Q23"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=use.index, y=use.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_use.index, y=rest_use.values))
# Fixed, enlarged canvas for this chart.
fig.update_layout(
    barmode='group',
    title="Use of ML in company",
    height=800,
    width=1200,
)
fig.show()

In [None]:
# Q24 answers per group, shown as each answer's share of all selections.
rest_role = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=24, choices=7)
role = getMultipleChoice(country, question_number=24, choices=7)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=role["category"], y=role["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_role["category"], y=rest_role["percent"]))
# Fixed, enlarged canvas for this chart.
fig.update_layout(
    barmode='group',
    title="Role in company",
    height=800,
    width=1200,
)
fig.show()

In [None]:
# Share of each Q25 answer within the selected country and within everyone else.
salary = country["Q25"].value_counts(normalize=True)
rest_salary = restOfTheWorld.iloc[1:]["Q25"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=salary.index, y=salary.values))
fig.add_trace(go.Bar(name='Rest of the world', x=rest_salary.index, y=rest_salary.values))
fig.update_layout(barmode='group', title="Salary")
fig.show()

In [None]:
def _orderByAmount(shares):
    """Sorts spending buckets by the first number that appears in each label (commas ignored)."""
    amounts = (
        shares.index.str.replace(",", "", regex=False)
        .str.extract(r"(\d+)", expand=False)
        .astype(float)
    )
    return shares.iloc[amounts.argsort()]


# value_counts orders answers by frequency, which differs between the two groups,
# so every bar must be labelled with its OWN index entry. Pairing the values with a
# hard-coded label list attached the wrong label to most bars.
spent = _orderByAmount(country.Q26.value_counts(normalize=True))
rest_spent = _orderByAmount(restOfTheWorld[1:].Q26.value_counts(normalize=True))

fig = go.Figure(data=[
    go.Bar(name=selectedCountry, x=spent.index, y=spent.values),
    go.Bar(name='Rest of the world', x=rest_spent.index, y=rest_spent.values)])
fig.update_layout(barmode='group',title = "Spent on ML")
fig.show()

## Cloud

In [None]:
# Q27 (sub-part A) answers per group; the radar charts plot the raw counts ("value").
rest_cloud = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=27, choices=11, part='A')
cloud = getMultipleChoice(country, question_number=27, choices=11, part='A')

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest_cloud["category"],
        r=rest_cloud["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=cloud["category"],
        r=cloud["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Cloud Platform", height=1000, width=800)

In [None]:
# Q28 answer counts (raw, not normalized) for the selected country and for everyone else.
enj = country["Q28"].value_counts()
rest_enj = restOfTheWorld.iloc[1:]["Q28"].value_counts()

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest_enj.index,
        r=rest_enj.values,
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=enj.index,
        r=enj.values,
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Enjoyable Cloud Platform", height=1000, width=800)

In [None]:
# Q29 (sub-part A) answers per group, shown as each answer's share of all selections.
rest_cloud_use = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=29, choices=4, part='A')
cloud_use = getMultipleChoice(country, question_number=29, choices=4, part='A')

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=cloud_use["category"], x=cloud_use["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest_cloud_use["category"], x=rest_cloud_use["percent"],
))
fig.update_layout(barmode='group', title="Regular Cloud usage")
fig.show()

In [None]:
# Q30 (sub-part A) answers per group; the radar charts plot the raw counts ("value").
rest_cloud_sto = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=30, choices=7, part='A')
cloud_sto = getMultipleChoice(country, question_number=30, choices=7, part='A')

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest_cloud_sto["category"],
        r=rest_cloud_sto["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=cloud_sto["category"],
        r=cloud_sto["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Cloud Storage Use", height=1000, width=800)

In [None]:
# Q31 (sub-part A) answers per group; the radar charts plot the raw counts ("value").
rest_cloud_ml = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=31, choices=9, part='A')
cloud_ml = getMultipleChoice(country, question_number=31, choices=9, part='A')

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest_cloud_ml["category"],
        r=rest_cloud_ml["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=cloud_ml["category"],
        r=cloud_ml["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Machine learning Products Usage", height=1000, width=800)

In [None]:
# Q32 (sub-part A) answers per group, shown as each answer's share of all selections.
rest_bigd_p = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=32, choices=20, part='A')
bigd_p = getMultipleChoice(country, question_number=32, choices=20, part='A')

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=bigd_p["category"], x=bigd_p["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest_bigd_p["category"], x=rest_bigd_p["percent"],
))
fig.update_layout(barmode='group', title="Regularly used Big Data Products")
fig.show()

In [None]:
# Share of each Q33 answer within the selected country and within everyone else.
pro = country["Q33"].value_counts(normalize=True)
rest_pro = restOfTheWorld.iloc[1:]["Q33"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, orientation='h', y=pro.index, x=pro.values))
fig.add_trace(go.Bar(name='Rest of the world', orientation='h', y=rest_pro.index, x=rest_pro.values))
fig.update_layout(barmode='group', title="Most often used Big data product")
fig.show()

In [None]:
# Q34 (sub-part A) answers per group, shown as each answer's share of all selections.
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=34, choices=16, part='A')
bi = getMultipleChoice(country, question_number=34, choices=16, part='A')

fig = go.Figure()
fig.add_trace(go.Bar(
    name=selectedCountry, orientation='h',
    y=bi["category"], x=bi["percent"],
))
fig.add_trace(go.Bar(
    name='Rest of the world', orientation='h',
    y=rest["category"], x=rest["percent"],
))
fig.update_layout(barmode='group', title="Regularly used Business intelligence tools")
fig.show()

In [None]:
# Share of each Q35 answer within the selected country and within everyone else.
mostUsedBi = country["Q35"].value_counts(normalize=True)
rest = restOfTheWorld.iloc[1:]["Q35"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, orientation='h', y=mostUsedBi.index, x=mostUsedBi.values))
fig.add_trace(go.Bar(name='Rest of the world', orientation='h', y=rest.index, x=rest.values))
fig.update_layout(barmode='group', title="Most often used Business intelligence tools")
fig.show()

In [None]:
# Q36 (sub-part A) answers per group; the radar charts plot the raw counts ("value").
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=36, choices=7, part='A')
mle = getMultipleChoice(country, question_number=36, choices=7, part='A')

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest["category"],
        r=rest["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=mle["category"],
        r=mle["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Tools for Machine Learning experiments", height=1000, width=800)

In [None]:
# Q37 (sub-part A) answers per group, shown as each answer's share of all selections.
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=37, choices=7, part='A')
aml = getMultipleChoice(country, question_number=37, choices=7, part='A')

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, x=aml["category"], y=aml["percent"]))
fig.add_trace(go.Bar(name='Rest of the world', x=rest["category"], y=rest["percent"]))
# Fixed, enlarged canvas for this chart.
fig.update_layout(
    barmode='group',
    title="Automated ML tools",
    height=800,
    width=1000,
)
fig.show()

In [None]:
# Q38 (sub-part A) answers per group; the radar charts plot the raw counts ("value").
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=38, choices=11, part='A')
amlt = getMultipleChoice(country, question_number=38, choices=11, part='A')

# Rest of the world on top, selected country below.
fig = make_subplots(rows=2, cols=1, specs=[[dict(type='polar')], [dict(type='polar')]])
fig.add_trace(
    go.Scatterpolar(
        name='Rest of the world',
        theta=rest["category"],
        r=rest["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatterpolar(
        name=selectedCountry,
        theta=amlt["category"],
        r=amlt["value"],
        fill='toself',
        hovertemplate='<b>%{theta}</b><b>: %{r}',
    ),
    row=2, col=1,
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(title_text="Automated machine learning tools", height=1000, width=800)

In [None]:
# Q39 answers per group; the donuts are sized by the raw counts ("value").
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=39, choices=9)
pk = getMultipleChoice(country, question_number=39, choices=9)

fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=pk["category"], values=pk["value"]),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest["category"], values=rest["value"]),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

fig.update_layout(
    title_text="Publicly sharing of data analysis or machine learning applications",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
)
fig.show()

In [None]:
# Q40 answers per group; the donuts are sized by the raw counts ("value").
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=40, choices=11)
pk = getMultipleChoice(country, question_number=40, choices=11)

fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=pk["category"], values=pk["value"]),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest["category"], values=rest["value"]),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

# Enlarged canvas with a horizontal legend anchored above the plot area.
fig.update_layout(
    height=800,
    width=1000,
    title_text="Online learning courses",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)
fig.show()

In [None]:
# Share of each Q41 answer within the selected country and within everyone else.
pk = country["Q41"].value_counts(normalize=True)
rest = restOfTheWorld.iloc[1:]["Q41"].value_counts(normalize=True)

fig = go.Figure()
fig.add_trace(go.Bar(name=selectedCountry, orientation='h', y=pk.index, x=pk.values))
fig.add_trace(go.Bar(name='Rest of the world', orientation='h', y=rest.index, x=rest.values))
fig.update_layout(barmode='group', title="Tools at work or school")
fig.show()

In [None]:
# Q42 answers per group; the donuts are sized by the raw counts ("value").
rest = getMultipleChoice(restOfTheWorld.iloc[1:], question_number=42, choices=11)
pk = getMultipleChoice(country, question_number=42, choices=11)

fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
fig.add_trace(
    go.Pie(name=selectedCountry, labels=pk["category"], values=pk["value"]),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(name="Rest of the World", labels=rest["category"], values=rest["value"]),
    row=1, col=2,
)

fig.update_traces(hole=0.4, hoverinfo="label+percent+name+value")

# Enlarged canvas with a horizontal legend anchored above the plot area.
fig.update_layout(
    height=800,
    width=1000,
    title_text="Media Sources for DS topics",
    annotations=[
        {"text": selectedCountry, "x": 0.17, "y": 0.5, "font_size": 20, "showarrow": False},
        {"text": 'Rest', "x": 0.81, "y": 0.5, "font_size": 20, "showarrow": False},
    ],
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)
fig.show()

In [None]:
#df.iloc[:,265:].head(2)
#df.iloc[0,190]
#df["Q41"][0]