In [None]:
import numpy as np 
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

plt.style.use('fivethirtyeight')
import plotly.graph_objects as go

# **2020 Kaggle ML & DS Survey**

# Introduction

**The challenge objective**: tell a data story about a subset of the data science community represented in this survey, through a combination of both narrative text and data exploration.
For comparison, I also used the survey datasets from the three previous years (2017, 2018 and 2019). You can see that the number of Kaggle participants is growing (see below).

In [None]:
# Load the multiple-choice responses of each survey year.
# The 2017 file is read with an explicit ISO-8859-1 encoding.
survey = pd.read_csv('../input/kaggle-survey-2020/kaggle_survey_2020_responses.csv', low_memory=False)
survey19 = pd.read_csv('../input/kaggle-2019-survey-copy/multiple_choice_responses.csv', low_memory=False)
survey18 = pd.read_csv('../input/kaggle-survey-2018/multipleChoiceResponses.csv', low_memory=False)
survey17 = pd.read_csv('../input/kaggle-survey-2017/multipleChoiceResponses.csv', encoding='ISO-8859-1', low_memory=False)

# Shorten the long country names so that the same country carries the same
# label in every year. A nested dict ({column: {old: new}}) restricts each
# replacement to the country column and needs only one pass per frame.
survey = survey.replace({'Q3': {
    'United States of America': 'USA',
    'United Kingdom of Great Britain and Northern Ireland': 'UK',
    'Iran, Islamic Republic of...': 'Iran',
}})

# NOTE(review): 'Republic of China' is folded into 'China' exactly as before;
# confirm that merging the two 2017 categories is intended.
survey17 = survey17.replace({'Country': {
    'United States': 'USA',
    'United Kingdom': 'UK',
    'People \'s Republic of China': 'China',
    'Republic of China': 'China',
}})

survey18 = survey18.replace({'Q3': {
    'United States of America': 'USA',
    'United Kingdom of Great Britain and Northern Ireland': 'UK',
    'I do not wish to disclose my location': 'Undefined',
    'Iran, Islamic Republic of...': 'Iran',
}})

survey19 = survey19.replace({'Q3': {
    'United Kingdom of Great Britain and Northern Ireland': 'UK',
    'United States of America': 'USA',
    'Iran, Islamic Republic of...': 'Iran',
}})


In [None]:
# Number of respondents per year: non-null entries in the second column,
# skipping the first data row (presumably the question-text row).
# NOTE(review): the same iloc[1:] is applied to the 2017 frame; confirm that its
# first row is not a real respondent, otherwise one answer is dropped.
numbers18 = survey18.iloc[1:, 1].count()
numbers17 = survey17.iloc[1:, 1].count()
numbers19 = survey19.iloc[1:, 1].count()
numbers20 = survey.iloc[1:, 1].count()

# Ten most frequent countries per year. value_counts() already sorts in
# descending order, so one computation per year is sufficient.
country20_top = survey['Q3'].iloc[1:].value_counts().head(10)
country18_top = survey18['Q3'].iloc[1:].value_counts().head(10)
country17_top = survey17['Country'].iloc[1:].value_counts().head(10)
country19_top = survey19['Q3'].iloc[1:].value_counts().head(10)

# Labels of the 2020 top-10 countries.
country20_index = country20_top.index


In [None]:
# 2020: respondents per country (Q3) split by gender answer (Q2).
# Country/gender combinations that never occur are filled with 0.
gender = (
    survey.iloc[1:]
    .groupby(['Q3', 'Q2'])['Q2']
    .count()
    .unstack()
    .fillna(0)
)
gender['Count'] = gender[
    ['Man', 'Woman', 'Nonbinary', 'Prefer not to say', 'Prefer to self-describe']
].sum(axis=1)
# Largest countries first; keep the ten biggest.
gender = gender.reset_index().sort_values(by=['Count'], ascending=False)
gender20 = gender[:10]

In [None]:
# 2019: respondents per country (Q3) split by gender answer (Q2).
# Country/gender combinations that never occur are filled with 0.
gender = (
    survey19.iloc[1:]
    .groupby(['Q3', 'Q2'])['Q2']
    .count()
    .unstack()
    .fillna(0)
)
gender['Count'] = gender[
    ['Male', 'Female', 'Prefer not to say', 'Prefer to self-describe']
].sum(axis=1)
# Largest countries first; keep the ten biggest.
gender = gender.reset_index().sort_values(by=['Count'], ascending=False)
gender19 = gender[:10]

In [None]:
# 2017: respondents per country split by the GenderSelect answer.
# Country/gender combinations that never occur are filled with 0.
gender = (
    survey17.iloc[1:]
    .groupby(['Country', 'GenderSelect'])['GenderSelect']
    .count()
    .unstack()
    .fillna(0)
)
gender['Count'] = gender[
    ['Male', 'Female', 'A different identity', 'Non-binary, genderqueer, or gender non-conforming']
].sum(axis=1)
# Largest countries first; keep the ten biggest.
gender = gender.reset_index().sort_values(by=['Count'], ascending=False)
gender17 = gender[:10]

In [None]:
# 2018: respondents per country (Q3) split by gender answer (Q1).
# Country/gender combinations that never occur are filled with 0.
gender = (
    survey18.iloc[1:]
    .groupby(['Q3', 'Q1'])['Q1']
    .count()
    .unstack()
    .fillna(0)
)
gender['Count'] = gender[
    ['Male', 'Female', 'Prefer not to say', 'Prefer to self-describe']
].sum(axis=1)
# Largest countries first; keep the ten biggest.
gender = gender.reset_index().sort_values(by=['Count'], ascending=False)
gender18 = gender[:10]

In [None]:
# Top-10 countries per survey year, with the number of women overlaid on each bar.
# One entry per panel:
# (title, top-10 counts, per-country gender table, country column, women column).
panels = [
    ('2020', country20_top, gender20, 'Q3', 'Woman'),
    ('2019', country19_top, gender19, 'Q3', 'Female'),
    ('2018', country18_top, gender18, 'Q3', 'Female'),
    ('2017', country17_top, gender17, 'Country', 'Female'),
]

fig, ax = plt.subplots(1, 4, figsize=(24, 10))
for axis, (year, top, by_gender, country_col, women_col) in zip(ax, panels):
    # Align the women counts with the bars by country label instead of by
    # position: the two tables are sorted independently, so their row order is
    # not guaranteed to match. A country missing from the gender table gets 0.
    women = by_gender.set_index(country_col)[women_col].reindex(top.index).fillna(0)
    axis.barh(top.index, top.values)
    axis.barh(top.index, women.values)
    axis.set_title(year)
    # Write the total next to each bar, slightly right of the bar end.
    for country, total in top.items():
        axis.text(total + 100, country, total)
    
    


The number of women is highlighted in red. 

# The number of participants

 The number of participants grows every year. It's about 20 000 in 2020. The top countries remain approximately the same: United States of America, India, Brazil, Japan, Russia, Germany, Canada, China, etc. (see the chart above). The category 'Other' ranks 3rd in terms of the number of participants. Probably some people decided not to indicate their place of residence.
 The number of participants from China decreased. I think this is due to political factors.

The most interesting insight concerns the participants from India: they now lead in the number of participants. This indicates a growing interest in data science. Recently, India has become an outsourcing destination for many multinational corporations. India has also developed into a significant exporter of software and financial and technology services ([Wiki](http://en.wikipedia.org/wiki/India)). The charts below show this growth.

In [None]:
# Name each top-10 series after its year so that the year becomes the column
# label when the four series are placed side by side.
country20_top, country19_top, country18_top, country17_top = (
    counts.rename(year)
    for year, counts in (
        ('2020', country20_top),
        ('2019', country19_top),
        ('2018', country18_top),
        ('2017', country17_top),
    )
)
all_countries = pd.concat(
    [country20_top, country19_top, country18_top, country17_top],
    axis=1,
)

In [None]:
# A country missing from a year's top 10 has no value for that year; use 0 there.
all_countries = all_countries.fillna(value=0)

In [None]:
# One line per survey year across the countries of the combined top-10 table.
sns.set(rc={'figure.figsize': (18, 10)})
sns.lineplot(data=all_countries).set_title('Distribution of participants by country')


In [None]:
# Participants from India in each survey year.
sns.set(rc={'figure.figsize': (18, 10)})
sns.lineplot(
    x=all_countries.columns,
    y=all_countries.loc['India'],
).set_title('Increase  the number of participants from India')

In [None]:
# 2020 respondents who gave India as their country.
survey_india = survey.loc[survey['Q3'] == 'India']

# Share (in percent) of each of the ten most frequent countries, per year.
country20_part = survey['Q3'].iloc[1:].value_counts(normalize=True).head(10).mul(100)
country19_part = survey19['Q3'].iloc[1:].value_counts(normalize=True).head(10).mul(100)
country18_part = survey18['Q3'].iloc[1:].value_counts(normalize=True).head(10).mul(100)
country17_part = survey17['Country'].iloc[1:].value_counts(normalize=True).head(10).mul(100)

In [None]:
# Percentage share of the top-10 countries, one panel per survey year.
fig, ax = plt.subplots(1, 4, figsize=(24, 10))

shares_by_year = (
    ('2020', country20_part),
    ('2019', country19_part),
    ('2018', country18_part),
    ('2017', country17_part),
)
for axis, (year, shares) in zip(ax, shares_by_year):
    axis.barh(shares.index, shares.values)
    axis.set_title(year)
    # Label each bar with its percentage at the bar end.
    for country, pct in shares.items():
        axis.text(pct, country, '{:.1f}%'.format(pct))

In [None]:
# Overall gender split per survey year, in percent of the answers given.
gender20 = survey['Q2'].iloc[1:].value_counts(normalize=True) * 100
gender19 = survey19['Q2'].iloc[1:].value_counts(normalize=True) * 100
gender18 = survey18['Q1'].iloc[1:].value_counts(normalize=True) * 100
gender17 = survey17['GenderSelect'].iloc[1:].value_counts(normalize=True) * 100

# Give the 2020 answers the same labels as the earlier years. Renaming by label
# does not depend on which answers happen to rank first and second, and it does
# not write into the Index's underlying array, which must be treated as immutable.
gender20 = gender20.rename(index={'Man': 'Male', 'Woman': 'Female'})


In [None]:
# Overall gender split, one panel per survey year.
fig, ax = plt.subplots(1, 4, figsize=(24, 10))
# plt.title() would title only the current (last) subplot and then be
# overwritten by that panel's own set_title(); a figure-level title stays visible.
fig.suptitle('Gender Distribution')

shares_by_year = (
    ('2020', gender20),
    ('2019', gender19),
    ('2018', gender18),
    ('2017', gender17),
)
for axis, (year, shares) in zip(ax, shares_by_year):
    axis.barh(shares.index, shares.values)
    axis.set_title(year)
    # Label each bar with its percentage, slightly right of the bar end.
    for label, pct in shares.items():
        axis.text(pct + 2, label, '{:.1f}%'.format(pct))

In [None]:
# 2020: respondents per country (Q3) split by gender answer (Q2).
# Country/gender combinations that never occur are filled with 0.
gender = (
    survey.iloc[1:]
    .groupby(['Q3', 'Q2'])['Q2']
    .count()
    .unstack()
    .fillna(0)
)
gender['Count'] = gender[
    ['Man', 'Woman', 'Nonbinary', 'Prefer not to say', 'Prefer to self-describe']
].sum(axis=1)
gender = gender.reset_index().sort_values(by=['Count'], ascending=False)
# Ten largest countries, with the country column given a readable name.
gender20 = gender[:10].rename(columns={'Q3': 'Country'})

In [None]:
# Show the per-country gender counts without the helper 'Count' column.
gender20.loc[:, ['Country', 'Man', 'Woman', 'Nonbinary', 'Prefer not to say', 'Prefer to self-describe']]

The number of women is growing, especially in India.

In [None]:
# Men per country, with the women counts drawn on top in pink.
sns.barplot(data=gender20, x='Man', y='Country')
sns.barplot(data=gender20, x='Woman', y='Country', color='pink')
plt.title('Gender distribution')

# **Conclusion**

Here are my findings. In 2020 the typical Kaggle participant is a student from India. He has little programming experience (about 2 years) and knows Python. He is pursuing higher education. He uses popular IDEs and hosted notebook products. He works on a personal computer or laptop and uses GPUs. He knows the data visualization libraries (Matplotlib, Seaborn, Plotly). He has 1 to 2 years of experience with machine learning. In his work he uses Scikit-learn, TensorFlow, Keras and PyTorch. He works for a small company that uses machine learning. He spends from 100 to 1000 dollars on machine learning. He studies on Coursera, Kaggle Learn Courses and Udemy. He wants to use automated machine learning tools and most of the big data products.

In [None]:
# Respondents from India per current role (Q5), counted through their non-null
# Q2 answers and ordered from the most to the least common role.
current_role = (
    survey_india.groupby('Q5')['Q2']
    .count()
    .sort_values(ascending=False)
)

Further graphs and some comments follow.

# Participants from India

Let's take a closer look at participants from India. India (Hindi: Bhārat), officially the Republic of India, is a country in South Asia. It is the second-most populous country, the seventh-largest country by land area, and the most populous democracy in the world ([Wiki](http://en.wikipedia.org/wiki/India)).

## The current role
Most of the participants are students. [Kaggle](https://www.kaggle.com/) is an accessible and free platform for gaining experience, training, etc. It is important for people who are learning to get hands-on ('living') experience.

In [None]:
# Bar chart of the current roles of respondents from India.
sns.barplot(x=current_role.values, y=current_role.index).set_title('Distribution of current role')


## Programming experience

We see that most of them have little programming experience (about 2 years). This correlates with the fact that most of them are students. About 360 people have never written code. About 43 of them are Machine Learning Engineers, Software Engineers or Data Scientists. This is probably a mistake.

In [None]:
# Machine Learning Engineers from India who say they have never written code.
survey_india.loc[
    survey_india['Q6'].eq('I have never written code')
    & survey_india['Q5'].eq('Machine Learning Engineer'),
    'Q1',
].count()

In [None]:
# Software Engineers from India who say they have never written code.
survey_india.loc[
    survey_india['Q6'].eq('I have never written code')
    & survey_india['Q5'].eq('Software Engineer'),
    'Q1',
].count()

In [None]:
# Data Scientists from India who say they have never written code.
survey_india.loc[
    survey_india['Q6'].eq('I have never written code')
    & survey_india['Q5'].eq('Data Scientist'),
    'Q1',
].count()

In [None]:
# All respondents from India who say they have never written code.
survey_india.loc[survey_india['Q6'].eq('I have never written code'), 'Q1'].count()



In [None]:
# Number of respondents from India per programming-experience answer (Q6).
programming_exp = survey_india.loc[:, 'Q6'].value_counts()
sns.barplot(
    x=programming_exp.values,
    y=programming_exp.index.values,
).set_title('Programming experience')

## The highest level of formal education

Because most of the participants are students, their goal is obviously to obtain higher education (Bachelor's degree, Master's degree).

In [None]:
# Survey question: what is the highest level of formal education that you have
# attained or plan to attain within the next 2 years?
level_education = survey_india.loc[:, 'Q4'].value_counts()
sns.barplot(
    y=level_education.index.values,
    x=level_education.values,
).set_title('The highest level of formal education that participants have attained or plan to attain within the next 2 years')

## Programming language

The stack of languages that are currently popular is presented here. The leader among them is Python. As of December 2020 Python ranked third in TIOBE’s index of most popular programming languages, behind C and Java ([Wiki](https://en.wikipedia.org/wiki/Python_(programming_language)))

In [None]:
# Respondents from India whose Q6 answer is anything other than
# 'I have never written code'. Rows with a missing Q6 answer are kept as well,
# because a missing value compares as "not equal" to the string.
have_written_code = survey_india.loc[survey_india['Q6'].ne('I have never written code')]

In [None]:
# Display labels for the Q7 columns (Q7_Part_1 ... Q7_OTHER).
# NOTE(review): the labels are matched to the columns purely by position —
# confirm the order against the survey schema.
programming_languages = ['Python', 'R', 'SQL', 'C', 'C++', 'Java', 'Javascript', 'Julia', 'Swift', 'Bash', 'MATLAB', 'None', 'Other']
# Non-null answers per language column among respondents who have written code.
count_languages = have_written_code.loc[:, 'Q7_Part_1':'Q7_OTHER'].count()
sns.barplot(x=programming_languages, y=count_languages)
plt.title('Distribution of programming languages')

## Recommended programming language

Python is the leader here. Since the majority know this language, they recommend it.

In [None]:
# Survey question: what programming language would you recommend an aspiring
# data scientist to learn first?
recommend_language = have_written_code.loc[:, 'Q8'].value_counts()
sns.barplot(
    x=recommend_language.values,
    y=recommend_language.index.values,
).set_title('What programming language would you recommend an aspiring data scientist to learn first?')

## The integrated development environments

The stack of IDEs that are currently popular is presented here: Visual Studio Code (VSCode), PyCharm, Spyder, Notepad++, etc. And of course JupyterLab is the leader among them. Project Jupyter's operating philosophy is to support interactive data science and scientific computing across all programming languages via the development of open-source software ([Wiki](https://en.wikipedia.org/wiki/Project_Jupyter))  

In [None]:
# Survey question: which of the following integrated development environments
# (IDEs) do you use on a regular basis?
# The slice runs through 'Q9_OTHER' so that the 'Other' answers named in the
# mapping below are actually counted (the slice previously stopped at Part_11).
ide = have_written_code.loc[:, 'Q9_Part_1':'Q9_OTHER'].rename(columns={
    'Q9_Part_1': 'JupyterLab',
    'Q9_Part_2': 'RStudio',
    'Q9_Part_3': 'Visual Studio',
    'Q9_Part_4': 'Visual Studio Code (VSCode)',
    'Q9_Part_5': 'PyCharm',
    'Q9_Part_6': 'Spyder',
    'Q9_Part_7': 'Notepad++',
    'Q9_Part_8': 'Sublime Text',
    'Q9_Part_9': 'Vim, Emacs, or similar',
    'Q9_Part_10': 'MATLAB',
    'Q9_Part_11': 'None',
    'Q9_OTHER': 'Other',
}).count()  # non-null answers per IDE
sns.barplot(y=ide.index.values, x=ide.values)
plt.title('The integrated development environments')

## The hosted notebook

Kaggle Notebooks, Colab Notebooks and JupyterHub are the most widely accessible, popular and free. About 1000 participants don't use a hosted notebook.

In [None]:
# Survey question: which of the following hosted notebook products do you use
# on a regular basis?
# Count the non-null answers per Q10 column, then give each column a readable label.
notebook = (
    have_written_code.loc[:, 'Q10_Part_1':'Q10_OTHER']
    .count()
    .rename(index={
        'Q10_Part_1': 'Kaggle Notebooks',
        'Q10_Part_2': 'Colab Notebooks',
        'Q10_Part_3': 'Azure Notebooks',
        'Q10_Part_4': 'Paperspace / Gradient',
        'Q10_Part_5': 'Binder / JupyterHub',
        'Q10_Part_6': 'Code Ocean',
        'Q10_Part_7': 'IBM Watson Studio',
        'Q10_Part_8': 'Amazon Sagemaker Studio',
        'Q10_Part_9': 'Amazon EMR Notebooks',
        'Q10_Part_10': 'Google Cloud AI Platform Notebooks',
        'Q10_Part_11': 'Google Cloud Datalab Notebooks',
        'Q10_Part_12': 'Databricks Collaborative Notebooks',
        'Q10_Part_13': 'None',
        'Q10_OTHER': 'Other',
    })
)
sns.barplot(x=notebook.values, y=notebook.index.values)
plt.title('The hosted notebook product')

Most of them use a personal computer or laptop. 

In [None]:
# Survey question: what type of computing platform do you use most often for
# your data science projects?
computing_platform = have_written_code.loc[:, 'Q11'].value_counts()
sns.barplot(
    x=computing_platform.values,
    y=computing_platform.index.values,
).set_title('Type of computing platform')

## The type of specialized hardware

Most of them use GPUs in their work, but many people don't use any specialized hardware.

In [None]:
# Survey question: which types of specialized hardware do you use on a regular basis?
# Count the non-null answers per Q12 column, then give each column a readable label.
specialized_hardware = (
    survey_india[['Q12_Part_1', 'Q12_Part_2', 'Q12_Part_3', 'Q12_OTHER']]
    .count()
    .rename(index={
        'Q12_Part_1': 'GPUs',
        'Q12_Part_2': 'TPUs',
        'Q12_Part_3': 'None',
        'Q12_OTHER': 'Other',
    })
)
sns.barplot(x=specialized_hardware.values, y=specialized_hardware.index.values)
plt.title('The types of specialized hardware are used by participants')

In [None]:
# Survey question: approximately how many times have you used a TPU
# (tensor processing unit)?
used_tpu = survey_india.loc[:, 'Q13'].value_counts()
sns.barplot(x=used_tpu.values, y=used_tpu.index.values).set_title('TPU is used')

In [None]:
# Survey question: what data visualization libraries or tools do you use on a
# regular basis?
# Columns Q14_Part_1 ... Q14_Part_11 plus Q14_OTHER: count the non-null answers
# per column, then give each column a readable label.
visualization_libraries = (
    survey_india[['Q14_Part_{}'.format(part) for part in range(1, 12)] + ['Q14_OTHER']]
    .count()
    .rename(index={
        'Q14_Part_1': 'Matplotlib',
        'Q14_Part_2': 'Seaborn',
        'Q14_Part_3': 'Plotly / Plotly Express',
        'Q14_Part_4': 'Ggplot / ggplot2',
        'Q14_Part_5': 'Shiny',
        'Q14_Part_6': 'D3 js',
        'Q14_Part_7': 'Altair',
        'Q14_Part_8': 'Bokeh',
        'Q14_Part_9': 'Geoplotlib',
        'Q14_Part_10': 'Leaflet / Folium',
        'Q14_Part_11': 'None',
        'Q14_OTHER': 'Other',
    })
)
sns.barplot(x=visualization_libraries.values, y=visualization_libraries.index.values)
plt.title('The data visualization libraries')

In [None]:
# Survey question: for how many years have you used machine learning methods?
years_used_ml = survey_india.loc[:, 'Q15'].value_counts()
sns.barplot(
    x=years_used_ml.values,
    y=years_used_ml.index.values,
).set_title('ML methods are used (years)')

In [None]:
# Survey question: which of the following machine learning frameworks do you
# use on a regular basis?
# Count the non-null answers per Q16 column under readable labels.
ml_frameworks = survey_india[['Q16_Part_1', 'Q16_Part_2', 'Q16_Part_3',
                              'Q16_Part_4', 'Q16_Part_5', 'Q16_Part_6',
                              'Q16_Part_7', 'Q16_Part_8', 'Q16_Part_9',
                              'Q16_Part_10', 'Q16_Part_11', 'Q16_Part_12',
                              'Q16_Part_13', 'Q16_Part_14', 'Q16_Part_15',
                              'Q16_OTHER']].rename(columns={
    'Q16_Part_1': 'Scikit-learn',
    'Q16_Part_2': 'TensorFlow',
    'Q16_Part_3': 'Keras',
    'Q16_Part_4': 'PyTorch',
    'Q16_Part_5': 'Fast.ai',
    'Q16_Part_6': 'MXNet',
    'Q16_Part_7': 'Xgboost',
    'Q16_Part_8': 'LightGBM',
    'Q16_Part_9': 'CatBoost',  # label was misspelled 'CatBoos'
    'Q16_Part_10': 'Prophet',
    'Q16_Part_11': 'H2O 3',
    'Q16_Part_12': 'Caret',
    'Q16_Part_13': 'Tidymodels',
    'Q16_Part_14': 'JAX',
    'Q16_Part_15': 'None',
    'Q16_OTHER': 'Other',
}).count()
sns.barplot(y=ml_frameworks.index.values, x=ml_frameworks.values)
plt.title('Machine learning frameworks')


In [None]:
# Survey question: which of the following ML algorithms do you use on a regular basis?
# Columns Q17_Part_1 ... Q17_Part_11 plus Q17_OTHER: count the non-null answers
# per column, then give each column a readable label.
ml_algorithms = (
    survey_india[['Q17_Part_{}'.format(part) for part in range(1, 12)] + ['Q17_OTHER']]
    .count()
    .rename(index={
        'Q17_Part_1': 'Linear or Logistic Regression',
        'Q17_Part_2': 'Decision Trees or Random Forests',
        'Q17_Part_3': 'Gradient Boosting Machines (xgboost, lightgbm, etc)',
        'Q17_Part_4': 'Bayesian Approaches',
        'Q17_Part_5': 'Evolutionary Approaches',
        'Q17_Part_6': 'Dense Neural Networks (MLPs, etc)',
        'Q17_Part_7': 'Convolutional Neural Networks',
        'Q17_Part_8': 'Generative Adversarial Networks',
        'Q17_Part_9': 'Recurrent Neural Networks',
        'Q17_Part_10': 'Transformer Networks (BERT, gpt-3, etc)',
        'Q17_Part_11': 'None',
        'Q17_OTHER': 'Other',
    })
)
sns.barplot(x=ml_algorithms.values, y=ml_algorithms.index.values)
plt.title('ML algorithms used on a regular basis')

In [None]:
# Survey question: which categories of computer vision methods do you use on a
# regular basis?
# Columns Q18_Part_1 ... Q18_Part_6 plus Q18_OTHER: count the non-null answers
# per column, then give each column a readable label.
cv_methods = (
    survey_india[['Q18_Part_{}'.format(part) for part in range(1, 7)] + ['Q18_OTHER']]
    .count()
    .rename(index={
        'Q18_Part_1': 'General purpose image/video tools (PIL, cv2, skimage, etc)',
        'Q18_Part_2': 'Image segmentation methods (U-Net, Mask R-CNN, etc',
        'Q18_Part_3': 'Object detection methods (YOLOv3, RetinaNet, etc)',
        'Q18_Part_4': 'Image classification and other general purpose networks (VGG, Inception, ResNet,ResNeXt, NASNet, EfficientNet, etc)',
        'Q18_Part_5': 'Generative Networks (GAN, VAE, etc)',
        'Q18_Part_6': 'None',
        'Q18_OTHER': 'Other',
    })
)
sns.barplot(x=cv_methods.values, y=cv_methods.index.values)
plt.title('Computer vision methods used on a regular basis')


In [None]:
# Survey question: which of the following natural language processing (NLP)
# methods do you use on a regular basis?
# Columns Q19_Part_1 ... Q19_Part_5 plus Q19_OTHER: count the non-null answers
# per column, then give each column a readable label.
nlp_methods = (
    survey_india[['Q19_Part_{}'.format(part) for part in range(1, 6)] + ['Q19_OTHER']]
    .count()
    .rename(index={
        'Q19_Part_1': 'Word embeddings/vectors (GLoVe, fastText, word2vec)',
        'Q19_Part_2': 'Encoder-decoder models (seq2seq, vanilla transformers)',
        'Q19_Part_3': 'Contextualized embeddings (ELMo, CoVe)',
        'Q19_Part_4': 'Transformer language models (GPT-3, BERT, XLnet, etc)',
        'Q19_Part_5': 'None',
        'Q19_OTHER': 'Other',
    })
)
sns.barplot(x=nlp_methods.values, y=nlp_methods.index.values)
plt.title('The natural language processing are used on a regular basis')

In [None]:
# Survey question: what is the size of the company where you are employed?
size_company = survey_india.loc[:, 'Q20'].value_counts()
sns.barplot(
    x=size_company.values,
    y=size_company.index.values,
).set_title('The size of the company')

In [None]:
# Survey question: approximately how many individuals are responsible for data
# science workloads at your place of business?
# Unlike the neighbouring charts, the answers go on the x axis here.
responsible_ds = survey_india.loc[:, 'Q21'].value_counts()
sns.barplot(
    x=responsible_ds.index.values,
    y=responsible_ds.values,
).set_title('The number of people responsible for data science workloads')

In [None]:
# Survey question: does your current employer incorporate machine learning
# methods into their business?
incorporate_ml = survey_india.loc[:, 'Q22'].value_counts()
sns.barplot(
    x=incorporate_ml.values,
    y=incorporate_ml.index.values,
).set_title('Incorporate machine learning methods into business')

In [None]:
# Survey question: select any activities that make up an important part of your
# role at work.
# Columns Q23_Part_1 ... Q23_Part_7 plus Q23_OTHER: count the non-null answers
# per column, then give each column a readable label.
any_activities = (
    survey_india[['Q23_Part_{}'.format(part) for part in range(1, 8)] + ['Q23_OTHER']]
    .count()
    .rename(index={
        'Q23_Part_1': 'Analyze and understand data to influence product or business decisions',
        'Q23_Part_2': 'Build and/or run the data infrastructure that my business uses for storing, analyzing, and operationalizing data',
        'Q23_Part_3': 'Build prototypes to explore applying machine learning to new areas',
        'Q23_Part_4': 'Build and/or run a machine learning service that operationally improves my product or workflows',
        'Q23_Part_5': 'Experimentation and iteration to improve existing ML models',
        'Q23_Part_6': 'Do research that advances the state of the art of machine learning',
        'Q23_Part_7': 'None of these activities are an important part of my role at work',
        'Q23_OTHER': 'Other',
    })
)
sns.barplot(x=any_activities.values, y=any_activities.index.values)
plt.title('The activities that make up an important part of role at work')

In [None]:
# Survey question: what is your current yearly compensation?
yearly_compensation = survey_india.loc[:, 'Q24'].value_counts()
sns.barplot(
    x=yearly_compensation.values,
    y=yearly_compensation.index.values,
).set_title('The current yearly compensation')

In [None]:
# Survey question: approximately how much money have you (or your team) spent on
# machine learning and/or cloud computing services at home (or at work) in the
# past 5 years?
team_spent = survey_india.loc[:, 'Q25'].value_counts()
sns.barplot(
    x=team_spent.values,
    y=team_spent.index.values,
).set_title('The amount of money are spent on machine learning')

In [None]:
# Survey question: which of the following cloud computing platforms do you use
# on a regular basis?
# Count the non-null answers per Q26_A column, then give each column a readable label.
cloud_computing_platforms = (
    survey_india.loc[:, 'Q26_A_Part_1':'Q26_A_OTHER']
    .count()
    .rename(index={
        'Q26_A_Part_1': 'Amazon Web Services (AWS)',
        'Q26_A_Part_2': 'Microsoft Azure',
        'Q26_A_Part_3': 'Google Cloud Platform (GCP)',
        'Q26_A_Part_4': 'IBM Cloud / Red Hat',
        'Q26_A_Part_5': 'Oracle Cloud',
        'Q26_A_Part_6': 'SAP Cloud',
        'Q26_A_Part_7': 'Salesforce Cloud',
        'Q26_A_Part_8': 'VMware Cloud',
        'Q26_A_Part_9': 'Alibaba Cloud',
        'Q26_A_Part_10': 'Tencent Cloud',
        'Q26_A_Part_11': 'None',
        'Q26_A_OTHER': 'Other',
    })
)
sns.barplot(x=cloud_computing_platforms.values, y=cloud_computing_platforms.index.values)
plt.title('The cloud platforms are used on a regular basis')

In [None]:
# Q27-A: Do you use any of the following cloud computing products on a regular basis?
# Relabel each option column with its product name and count non-null cells per column.
cloud_computing_products = (
    survey_india.loc[:, 'Q27_A_Part_1':'Q27_A_OTHER']
    .rename(columns={
        'Q27_A_Part_1': 'Amazon EC2',
        'Q27_A_Part_2': 'AWS Lambda',
        'Q27_A_Part_3': 'Amazon Elastic Container Service',
        'Q27_A_Part_4': 'Azure Cloud Services',
        'Q27_A_Part_5': 'Microsoft Azure Container Instances',
        'Q27_A_Part_6': 'Azure Functions',
        'Q27_A_Part_7': 'Google Cloud Compute Engine',
        'Q27_A_Part_8': 'Google Cloud Functions',
        'Q27_A_Part_9': 'Google Cloud Run',
        'Q27_A_Part_10': 'Google Cloud App Engine',
        'Q27_A_Part_11': 'No/None',
        'Q27_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(
    x=cloud_computing_products.values,
    y=cloud_computing_products.index.values,
)
plt.title('The cloud computing products are used on a regular basis')

In [None]:
# Q28-A: Do you use any of the following machine learning products on a regular basis?
# Relabel each option column with its product name and count non-null cells per column.
# The slice runs through 'Q28_A_OTHER', so that column needs a label too; without
# one the bar would show the raw column name instead of 'Other'.
ml_products = (
    survey_india.loc[:, 'Q28_A_Part_1':'Q28_A_OTHER']
    .rename(columns={
        'Q28_A_Part_1': 'Amazon SageMaker',
        'Q28_A_Part_2': 'Amazon Forecast',
        'Q28_A_Part_3': 'Amazon Rekognition',
        'Q28_A_Part_4': 'Azure Machine Learning Studio',
        'Q28_A_Part_5': 'Azure Cognitive Services',
        'Q28_A_Part_6': 'Google Cloud AI Platform / Google Cloud ML Engine',
        'Q28_A_Part_7': 'Google Cloud Video AI',
        'Q28_A_Part_8': 'Google Cloud Natural Language',
        'Q28_A_Part_9': 'Google Cloud Vision AI',
        'Q28_A_Part_10': 'None',
        'Q28_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=ml_products.index.values, x=ml_products.values)
plt.title('The machine learning products are used on a regular basis')

In [None]:
# Q29-A: Which of the following big data products (relational databases, data
# warehouses, data lakes, or similar) do you use on a regular basis?
# Relabel each option column with its product name and count non-null cells per column.
db_products = (
    survey_india.loc[:, 'Q29_A_Part_1':'Q29_A_OTHER']
    .rename(columns={
        'Q29_A_Part_1': 'MySQL',
        'Q29_A_Part_2': 'PostgreSQL',
        'Q29_A_Part_3': 'SQLite',
        'Q29_A_Part_4': 'Oracle Database',
        'Q29_A_Part_5': 'MongoDB',
        'Q29_A_Part_6': 'Snowflake',
        'Q29_A_Part_7': 'IBM Db2',
        'Q29_A_Part_8': 'Microsoft SQL Server',
        'Q29_A_Part_9': 'Microsoft Access',
        'Q29_A_Part_10': 'Microsoft Azure Data Lake Storage',
        'Q29_A_Part_11': 'Amazon Redshift',
        'Q29_A_Part_12': 'Amazon Athena',
        'Q29_A_Part_13': 'Amazon DynamoDB',
        'Q29_A_Part_14': 'Google Cloud BigQuery',
        'Q29_A_Part_15': 'Google Cloud SQL',
        'Q29_A_Part_16': 'Google Cloud Firestore',
        'Q29_A_Part_17': 'None',
        'Q29_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(
    x=db_products.values,
    y=db_products.index.values,
)
plt.title('The big data products are used on a regular basis')

In [None]:
# Q30: Which of the following big data products (relational database, data
# warehouse, data lake, or similar) do you use most often?
# Single-answer column: tally the rows per distinct answer.
use_most_often_db = survey_india['Q30'].value_counts()
sns.barplot(
    x=use_most_often_db.values,
    y=use_most_often_db.index.values,
)
plt.title('The big data products are used most often')

In [None]:
# Q31-A: Which of the following business intelligence tools do you use on a regular basis?
# Relabel each option column with its tool name and count non-null cells per column.
bi_tools = (
    survey_india.loc[:, 'Q31_A_Part_1':'Q31_A_OTHER']
    .rename(columns={
        'Q31_A_Part_1': 'Amazon QuickSight',
        'Q31_A_Part_2': 'Microsoft Power BI',
        'Q31_A_Part_3': 'Google Data Studio',
        'Q31_A_Part_4': 'Looker',
        'Q31_A_Part_5': 'Tableau',
        'Q31_A_Part_6': 'Salesforce',
        'Q31_A_Part_7': 'Einstein Analytics',
        'Q31_A_Part_8': 'Qlik',
        'Q31_A_Part_9': 'Domo',
        'Q31_A_Part_10': 'TIBCO Spotfire',
        'Q31_A_Part_11': 'Alteryx',
        'Q31_A_Part_12': 'Sisense',
        'Q31_A_Part_13': 'SAP Analytics Cloud',
        'Q31_A_Part_14': 'None',
        'Q31_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(
    x=bi_tools.values,
    y=bi_tools.index.values,
)
plt.title('The business intelligence tools are used on a regular basis')

In [None]:
# Q32: Which of the following business intelligence tools do you use most often?
# Single-answer column: tally the rows per distinct answer.
bi_use_often = survey_india['Q32'].value_counts()
sns.barplot(y=bi_use_often.index.values, x=bi_use_often.values)
# Title spelling corrected ("intelligence") and worded like the Q30 "most often" plot.
plt.title('The business intelligence tools are used most often')

In [None]:
# Q33-A: Do you use any automated machine learning tools (or partial AutoML tools)
# on a regular basis?
# Relabel each option column with its category text and count non-null cells per column.
automated_ml = (
    survey_india.loc[:, 'Q33_A_Part_1':'Q33_A_OTHER']
    .rename(columns={
        'Q33_A_Part_1': 'Automated data augmentation (e.g. imgaug, albumentations)',
        # Closing parenthesis restored so the axis label is well-formed.
        'Q33_A_Part_2': 'Automated feature engineering/selection (e.g. tpot, boruta_py)',
        'Q33_A_Part_3': 'Automated model selection (e.g. auto-sklearn, xcessiv)',
        'Q33_A_Part_4': 'Automated model architecture searches (e.g. darts, enas)',
        'Q33_A_Part_5': 'Automated hyperparameter tuning (e.g. hyperopt, ray.tune, Vizier)',
        'Q33_A_Part_6': 'Automation of full ML pipelines (e.g. Google AutoML, H20 Driverless AI)',
        'Q33_A_Part_7': 'None',
        'Q33_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=automated_ml.index.values, x=automated_ml.values)
plt.title('The automated machine learning tools')

In [None]:
# Q34-A: Which of the following automated machine learning tools (or partial
# AutoML tools) do you use on a regular basis?
# Relabel each option column with its tool name and count non-null cells per column.
auto_ml_tools = (
    survey_india.loc[:, 'Q34_A_Part_1':'Q34_A_OTHER']
    .rename(columns={
        'Q34_A_Part_1': 'Google Cloud AutoML',
        'Q34_A_Part_2': 'H20 Driverless AI',
        'Q34_A_Part_3': 'Databricks AutoML',
        'Q34_A_Part_4': 'DataRobot AutoML',
        'Q34_A_Part_5': 'Tpot',
        'Q34_A_Part_6': 'Auto-Keras',
        'Q34_A_Part_7': 'Auto-Sklearn',
        'Q34_A_Part_8': 'Auto_ml',
        'Q34_A_Part_9': 'Xcessiv',
        'Q34_A_Part_10': 'MLbox',
        'Q34_A_Part_11': 'No/None',
        'Q34_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=auto_ml_tools.index.values, x=auto_ml_tools.values)
# A title lets the figure stand alone when the notebook is skimmed.
plt.title('Automated machine learning tools used on a regular basis')

In [None]:
# Q35-A: Do you use any tools to help manage machine learning experiments?
# Relabel each option column with its tool name and count non-null cells per column.
managed_tools = (
    survey_india.loc[:, 'Q35_A_Part_1':'Q35_A_OTHER']
    .rename(columns={
        'Q35_A_Part_1': 'Neptune.ai',
        'Q35_A_Part_2': 'Weights & Biases',
        'Q35_A_Part_3': 'Comet.ml',
        'Q35_A_Part_4': 'Sacred + Omniboard',
        'Q35_A_Part_5': 'TensorBoard',
        'Q35_A_Part_6': 'Guild.ai',
        'Q35_A_Part_7': 'Polyaxon',
        'Q35_A_Part_8': 'Trains',
        'Q35_A_Part_9': 'Domino Model Monitor',
        'Q35_A_Part_10': 'No/None',
        'Q35_A_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=managed_tools.index.values, x=managed_tools.values)
# A title lets the figure stand alone when the notebook is skimmed.
plt.title('Tools used to manage machine learning experiments')

In [None]:
# Q36: Where do you publicly share or deploy your data analysis or machine
# learning applications?
# Relabel each option column with its platform name and count non-null cells per column.
publicly_share = (
    survey_india.loc[:, 'Q36_Part_1':'Q36_OTHER']
    .rename(columns={
        'Q36_Part_1': 'Plotly Dash',
        'Q36_Part_2': 'Streamlit',
        'Q36_Part_3': 'NBViewer',
        'Q36_Part_4': 'GitHub',
        'Q36_Part_5': 'Personal blog',
        'Q36_Part_6': 'Kaggle',
        'Q36_Part_7': 'Colab',
        'Q36_Part_8': 'Shiny',
        'Q36_Part_9': 'None / I do not share my work publicly',
        'Q36_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=publicly_share.index.values, x=publicly_share.values)
# A title lets the figure stand alone when the notebook is skimmed.
plt.title('Where data analysis and ML applications are shared publicly')

In [None]:
# Q37: On which platforms have you begun or completed data science courses?
# Relabel each option column with its platform name and count non-null cells per column.
courses_ds = (
    survey_india.loc[:, 'Q37_Part_1':'Q37_OTHER']
    .rename(columns={
        'Q37_Part_1': 'Coursera',
        'Q37_Part_2': 'edX',
        'Q37_Part_3': 'Kaggle Learn Course',
        'Q37_Part_4': 'DataCamp',
        'Q37_Part_5': 'Fast.ai',
        'Q37_Part_6': 'Udacity',
        'Q37_Part_7': 'Udemy',
        'Q37_Part_8': 'LinkedIn Learning',
        'Q37_Part_9': 'Cloud-certification programs (direct from AWS, Azure, GCP, or similar)',
        'Q37_Part_10': 'University Courses (resulting in a university degree)',
        'Q37_Part_11': 'None',
        'Q37_OTHER': 'Other',
    })
    .count()
)
sns.barplot(
    x=courses_ds.values,
    y=courses_ds.index.values,
)
plt.title('The data science courses')

In [None]:
# Q38: What is the primary tool that you use at work or school to analyze data?
# Single-answer column: tally the rows per distinct answer.
primary_tools = survey_india['Q38'].value_counts()
sns.barplot(
    x=primary_tools.values,
    y=primary_tools.index.values,
)
plt.title('The primary tool to analyze data')

In [None]:
# Q39: Who/what are your favorite media sources that report on data science topics?
# Relabel each option column with its source description and count non-null cells per column.
media_sources = (
    survey_india.loc[:, 'Q39_Part_1':'Q39_OTHER']
    .rename(columns={
        'Q39_Part_1': 'Twitter (data science influencers)',
        'Q39_Part_2': 'Email newsletters (Data Elixir, O\'Reilly Data & AI, etc)',
        'Q39_Part_3': 'Reddit (r/machinelearning, etc)',
        'Q39_Part_4': 'Kaggle (notebooks, forums, etc)',
        'Q39_Part_5': 'Course Forums (forums.fast.ai, Coursera forums, etc)',
        'Q39_Part_6': 'YouTube (Kaggle YouTube, Cloud AI Adventures, etc)',
        # Closing parenthesis restored so the axis label is well-formed.
        'Q39_Part_7': 'Podcasts (Chai Time Data Science, O’Reilly Data Show, etc)',
        'Q39_Part_8': 'Blogs (Towards Data Science, Analytics Vidhya, etc)',
        'Q39_Part_9': 'Journal Publications (peer-reviewed journals, conference proceedings, etc)',
        'Q39_Part_10': 'Slack Communities (ods.ai, kagglenoobs, etc)',
        'Q39_Part_11': 'None',
        'Q39_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=media_sources.index.values, x=media_sources.values)
plt.title('The media resources')

In [None]:
# Q26-B: Which of the following cloud computing platforms do you hope to become
# more familiar with in the next 2 years?
# Relabel each option column with its platform name and count non-null cells per column.
future_platforms = (
    survey_india.loc[:, 'Q26_B_Part_1':'Q26_B_OTHER']
    .rename(columns={
        'Q26_B_Part_1': 'Amazon Web Services (AWS)',
        'Q26_B_Part_2': 'Microsoft Azure',
        'Q26_B_Part_3': 'Google Cloud Platform (GCP)',
        'Q26_B_Part_4': 'IBM Cloud / Red Hat',
        'Q26_B_Part_5': 'Oracle Cloud',
        'Q26_B_Part_6': 'SAP Cloud',
        # Parts 7 and 8 follow the same option order as the Q26-A mapping
        # (Salesforce, then VMware).
        # NOTE(review): assumes Q26-B lists options in the same order as Q26-A --
        # confirm against the question-text row of the survey CSV.
        'Q26_B_Part_7': 'Salesforce Cloud',
        'Q26_B_Part_8': 'VMware Cloud',
        'Q26_B_Part_9': 'Alibaba Cloud',
        'Q26_B_Part_10': 'Tencent Cloud',
        'Q26_B_Part_11': 'None',
        'Q26_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_platforms.index.values, x=future_platforms.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q26-A) one.
plt.title('Cloud platforms to learn in the next 2 years')

In [None]:
# Q27-B: In the next 2 years, do you hope to become more familiar with any of
# these specific cloud computing products?
# Relabel each option column with its product name and count non-null cells per column.
future_platform = (
    survey_india.loc[:, 'Q27_B_Part_1':'Q27_B_OTHER']
    .rename(columns={
        'Q27_B_Part_1': 'Amazon EC2',
        'Q27_B_Part_2': 'AWS Lambda',
        'Q27_B_Part_3': 'Amazon Elastic Container Service',
        'Q27_B_Part_4': 'Azure Cloud Services',
        'Q27_B_Part_5': 'Microsoft Azure Container Instances',
        'Q27_B_Part_6': 'Azure Functions',
        'Q27_B_Part_7': 'Google Cloud Compute Engine',
        'Q27_B_Part_8': 'Google Cloud Functions',
        'Q27_B_Part_9': 'Google Cloud Run',
        'Q27_B_Part_10': 'Google Cloud App Engine',
        'Q27_B_Part_11': 'None',
        'Q27_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_platform.index.values, x=future_platform.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q27-A) one.
plt.title('Cloud computing products to learn in the next 2 years')

In [None]:
# Q27-B: In the next 2 years, do you hope to become more familiar with any of
# these specific cloud computing products?
# NOTE(review): this cell computes the same Q27-B counts as the preceding cell,
# only under the name `future_cloud_platform`. It is kept in case that name is
# used later in the notebook; consider removing one of the two cells.
future_cloud_platform = (
    survey_india.loc[:, 'Q27_B_Part_1':'Q27_B_OTHER']
    .rename(columns={
        'Q27_B_Part_1': 'Amazon EC2',
        'Q27_B_Part_2': 'AWS Lambda',
        'Q27_B_Part_3': 'Amazon Elastic Container Service',
        'Q27_B_Part_4': 'Azure Cloud Services',
        'Q27_B_Part_5': 'Microsoft Azure Container Instances',
        'Q27_B_Part_6': 'Azure Functions',
        'Q27_B_Part_7': 'Google Cloud Compute Engine',
        'Q27_B_Part_8': 'Google Cloud Functions',
        'Q27_B_Part_9': 'Google Cloud Run',
        'Q27_B_Part_10': 'Google Cloud App Engine',
        'Q27_B_Part_11': 'None',
        'Q27_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_cloud_platform.index.values, x=future_cloud_platform.values)
# A title lets the figure stand alone when the notebook is skimmed.
plt.title('Cloud computing products to learn in the next 2 years')

In [None]:
# Q28-B: In the next 2 years, do you hope to become more familiar with any of
# these specific machine learning products?
# Relabel each option column with its product name and count non-null cells per column.
future_ml_products = (
    survey_india.loc[:, 'Q28_B_Part_1':'Q28_B_OTHER']
    .rename(columns={
        'Q28_B_Part_1': 'Amazon SageMaker',
        'Q28_B_Part_2': 'Amazon Forecast',
        'Q28_B_Part_3': 'Amazon Rekognition',
        'Q28_B_Part_4': 'Azure Machine Learning Studio',
        'Q28_B_Part_5': 'Azure Cognitive Services',
        'Q28_B_Part_6': 'Google Cloud AI Platform / Google Cloud ML Engine',
        'Q28_B_Part_7': 'Google Cloud Video AI',
        'Q28_B_Part_8': 'Google Cloud Natural Language',
        'Q28_B_Part_9': 'Google Cloud Vision AI',
        'Q28_B_Part_10': 'None',
        'Q28_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_ml_products.index.values, x=future_ml_products.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q28-A) one.
plt.title('Machine learning products to learn in the next 2 years')

In [None]:
# Q29-B: Which of the following big data products (relational databases, data
# warehouses, data lakes, or similar) do you hope to become more familiar with
# in the next 2 years?
# Relabel each option column with its product name and count non-null cells per column.
future_bd = (
    survey_india.loc[:, 'Q29_B_Part_1':'Q29_B_OTHER']
    .rename(columns={
        'Q29_B_Part_1': 'MySQL',
        'Q29_B_Part_2': 'PostgreSQL',
        'Q29_B_Part_3': 'SQLite',
        'Q29_B_Part_4': 'Oracle Database',
        'Q29_B_Part_5': 'MongoDB',
        'Q29_B_Part_6': 'Snowflake',
        'Q29_B_Part_7': 'IBM Db2',
        'Q29_B_Part_8': 'Microsoft SQL Server',
        'Q29_B_Part_9': 'Microsoft Access',
        'Q29_B_Part_10': 'Microsoft Azure Data Lake Storage',
        'Q29_B_Part_11': 'Amazon Redshift',
        'Q29_B_Part_12': 'Amazon Athena',
        'Q29_B_Part_13': 'Amazon DynamoDB',
        'Q29_B_Part_14': 'Google Cloud BigQuery',
        'Q29_B_Part_15': 'Google Cloud SQL',
        'Q29_B_Part_16': 'Google Cloud Firestore',
        'Q29_B_Part_17': 'None',
        'Q29_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_bd.index.values, x=future_bd.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q29-A) one.
plt.title('Big data products to learn in the next 2 years')

In [None]:
# Q31-B: Which of the following business intelligence tools do you hope to
# become more familiar with in the next 2 years?
# Relabel each option column with its tool name and count non-null cells per column.
future_bi = (
    survey_india.loc[:, 'Q31_B_Part_1':'Q31_B_OTHER']
    .rename(columns={
        # Parts 1 and 2 follow the same option order as the Q31-A mapping
        # (Amazon QuickSight, then Microsoft Power BI).
        # NOTE(review): assumes Q31-B lists options in the same order as Q31-A --
        # confirm against the question-text row of the survey CSV.
        'Q31_B_Part_1': 'Amazon QuickSight',
        'Q31_B_Part_2': 'Microsoft Power BI',
        'Q31_B_Part_3': 'Google Data Studio',
        'Q31_B_Part_4': 'Looker',
        'Q31_B_Part_5': 'Tableau',
        'Q31_B_Part_6': 'Salesforce',
        'Q31_B_Part_7': 'Einstein Analytics',
        'Q31_B_Part_8': 'Qlik',
        'Q31_B_Part_9': 'Domo',
        'Q31_B_Part_10': 'TIBCO Spotfire',
        'Q31_B_Part_11': 'Alteryx',
        'Q31_B_Part_12': 'Sisense',
        'Q31_B_Part_13': 'SAP Analytics Cloud',
        'Q31_B_Part_14': 'None',
        'Q31_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_bi.index.values, x=future_bi.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q31-A) one.
plt.title('Business intelligence tools to learn in the next 2 years')

In [None]:
# Q33-B: Which categories of automated machine learning tools (or partial AutoML
# tools) do you hope to become more familiar with in the next 2 years?
# Relabel each option column with its category text and count non-null cells per column.
future_mltools = (
    survey_india.loc[:, 'Q33_B_Part_1':'Q33_B_OTHER']
    .rename(columns={
        'Q33_B_Part_1': 'Automated data augmentation (e.g. imgaug, albumentations)',
        'Q33_B_Part_2': 'Automated feature engineering/selection (e.g. tpot, boruta_py)',
        'Q33_B_Part_3': 'Automated model selection (e.g. auto-sklearn, xcessiv)',
        'Q33_B_Part_4': 'Automated model architecture searches (e.g. darts, enas)',
        'Q33_B_Part_5': 'Automated hyperparameter tuning (e.g. hyperopt, ray.tune, Vizier)',
        'Q33_B_Part_6': 'Automation of full ML pipelines (e.g. Google Cloud AutoML, H20 Driverless AI)',
        'Q33_B_Part_7': 'None',
        'Q33_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_mltools.index.values, x=future_mltools.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q33-A) one.
plt.title('Automated ML tool categories to learn in the next 2 years')

In [None]:
# Q34-B: Which specific automated machine learning tools (or partial AutoML
# tools) do you hope to become more familiar with in the next 2 years?
# Relabel each option column with its tool name and count non-null cells per column.
future_auto_ml = (
    survey_india.loc[:, 'Q34_B_Part_1':'Q34_B_OTHER']
    .rename(columns={
        'Q34_B_Part_1': 'Google Cloud AutoML',
        'Q34_B_Part_2': 'H20 Driverless AI',
        'Q34_B_Part_3': 'Databricks AutoML',
        'Q34_B_Part_4': 'DataRobot AutoML',
        'Q34_B_Part_5': 'Tpot',
        'Q34_B_Part_6': 'Auto-Keras',
        'Q34_B_Part_7': 'Auto-Sklearn',
        'Q34_B_Part_8': 'Auto_ml',
        'Q34_B_Part_9': 'Xcessiv',
        'Q34_B_Part_10': 'MLbox',
        'Q34_B_Part_11': 'None',
        'Q34_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_auto_ml.index.values, x=future_auto_ml.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q34-A) one.
plt.title('Automated ML tools to learn in the next 2 years')

In [None]:
# Q35-B: In the next 2 years, do you hope to become more familiar with any of
# these tools for managing ML experiments?
# Relabel each option column with its tool name and count non-null cells per column.
future_ml_experiments = (
    survey_india.loc[:, 'Q35_B_Part_1':'Q35_B_OTHER']
    .rename(columns={
        'Q35_B_Part_1': 'Neptune.ai',
        'Q35_B_Part_2': 'Weights & Biases',
        'Q35_B_Part_3': 'Comet.ml',
        'Q35_B_Part_4': 'Sacred + Omniboard',
        'Q35_B_Part_5': 'TensorBoard',
        'Q35_B_Part_6': 'Guild.ai',
        'Q35_B_Part_7': 'Polyaxon',
        'Q35_B_Part_8': 'Trains',
        'Q35_B_Part_9': 'Domino Model Monitor',
        'Q35_B_Part_10': 'None',
        'Q35_B_OTHER': 'Other',
    })
    .count()
)
sns.barplot(y=future_ml_experiments.index.values, x=future_ml_experiments.values)
# The title distinguishes this "next 2 years" plot from the current-usage (Q35-A) one.
plt.title('ML experiment management tools to learn in the next 2 years')