#### Import libs

In [None]:
# Suppress warnings 
import warnings

from IPython.display import HTML

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.offline as py
import plotly.express as px
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objects as go
from plotly import tools
init_notebook_mode(connected=True)

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

pd.set_option("display.max_rows",500)
pd.set_option("display.max_columns",200)

%matplotlib inline

In [None]:
# header=1 takes the column names from the second line of the CSV; the cells
# below select columns by their full question text.
mcr = pd.read_csv(
    '/kaggle/input/kaggle-survey-2020/kaggle_survey_2020_responses.csv',
    header=1,
    low_memory=False,
)

In [None]:
# Keep only the rows whose country answer is exactly 'Brazil'.
is_brazil = mcr['In which country do you currently reside?'] == 'Brazil'
mcr = mcr[is_brazil]

In [None]:
def compute_percentage(df,col):
    """Return how often each value of ``df[col]`` occurs, as a percentage.

    Args:
        df: DataFrame that contains ``col``.
        col: Name of the column to summarise.

    Returns:
        A Series indexed by the distinct values of the column, holding the
        normalised ``value_counts`` scaled from fractions to percentages.
        Missing values are left out, which is the ``value_counts`` default.
    """
    proportions = df[col].value_counts(normalize=True)
    return 100 * proportions

def bi_variant_chart(col1,col2,x_title,y_title, mcr_brazil):
    """Show a bar chart of ``col1`` value counts, one trace per ``col2`` value.

    Args:
        col1: Column whose distinct values are placed on the x axis.
        col2: Column whose distinct non-null values each become one bar trace.
        x_title: Text used as the figure title.
        y_title: Text used as the y-axis title.
        mcr_brazil: DataFrame that contains both columns.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Series.unique() keeps NaN, and ``== NaN`` never matches any row, so a NaN
    # entry would only add an empty trace labelled "nan" to the legend.
    groups = mcr_brazil[col2].dropna().unique()

    traces = []
    for group in groups:
        # Count once so the x labels and the bar heights come from one Series.
        counts = mcr_brazil.loc[mcr_brazil[col2] == group, col1].value_counts()
        traces.append(
            go.Bar(x = counts.index, y = counts.values, opacity = 0.6, name = group)
        )

    fig = go.Figure(data = traces, layout = go.Layout())
    fig.update_layout(
        title = x_title,
        yaxis = dict(title = y_title),
        legend = dict( bgcolor = 'rgba(255, 255, 255, 0)', bordercolor = 'rgba(255, 255, 255, 0)'),
        bargap = 0.15, bargroupgap = 0.1,legend_orientation="h")
    fig.show()
    
def bar_graph(col,type_of_graph, mcr_brazil):
    """Plot the percentage share of each value in ``col`` as a bar or pie chart.

    Args:
        col: Name of the column to summarise.
        type_of_graph: ``'bar'`` for a horizontal bar chart or ``'pie'`` for a
            pie chart.
        mcr_brazil: DataFrame that contains ``col``.

    Returns:
        None. The figure is rendered with ``py.iplot``.

    Raises:
        ValueError: If ``type_of_graph`` is neither ``'bar'`` nor ``'pie'``.
    """
    # Validate first: otherwise an unknown chart type leaves ``data`` unbound
    # and the failure surfaces as an unrelated UnboundLocalError.
    if type_of_graph not in ('bar', 'pie'):
        raise ValueError(
            f"type_of_graph must be 'bar' or 'pie', got {type_of_graph!r}"
        )

    data_frame = compute_percentage(mcr_brazil,col)

    if type_of_graph == 'bar':
        data = [go.Bar(
            x = data_frame.values,
            y = data_frame.index,
            opacity = 0.6,
            orientation='h',
            # Colour each bar by its own percentage value.
            marker=dict(color=data_frame.values,colorscale='portland')
        )]
    else:
        data = [go.Pie(
            labels = data_frame.index,
            values = data_frame.values,
            textfont = dict(size = 20)
        )]

    fig = go.Figure(data = data, layout = go.Layout())
    py.iplot(fig)

## Glimpse of Data

In [None]:
# Report the shape tuple of mcr.
data_shape = mcr.shape
print('Size of data', data_shape)

## Checking missing data in the survey responses
Number and percentage of missing values in each column.

In [None]:
# Build the null mask once and derive both statistics from it.
null_mask = mcr.isnull()
null_counts = null_mask.sum()

total = null_counts.sort_values(ascending = False)
percent = (null_counts / null_mask.count() * 100).sort_values(ascending = False)

# concat aligns the two Series on column name, giving one row per column.
missing_train_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_train_data.head(10)

## Column Types

In [None]:
# Count how many columns share each dtype.
column_dtypes = mcr.dtypes
column_dtypes.value_counts()

## Duplicate values
Let's now check, for each column, which value is repeated most often and how many times it occurs.

In [None]:
# Inspect columns 1 through 10; the slice leaves out the first column.
features = mcr.columns.values[1:11]

# One [column name, highest count, most frequent value] entry per column.
unique_max = []
for feature in features:
    values = mcr[feature].value_counts()
    # value_counts() drops NaN, so a column with no answers yields an empty
    # Series, and idxmax() raises ValueError on an empty Series. Skip those.
    if values.empty:
        continue
    unique_max.append([feature, values.max(), values.idxmax()])
    

In [None]:
# Rank the per-column summary by count, then transpose the top rows for display.
ranked = pd.DataFrame(
    unique_max, columns=['Feature', 'Max duplicates', 'Value']
).sort_values(by = 'Max duplicates', ascending=False)
np.transpose(ranked.head(10))

# Visualizations

For how many years have you used machine learning methods?

In [None]:
# Percentage breakdown of answers to the machine-learning experience question.
bar_graph(
    col="For how many years have you used machine learning methods?",
    type_of_graph="bar",
    mcr_brazil=mcr,
)

For how many years have you been writing code and/or programming?

In [None]:
# Percentage breakdown of answers to the programming experience question.
bar_graph(
    col="For how many years have you been writing code and/or programming?",
    type_of_graph="bar",
    mcr_brazil=mcr,
)

In [None]:
# Programming experience counts, one trace per country answer.
bi_variant_chart(
    col1="For how many years have you been writing code and/or programming?",
    col2="In which country do you currently reside?",
    x_title="Experience VS Country",
    y_title="Count",
    mcr_brazil=mcr,
)

In [None]:
# Programming experience counts, one trace per answer in the Python column.
bi_variant_chart(
    col1="For how many years have you been writing code and/or programming?",
    col2="What programming languages do you use on a regular basis? (Select all that apply) - Selected Choice - Python",
    x_title="Experience VS programming languages regular basis",
    y_title="Count",
    mcr_brazil=mcr,
)

Select the title most similar to your current role (or most recent title if retired)

In [None]:
# Percentage breakdown of answers to the job-title question.
bar_graph(
    col='Select the title most similar to your current role (or most recent title if retired): - Selected Choice',
    type_of_graph="bar",
    mcr_brazil=mcr,
)