# Analyzing Kaggle Data 2020

![](https://i.pinimg.com/originals/ec/22/21/ec2221f03145276371eff4a398ce0065.jpg)

I started learning data science nine months ago, and this is my third notebook on Kaggle. I'm still at a beginner level.

In [None]:
!pip install pywaffle

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from pywaffle import Waffle


In [None]:
# Load the raw survey export. Its first row (label 0) is split off as `questions`
# and every later row is kept as `data`.
responses_raw = pd.read_csv(
    "/kaggle/input/kaggle-survey-2020/kaggle_survey_2020_responses.csv",
    low_memory=False,
)
questions = responses_raw.iloc[0]
data = responses_raw.iloc[1:]

# What is your age (# years)?

In [None]:
# Q1: number of respondents per age bracket, most frequent bracket first.
Age = data['Q1'].value_counts()
colors = ['#74d2e7', '#48a9c5', '#0085ad', '#8db9ca', '#4298b5', '#005670',
          '#00205b', '#009f4d', '#84bd00', '#efdf00', '#fe5000']

# Bar chart: the raw counts are passed through `text=`, the attribute meant for
# per-bar labels (`texttemplate` is for '%{...}' template strings).
fig = go.Figure(data=[go.Bar(
    x=Age.index,
    y=Age.values,
    text=Age.values,
    textposition='outside',
    marker=dict(color=colors, line=dict(color='#000000', width=2)),
)])
fig.update_layout(title_text='Q1 Age Distribution')
fig.show()

# Pie chart of the same counts, labelled with each bracket's percentage.
fig = go.Figure(data=[go.Pie(labels=Age.index, values=Age.values)])
fig.update_traces(hoverinfo='label+percent', textinfo='percent', textfont_size=18,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Q1 Age Distribution')
fig.show()

# What is your gender? - Selected Choice

In [None]:
# Q2: respondents per gender answer, most frequent first.
gender = data['Q2'].value_counts()

# Only the two most frequent answers are drawn.
# NOTE(review): the 'male'/'female' icons assume those two answers are Man and Woman, in that order — confirm.
top_two = gender.iloc[:2]
waffle_labels = [f"{answer}({count})" for answer, count in top_two.items()]

fig = plt.figure(
    FigureClass=Waffle,
    figsize=(12, 10),
    rows=6,
    columns=10,
    values=top_two.values,
    labels=waffle_labels,
    colors=['#09347a', '#ae63e4'],
    icons=['male', 'female'],
    icon_legend=True,
    font_size=50,
    title={'label': 'Gender Distribution', 'loc': 'center'},
    legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
)

In [None]:
# Cross-tabulate age bracket (rows, Q1) against gender answer (columns, Q2).
gender = data.groupby('Q1')['Q2'].value_counts().unstack()
x = gender.index

# Gender columns in stacking order; only Man and Woman get an explicit colour,
# the others are left to plotly's defaults.
stack_order = ['Nonbinary', 'Prefer to self-describe', 'Prefer not to say', 'Man', 'Woman']
explicit_colours = {'Man': '#213e97', 'Woman': '#ae63e4'}

fig = go.Figure()
for answer in stack_order:
    colour_kwargs = {'marker_color': explicit_colours[answer]} if answer in explicit_colours else {}
    fig.add_trace(go.Bar(x=x, y=gender[answer].values, name=answer, **colour_kwargs))

fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    title_text='Q1 , Q2 Age and Gender',
    barmode='stack',
    xaxis={'categoryorder': 'total descending'},
)
fig.show()

# In which country do you currently reside?

In [None]:
from geopy.exc import GeopyError
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import folium
from folium.plugins import MarkerCluster

# Q3: respondents per country. 'Other' is dropped before any coordinates are looked up.
# .copy() makes the filtered frame independent so the column assignments below
# do not act on a slice of the value_counts result.
country_df = data['Q3'].value_counts().rename_axis('country').reset_index(name='counts')
country_df = country_df[country_df['country'] != 'Other'].copy()

# Map survey labels to plain country names before geocoding.
# 'Republic of Korea' is South Korea (it was previously mapped to North Korea).
country_df['country'] = country_df['country'].replace({
    'Iran, Islamic Republic of...': 'Iran',
    'Republic of Korea': 'South Korea',
})
# If the rename produced a duplicate country name, merge the rows so each
# country gets a single marker (a no-op when names are already unique).
country_df = country_df.groupby('country', as_index=False, sort=False)['counts'].sum()

geolocator = Nominatim(user_agent='world_map')
# Space requests at least one second apart to stay within the public
# Nominatim service's usage limits.
rate_limited_geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)


def geolocate(country):
    """Return ``(latitude, longitude)`` for a country name.

    Returns ``(nan, nan)`` when the geocoder raises a geopy error or finds no
    match, so callers can always unpack two values.
    """
    try:
        loc = rate_limited_geocode(country)
    except GeopyError:
        return (np.nan, np.nan)
    if loc is None:
        return (np.nan, np.nan)
    return (loc.latitude, loc.longitude)


country_df['coord'] = country_df['country'].apply(geolocate)
country_df['latitude'] = [lat for lat, _ in country_df['coord']]
country_df['longitude'] = [lon for _, lon in country_df['coord']]

# Denominator for the popup percentages: all respondents except those who answered 'Other'.
all_users = country_df.counts.sum()

world_map = folium.Map(tiles="cartodbpositron")
marker_cluster = MarkerCluster().add_to(world_map)

# Countries that could not be geocoded are left off the map instead of
# passing NaN coordinates to folium.
located = country_df.dropna(subset=['latitude', 'longitude'])
for row in located.itertuples(index=False):
    share = '{:.2f}'.format(row.counts * 100 / all_users)
    popup_text = """
                    {}% of all Users <br>""".format(share)
    folium.CircleMarker(
        location=[row.latitude, row.longitude],
        radius=5,
        popup=popup_text,
        fill=True,
    ).add_to(marker_cluster)
world_map

In [None]:
# Q3: respondents per country, largest first.
# One groupby().size() supplies both the names and the counts; the previous
# groupby().count() aggregated every survey column just to read its index.
q3_sizes = data.groupby(['Q3']).size()
country = pd.DataFrame({'country': q3_sizes.index, 'count': q3_sizes.values})
country = country.sort_values(by=['count'], ascending=False)

fig = px.bar(country, y='country', x='count', color='country', title="Q3 Country")
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    autosize=True,
    width=850,
    height=1200,
    margin=dict(l=5, r=5, b=25, t=25, pad=1),
)
fig.show()

In [None]:
# Sunburst hierarchy: country (Q3) -> gender (Q2) -> age bracket (Q1).
# maxdepth=2 limits how many rings are visible at a time.
sunburst_path = ['Q3', 'Q2', 'Q1']
fig_sunburst = px.sunburst(
    data,
    path=sunburst_path,
    maxdepth=2,
    template='seaborn',
    title='Q3,Q2,Q1',
).update_traces(textinfo='label+percent entry')  # label each sector with its percentage

# Last expression of the cell, so the returned figure is what gets displayed.
fig_sunburst.update_layout(margin=dict(t=25, l=0, r=0, b=0))

# What is the highest level of formal education that you have attained or plan to attain within the next 2 years?



In [None]:
# Q4: respondents per education level, sorted ascending by count.
q4_counts = data['Q4'].value_counts()
Dgree = pd.DataFrame({'counts': q4_counts.values, 'names': q4_counts.index})
Dgree = Dgree.sort_values(by=['counts'])

# NOTE(review): this palette may hold more entries than there are answer categories — confirm.
colors = ['#8B0000', '#B22222', '#FF0000', '#DC143C', '#CD5C5C', '#F08080', '#E9967A', '#FA8072']

# Share of all survey rows (including rows where Q4 is blank), as "12.34%" labels.
q4_share = Dgree['counts'] / data.index.size * 100
q4_labels = q4_share.map('{:.2f}%'.format).tolist()

fig = px.bar(Dgree, y="names", x="counts")
# Labels go through `text=`; `texttemplate` is meant for '%{...}' template strings.
fig.update_traces(
    text=q4_labels,
    textposition='inside',
    marker=dict(color=colors, line=dict(color='#000000', width=2)),
)
fig.update_layout(title_text='Q4 Education')

fig.show()

# Select the title most similar to your current role (or most recent title if retired): - Selected Choice


In [None]:
# Q5: respondents per job title, sorted ascending by count.
# (`Dgree` is kept as the frame name for compatibility, although here it holds job titles.)
q5_counts = data['Q5'].value_counts()
Dgree = pd.DataFrame({'counts': q5_counts.values, 'names': q5_counts.index})
Dgree = Dgree.sort_values(by=['counts'])

# NOTE(review): this palette may hold more entries than there are answer categories — confirm.
colors = ['#0099e5', '#0099e5', '#ff4c4c', '#34bf49', '#00a98f', '#00a98f', '#000000',
          '#be0027', '#cf8d2e', '#e4e932', '#2c9f45', '#371777', '#52325d', '#511378']

# Share of all survey rows (including rows where Q5 is blank), as "12.34%" labels.
q5_share = Dgree['counts'] / data.index.size * 100
q5_labels = q5_share.map('{:.2f}%'.format).tolist()

fig = px.bar(Dgree, y="names", x="counts")
# Labels go through `text=`; `texttemplate` is meant for '%{...}' template strings.
fig.update_traces(
    text=q5_labels,
    textposition='inside',
    marker=dict(color=colors, line=dict(color='#000000', width=2)),
)
fig.update_layout(title_text='Q5 Title')

fig.show()

# For how many years have you been writing code and/or programming?


In [None]:
# Q6: respondents per coding-experience bracket, sorted ascending by count.
# (`Dgree` is kept as the frame name for compatibility, although here it holds experience brackets.)
q6_counts = data['Q6'].value_counts()
Dgree = pd.DataFrame({'counts': q6_counts.values, 'names': q6_counts.index})
Dgree = Dgree.sort_values(by=['counts'])

# NOTE(review): this palette may hold more entries than there are answer categories — confirm.
colors = ['#004d73', '#004d73', '#00334e', '#007fdb', '#dbebfa', '#832561', '#11862f', '#bfca02', '#848a8c']

# Share of all survey rows (including rows where Q6 is blank), as "12.34%" labels.
q6_share = Dgree['counts'] / data.index.size * 100
q6_labels = q6_share.map('{:.2f}%'.format).tolist()

fig = px.bar(Dgree, y="names", x="counts")
# Labels go through `text=`; `texttemplate` is meant for '%{...}' template strings.
fig.update_traces(
    text=q6_labels,
    textposition='inside',
    marker=dict(color=colors, line=dict(color='#000000', width=2)),
)
fig.update_layout(title_text='Q6 Experience Years')

fig.show()

In [None]:
# Replace missing Q5/Q6 answers with the literal "none".
# This writes into `data` itself, so later cells see the filled values.
for answer_col in ('Q5', 'Q6'):
    data[answer_col] = data[answer_col].fillna("none")

# Sunburst hierarchy: job title (Q5) -> years of coding experience (Q6).
fig_sunburst = px.sunburst(
    data,
    path=['Q5', 'Q6'],
    maxdepth=3,
    template='seaborn',
    title='Q5,Q6',
).update_traces(textinfo='label+percent entry')  # label each sector with its percentage

# Last expression of the cell, so the returned figure is what gets displayed.
fig_sunburst.update_layout(margin=dict(t=25, l=0, r=0, b=0))

In [None]:
# One colour per Sankey link, light grey by default.
# NOTE(review): the list holds 50 entries but only 27 links are defined below;
# presumably the surplus entries are ignored — confirm.
linkColor = ['#f3f3f3'] * 50
# Links 4-15 are the ones whose source is node 3 ("Started Coding 30+"); highlight them.
# Slice assignment is used so the notebook-level `x` is not overwritten by a loop variable.
linkColor[4:16] = ['#B5EAD7'] * 12

# Node labels; link `source`/`target` values below are indices into this list.
node_labels = [
    "2020 Survey", "Worker", "Student", "Started Coding 30+", "Currently not employed",
    "Other", "Data Scientist", "Data Analyst", "Research Scientist", "Product/Project Manager",
    "Software Engineer", "Business Analyst", "Machine Learning Engineer", "Statistician",
    "Data Engineer", "DBA/Database Engineer",
]

# Hard-coded link table: link i runs from node source[i] to node target[i] with weight value[i].
link_source = [   0,    0,     0,    1,   3,   3,   3,  3,    3,   3,   3,   3,   3,   3,   3,  3,    1,    1,    1,    1,    1,    1,   1,   1,   1,   1,  1]
link_target = [   2,    4,     1,    3,   5,   6,   4,  7,    8,   9,  10,  11,  12,  13,  14, 15,    6,   10,    5,    7,    8,   12,  11,   9,  14,  13, 15]
link_value  = [5171, 1652, 12454, 5038, 894, 612, 589, 507, 462, 423, 368, 368, 171, 114, 110, 56, 2676, 1968, 1737, 1475, 1174, 1082, 798, 692, 437, 290, 125]

fig = go.Figure(data=[go.Sankey(
    arrangement="snap",
    node=dict(
        pad=10,
        thickness=5,
        line=dict(color="black", width=0),
        label=node_labels,
        # NOTE(review): only five x/y positions are given for sixteen nodes — confirm this is intended.
        x=[0, 0.25, 0.25, 0.35, 1],
        y=[0, 0.25, 0.25, 0.05, 1],
        color='lightblue',
    ),
    link=dict(
        source=link_source,
        target=link_target,
        value=link_value,
        color=linkColor,
    ),
)])

# The subtitle tags are now properly nested (<span><i>…</i></span>);
# the original closed </span> before </i>.
fig.update_layout(title=(
    "Kagglers Current Role"
    '<br><span style="font-size:10px"><i>Do You Want to Be a Data Scientist Too?</i></span>'
))
fig.show()