In [None]:
import pandas as pd
import plotly.express as px

# Raw Kaggle 2021 survey export. low_memory=False lets pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
SURVEY_CSV_PATH = '../input/kaggle-survey-2021/kaggle_survey_2021_responses.csv'
df = pd.read_csv(SURVEY_CSV_PATH, low_memory=False)

In [None]:
# Promote the first data row to column labels and drop it from the data.
# NOTE: this rebinds `df`, so re-running the cell without reloading the CSV
# would consume one more row.
question_row = df.iloc[0]
df = df.iloc[1:]
df.columns = question_row

In [None]:
def _match_gdp_country(survey_name, gdp_names):
    """Return the GDP-table country name that a survey label refers to, or None.

    A GDP name matches when the survey label equals it, or starts with it and
    the next character is not a letter or digit (e.g. 'United States' matches
    'United States of America'). A plain substring test is too loose: 'Niger'
    is a substring of 'Nigeria', and 'Ireland' is a substring of any label that
    ends in 'Northern Ireland'. When several names match, the longest one wins.
    """
    if not isinstance(survey_name, str):
        return None
    best = None
    for gdp_name in gdp_names:
        if not survey_name.startswith(gdp_name):
            continue
        remainder = survey_name[len(gdp_name):]
        if remainder and remainder[0].isalnum():
            # The GDP name ends in the middle of a word: different country.
            continue
        if best is None or len(gdp_name) > len(best):
            best = gdp_name
    return best


# Respondent count per country of residence (4th column of the survey frame).
countries = df.iloc[:, 3]
# rename_axis + reset_index(name=...) names both columns explicitly, so the result
# is 'Country'/'Count' however the installed pandas labels value_counts() output.
country_counts = (
    countries.value_counts()
    .rename_axis('Country')
    .reset_index(name='Count')
)
country_counts['Country'] = country_counts['Country'].replace('Viet Nam', 'Vietnam')

from urllib.request import urlopen
import json
# NOTE(review): `counties` is not read by any cell visible in this notebook; the
# download is kept only in case a cell outside this view depends on it.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

info = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# GDP-table country name -> country code.
di = dict(zip(info['COUNTRY'], info['CODE']))

# Map every survey label to its GDP-table name; labels with no match are dropped.
gdp_names = [name for name in di if isinstance(name, str)]
matched = country_counts.assign(
    Country=country_counts['Country'].map(
        lambda label: _match_gdp_country(label, gdp_names)
    )
).dropna(subset=['Country'])

# One row per GDP-table country: summed count plus its code, ascending by count.
dfcn = matched.groupby('Country', as_index=False)['Count'].sum()
dfcn['Code'] = dfcn['Country'].map(di)
dfcn = dfcn.sort_values(by="Count")

In [None]:
# Bar chart of respondent counts per country, coloured by the count itself.
country_bar_title = {
    'text': "Country wise user count",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig = px.bar(
    dfcn,
    x='Country',
    y='Count',
    height=600,
    color='Count',
    color_continuous_scale='pinkyl',
    template='plotly_white',
)
# Treat country names as discrete categories on the x axis.
fig.update_xaxes(type='category')
fig.update_layout(title=country_bar_title)
fig.show()

In [None]:
# World map shaded by respondent count; countries are located via the 'Code' column.
# NOTE(review): px.choropleth's default locationmode expects ISO-3 codes;
# presumably that is what 'Code' holds - confirm against the GDP table.
map_title = {
    'text': "Demographic distribution of Users",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
# Hide the raw code in the hover box; the country name is shown as hover title.
hover_fields = {'Code': False}
fig = px.choropleth(
    dfcn,
    locations='Code',
    color='Count',
    color_continuous_scale='pinkyl',
    hover_name="Country",
    template='plotly_white',
    hover_data=hover_fields,
)
fig.update_layout(title=map_title, dragmode=False)
fig.show()

In [None]:
# Pie chart of each country's share of respondents.
pie_title = {
    'text': "Percentage of users from countries",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig = px.pie(
    dfcn,
    values='Count',
    names='Country',
    title='% user from Country',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# The title text passed to px.pie is replaced by this layout update.
fig.update_layout(title=pie_title)
# No text on the slices themselves.
fig.update_traces(textinfo='none')

fig.show()

# USA vs India

As seen above, India has the most users, followed by the USA. We will therefore perform a comparative study of users from India versus users from the USA.

In [None]:
# Survey rows whose stated country of residence is India.
residence_india = df['In which country do you currently reside?']
indian = df[residence_india == 'India']

In [None]:
# Survey rows whose stated country of residence is the United States of America.
residence_usa = df['In which country do you currently reside?']
usa = df[residence_usa == 'United States of America']

# India

In [None]:
# Number of Indian respondents per education level (5th survey column).
# rename_axis + reset_index(name=...) names both columns explicitly, so the frame
# has 'Degree'/'Count' however the installed pandas labels value_counts() output
# (a rename keyed on 'index' only works on pandas < 2.0).
indian_degree = (
    indian.iloc[:, 4]
    .value_counts()
    .rename_axis('Degree')
    .reset_index(name='Count')
)

In [None]:
# Bar chart: Indian respondents per education level.
india_degree_title = {
    'text': "India : Degree vs No. of Users",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig = px.bar(
    indian_degree,
    x='Degree',
    y='Count',
    height=600,
    width=1000,
    color='Count',
    color_continuous_scale='mint',
    template='plotly_white',
)
# Degree labels are discrete categories on the x axis.
fig.update_xaxes(type='category')
fig.update_layout(title=india_degree_title)

fig.show()

In [None]:
# Number of Indian respondents per job title (6th survey column).
# Columns are named explicitly so the result is 'Profession'/'Count' on every
# pandas version (a rename keyed on 'index' only works on pandas < 2.0).
indian_profession = (
    indian.iloc[:, 5]
    .value_counts()
    .rename_axis('Profession')
    .reset_index(name='Count')
)

# USA

In [None]:
# Number of USA respondents per education level (5th survey column).
# rename_axis + reset_index(name=...) names both columns explicitly, so the frame
# has 'Degree'/'Count' however the installed pandas labels value_counts() output
# (a rename keyed on 'index' only works on pandas < 2.0).
usa_degree = (
    usa.iloc[:, 4]
    .value_counts()
    .rename_axis('Degree')
    .reset_index(name='Count')
)

In [None]:
# Bar chart: USA respondents per education level.
usa_degree_title = {
    'text': "USA : Degree vs No. of Users",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig = px.bar(
    usa_degree,
    x='Degree',
    y='Count',
    height=600,
    color='Count',
    color_continuous_scale='mint',
    template='plotly_white',
)
# Degree labels are discrete categories on the x axis.
fig.update_xaxes(type='category')
fig.update_layout(title=usa_degree_title)

fig.show()

In [None]:
# Number of USA respondents per job title (6th survey column).
# Columns are named explicitly so the result is 'Profession'/'Count' on every
# pandas version (a rename keyed on 'index' only works on pandas < 2.0).
usa_profession = (
    usa.iloc[:, 5]
    .value_counts()
    .rename_axis('Profession')
    .reset_index(name='Count')
)

# Comparative Study

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Shorten the longest degree label so it fits in the pie legends.
indian_degree.Degree = indian_degree.Degree.replace('Some college/university study without earning a bachelor’s degree', 'Some college/university without degree')
usa_degree.Degree = usa_degree.Degree.replace('Some college/university study without earning a bachelor’s degree', 'Some college/university without degree')

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]],subplot_titles=('India', 'USA'))
fig.add_trace(go.Pie(labels=indian_degree.Degree.values, values=indian_degree.Count.values, name="India", marker_colors=px.colors.sequential.RdBu),
              1, 1)
fig.add_trace(go.Pie(labels=usa_degree.Degree.values, values=usa_degree.Count.values, name="USA", marker_colors=px.colors.sequential.RdBu),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.5, hoverinfo="label+percent+name")

# This figure compares education levels, so the title says "Degree"
# (the original reused the profession chart's title).
fig.update_layout(title={
        'text': "Degree : India vs USA",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

## Conclusions :

As the survey results show, more than **50%** of the users from India have completed a **bachelor's degree.**
Also, **master's and bachelor's degree** holders make up about **86.7%** of the total users from India.


India has a smaller percentage of users with a **Doctoral degree** than the USA.



In [None]:


# Side-by-side donut charts of job titles: India on the left, USA on the right.
# Both subplot cells must be of type 'domain' to host Pie traces.
pie_cell = {'type': 'domain'}
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[pie_cell, dict(pie_cell)]],
    subplot_titles=('India', 'USA'),
)

india_pie = go.Pie(
    labels=indian_profession.Profession.values,
    values=indian_profession.Count.values,
    name="India",
    marker_colors=px.colors.sequential.RdBu,
)
usa_pie = go.Pie(
    labels=usa_profession.Profession.values,
    values=usa_profession.Count.values,
    name="USA",
    marker_colors=px.colors.sequential.RdBu,
)
fig.add_trace(india_pie, 1, 1)
fig.add_trace(usa_pie, 1, 2)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.5, hoverinfo="label+percent+name")

profession_title = {
    'text': "Profession : India vs USA",
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig.update_layout(title=profession_title)
fig.show()

## Conclusions

The majority of the Indian users are students.

The USA has an almost equal number of students and data scientists.

Both countries have similar percentages of users in each profession, except for the percentage of students.

In [None]:
# Export the two per-country subsets to the working directory, without the index.
for subset, csv_name in ((indian, 'India.csv'), (usa, 'USA.csv')):
    subset.to_csv(csv_name, index=False)