In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import graph_objects
import plotly.figure_factory as ff

In [None]:
# Load the raw survey response exports (2021 and 2018 editions) from the
# notebook's input directory.
df_2021 = pd.read_csv(
    filepath_or_buffer='../input/my-data/kaggle_survey_2021_responses.csv',
)
df_2018 = pd.read_csv(
    filepath_or_buffer='../input/my-data/2018_kaggle_ds_and_ml_survey_responses_only.csv',
)

In [None]:
# Build the three working subsets from the Q3 (country) answers:
#   df_2021_data - USA, Japan, China and South Korea together
#   df_2021_asia - Japan, China and South Korea
#   df_2021_usa  - USA only
#
# The reset index is assigned back (the original computed it and threw the
# result away), which also gives each subset a fresh, independent frame, so
# later cells can modify these frames without touching `df_2021`.
df_2021_data = df_2021[
    df_2021['Q3'].isin(['United States of America', 'Japan', 'China', 'South Korea'])
].reset_index(drop=True)
df_2021_asia = df_2021[
    df_2021['Q3'].isin(['Japan', 'China', 'South Korea'])
].reset_index(drop=True)
df_2021_usa = df_2021[
    df_2021['Q3'].isin(['United States of America'])
].reset_index(drop=True)

# Last expression of the cell: display the combined subset.
df_2021_data

In [None]:
# Renumber the USA subset from 0 and keep the result; reset_index() returns a
# new frame, so without the assignment the renumbering was only displayed.
# Re-running this cell is harmless.
df_2021_usa = df_2021_usa.reset_index(drop=True)
df_2021_usa

In [None]:
# Renumber the Asia subset from 0 and keep the result; reset_index() returns a
# new frame, so without the assignment the renumbering was only displayed.
# Re-running this cell is harmless.
df_2021_asia = df_2021_asia.reset_index(drop=True)
df_2021_asia

In [None]:
# Shorten the country names in Q3 to the codes used by the later cells.
#
# The recoded column is assigned through a new frame instead of calling
# `replace(..., inplace=True)` on `df_2021_data['Q3']`: an in-place method on
# a column selection is chained assignment, which pandas warns about and which
# leaves the parent frame unchanged when Copy-on-Write is enabled.
df_2021_data = df_2021_data.assign(
    Q3=df_2021_data['Q3'].replace({
        'United States of America': 'USA',
        'South Korea': 'KOR',
        'Japan': 'JAP',
        'China': 'CH',
    })
)

In [None]:
# Shorten the Asian country names in Q3 to short codes.
#
# The recoded column is assigned through a new frame instead of calling
# `replace(..., inplace=True)` on `df_2021_asia['Q3']`: an in-place method on
# a column selection is chained assignment, which pandas warns about and which
# leaves the parent frame unchanged when Copy-on-Write is enabled.
df_2021_asia = df_2021_asia.assign(
    Q3=df_2021_asia['Q3'].replace({
        'South Korea': 'KOR',
        'Japan': 'JAP',
        'China': 'CH',
    })
)

In [None]:
# Shorten the USA country name in Q3 to its short code.
#
# The recoded column is assigned through a new frame instead of calling
# `replace(..., inplace=True)` on `df_2021_usa['Q3']`: an in-place method on
# a column selection is chained assignment, which pandas warns about and which
# leaves the parent frame unchanged when Copy-on-Write is enabled.
df_2021_usa = df_2021_usa.assign(
    Q3=df_2021_usa['Q3'].replace('United States of America', 'USA')
)

In [None]:
# Frequency of each Q5 answer in the Asia subset, least common first, with the
# answers moved out of the index into a regular column.
# NOTE(review): this name is reassigned by a later cell in the visible notebook.
df_2021_asia_q5 = (
    df_2021_asia['Q5']
    .value_counts(ascending=True)
    .reset_index()
)

In [None]:
# Frequency of each Q5 answer in the USA subset, least common first, with the
# answers moved out of the index into a regular column.
# NOTE(review): this name is reassigned by a later cell in the visible notebook.
df_2021_usa_q5 = (
    df_2021_usa['Q5']
    .value_counts(ascending=True)
    .reset_index()
)

In [None]:
# Share of each Q5 answer among the Asia respondents, drawn as a bar chart.
asia_q5_counts = df_2021_asia['Q5'].value_counts()

# Name the count column explicitly. The original `.column = ["Q5"]` only set a
# stray Python attribute (the DataFrame property is `.columns`), so a "Q5"
# column existed only because older pandas names value_counts() output after
# the source column. pandas >= 2.0 names it "count" instead, which makes the
# `.Q5` lookups fail.
df_2021_asia_q5 = asia_q5_counts.to_frame(name="Q5")
# Drop the index name so reset_index() below always yields a column called
# "index", which is what the plotting code reads.
df_2021_asia_q5.index.name = None

asia_q5_share = df_2021_asia_q5["Q5"] / df_2021_asia_q5["Q5"].sum()
df_2021_asia_q5["ratio"] = np.round(asia_q5_share, 2)
# Percentage comes from the unrounded share. Deriving it from the 2-decimal
# `ratio` rounded twice and collapsed every label to a whole percent, unlike
# the per-country frames built by get_pnt().
df_2021_asia_q5['%'] = np.round(asia_q5_share * 100, 1)
df_asia_q5 = df_2021_asia_q5.reset_index()

fig = px.bar(df_asia_q5,
             x=df_asia_q5['index'],
             y=df_asia_q5['%'],
             color='%',
             text=df_asia_q5['%'].astype(str) + '%',
             title='Classification of jobs in the Asia'
            )
fig.show()

In [None]:
# Share of each Q5 answer among the USA respondents, drawn as a bar chart.
usa_q5_counts = df_2021_usa['Q5'].value_counts()

# Name the count column explicitly. The original `.column = ["Q5"]` only set a
# stray Python attribute (the DataFrame property is `.columns`), so a "Q5"
# column existed only because older pandas names value_counts() output after
# the source column. pandas >= 2.0 names it "count" instead, which makes the
# `.Q5` lookups fail.
df_2021_usa_q5 = usa_q5_counts.to_frame(name="Q5")
# Drop the index name so reset_index() below always yields a column called
# "index", which is what the plotting code reads.
df_2021_usa_q5.index.name = None

usa_q5_share = df_2021_usa_q5["Q5"] / df_2021_usa_q5["Q5"].sum()
df_2021_usa_q5["ratio"] = np.round(usa_q5_share, 2)
# Percentage comes from the unrounded share. Deriving it from the 2-decimal
# `ratio` rounded twice and collapsed every label to a whole percent, unlike
# the per-country frames built by get_pnt().
df_2021_usa_q5['%'] = np.round(usa_q5_share * 100, 1)
df_usa_q5 = df_2021_usa_q5.reset_index()

fig = px.bar(df_usa_q5,
             x=df_usa_q5['index'],
             y=df_usa_q5['%'],
             color='%',
             text=df_usa_q5['%'].astype(str) + '%',
             title='Classification of jobs in the USA'
            )
fig.show()

In [None]:
# Grouped bar chart comparing the Q5 (job classification) shares of the Asia
# and USA subsets side by side.
fig = go.Figure()

# One bar trace per region. The legend names previously read "... of Age",
# although the bars show the Q5 job breakdown, not an age distribution.
for region_name, region_q5, bar_color in (
    ('Asia', df_asia_q5, 'indianred'),
    ('USA', df_usa_q5, 'lightsalmon'),
):
    fig.add_trace(go.Bar(
        x=region_q5['index'],
        y=region_q5['%'],
        text=region_q5['%'].astype(str) + '%',
        name=region_name,
        marker_color=bar_color,
    ))

fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title='Classification of jobs: Asia vs. USA',
    yaxis_title='%',
)
fig.show()

In [None]:
# Number of respondents for every (country, Q5 answer) pair, as a flat frame
# with columns Q3, Q5 and Count.
q3_q5 = (
    df_2021_data
    .groupby(['Q3', 'Q5'])
    .size()
    .reset_index(name="Count")
)

def get_pnt(data, country):
    """Return one country's rows with their share of that country's total.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least a ``Q3`` column (compared against ``country``)
        and a numeric ``Count`` column.
    country : str
        Value of ``Q3`` to keep.

    Returns
    -------
    pandas.DataFrame
        The matching rows, re-indexed from 0, with two extra columns:
        ``percentage`` (``Count`` divided by the sum of ``Count`` over the
        matching rows) and ``%`` (that fraction times 100, rounded to one
        decimal). ``data`` itself is not modified.
    """
    matches = data['Q3'].eq(country)
    subset = data.loc[matches].reset_index(drop=True)
    share = subset["Count"].div(subset["Count"].sum())
    # Keyword unpacking is needed because '%' is not a valid identifier.
    return subset.assign(**{'percentage': share, '%': np.round(share * 100, 1)})

# Per-country Q5 breakdowns (with percentage columns), keyed by the short
# country codes stored in Q3.
usa_df, china_df, japan_df, korea_df = (
    get_pnt(q3_q5, country_code)
    for country_code in ("USA", "CH", "JAP", "KOR")
)

In [None]:
import plotly.figure_factory as ff

# 2x2 grid of bar charts: the Q5 percentage breakdown for each country,
# with x and y axes shared across the grid.
fig = make_subplots(
    rows=2,
    cols=2,
    shared_xaxes=True,
    shared_yaxes=True,
    vertical_spacing=0.05,
    subplot_titles=("USA with Q5", "China with Q5", "Japan with Q5", "Korea with Q5"),
    column_widths=[0.5, 0.5],
)

# (frame, row, col) for each panel, listed in the same order as the titles.
bar_panels = (
    (usa_df, 1, 1),
    (china_df, 1, 2),
    (japan_df, 2, 1),
    (korea_df, 2, 2),
)
for country_df, grid_row, grid_col in bar_panels:
    fig.add_trace(
        go.Bar(
            x=country_df['Q5'],
            y=country_df['%'],
            text=country_df['%'].astype(str) + "%",
            textposition='auto',
        ),
        row=grid_row,
        col=grid_col,
    )

fig.update_layout(height=1000, showlegend=False, template="plotly_white")

fig.show()

In [None]:
# 2x2 grid of pie charts: the Q5 percentage breakdown for each country.
# Pie traces need 'domain'-type subplot cells rather than the default xy cells.
domain_cell = {'type': 'domain'}
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=("USA with Q5", "China with Q5", "Japan with Q5", "Korea with Q5"),
    column_widths=[2, 2],
    specs=[[dict(domain_cell), dict(domain_cell)],
           [dict(domain_cell), dict(domain_cell)]],
)

# (frame, row, col) for each panel, listed in the same order as the titles.
pie_panels = (
    (usa_df, 1, 1),
    (china_df, 1, 2),
    (japan_df, 2, 1),
    (korea_df, 2, 2),
)
for country_df, grid_row, grid_col in pie_panels:
    fig.add_trace(
        go.Pie(labels=country_df['Q5'], values=country_df['%']),
        row=grid_row,
        col=grid_col,
    )

fig.update_layout(height=1000, showlegend=True, template="plotly_white")

fig.show()