In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Load the data

In [None]:
# Read the Asia population CSV from the Kaggle input directory and show its first rows
csv_path = '../input/asian-countries-by-population/AsiaPopulation2020.csv'
df = pd.read_csv(csv_path)
df.head()

## Import visualization libraries

In [None]:
!pip install chart_studio

In [None]:
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
import pandas as pd

## Check for missing values

In [None]:
import seaborn as sns

# Draw the boolean "is missing" mask of the frame as a heatmap.
plt.figure(figsize=(10, 7))
missing_mask = df.isna()
sns.heatmap(missing_mask)

In [None]:
# Show only the rows whose 'UrbanPop' value is missing
urban_pop_missing = df['UrbanPop'].isna()
df[urban_pop_missing]

## Dealing with missing values

In [None]:
# Only 4 values were missing in the given data, so they were researched manually.
# Keys are positional row indices; values are the figures written into the frame.
# NOTE(review): column position 9 is presumably 'UrbanPop' — confirm against df.columns.
manual_values = {
    11: 7707400,  # HongKong
    20: 4207083,  # Kuwait
    24: 670445,   # Macao
    36: 5703569,  # Singapore
}
for row_pos, value in manual_values.items():
    df.iloc[row_pos, 9] = value

## Adding country codes to make working with Plotly easier

In [None]:
import pycountry


def return_code(country_name):
    '''Return the two-letter (alpha_2) pycountry code for a country name.

    The exact primary name is tried first. If that fails, pycountry's
    broader ``lookup`` is used, which also matches other name fields and
    codes, so names that are not the primary pycountry name can resolve.

    Parameters
    ----------
    country_name : str
        Name of the country as it appears in the data.

    Returns
    -------
    str or float
        The ``alpha_2`` code, or ``np.nan`` when the input is not a string
        or no country matches.
    '''
    # Missing values arrive as floats (NaN); lookup() only accepts strings.
    if not isinstance(country_name, str):
        return np.nan

    details = pycountry.countries.get(name=country_name)
    if details is None:
        try:
            details = pycountry.countries.lookup(country_name)
        except LookupError:
            details = None

    # np.nan (lowercase) is used because the np.NaN alias was removed in NumPy 2.0.
    return details.alpha_2 if details is not None else np.nan


df['Country_Code'] = df['Country'].apply(return_code)

## Start geographic plotting

In [None]:
# Choropleth trace description: countries are matched by name and coloured by population.
data = {
    'type': 'choropleth',
    'locations': df['Country'],
    'locationmode': 'country names',
    'colorscale': 'ice',
    'text': df['Country'],
    'z': df['Population'],
    'colorbar': dict(title='Population'),
}

In [None]:
# Restrict the map to the Asia scope
layout = {'geo': dict(scope='asia')}

# The trace dictionary is wrapped in a list when building the figure
choromap = go.Figure(layout=layout, data=[data])

# Render the map in the notebook
iplot(choromap)

In [None]:
# Bar plot of the five countries with the largest population
most_populous = df.sort_values(by='Population', ascending=False).head(5)
plt.figure(figsize=(10, 5))
sns.barplot(data=most_populous, x='Country', y='Population')

In [None]:
# Bar chart of yearly population change per country, with a range slider on the x axis
fig = px.bar(
    data_frame=df.reset_index(),
    x="Country",
    y="YearlyChange",
    title="Yearly Change in Population",
).update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Line chart of net population change per country, with a range slider on the x axis
fig = px.line(
    data_frame=df.reset_index(),
    x="Country",
    y="NetChange",
    title="Net Change in Population",
).update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Bar chart of median age, countries ordered from highest to lowest
by_median_age = df.sort_values(by='MedAge', ascending=False)
fig = px.bar(
    data_frame=by_median_age,
    x="Country",
    y="MedAge",
    title="Median age of people",
).update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Pie chart of land area: the 25 largest countries get their own slice and
# every remaining country is folded into a single "Others" slice.
top_n = 25

# Sort once; the same ranking feeds both the top slices and the "Others" total.
land_ranked = df[['Country', 'LandArea']].sort_values(by='LandArea', ascending=False)
others_land_area = land_ranked['LandArea'].iloc[top_n:].sum()

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
df_land_share = pd.concat(
    [
        land_ranked.iloc[:top_n],
        pd.DataFrame([{'Country': 'Others', 'LandArea': others_land_area}]),
    ],
    ignore_index=True,
)

# Compute the "Others" share from the data so the title cannot drift from the chart.
total_land_area = land_ranked['LandArea'].sum()
others_pct = 100 * others_land_area / total_land_area if total_land_area else 0.0

fig = px.pie(
    df_land_share,
    names="Country",
    values="LandArea",
    title=f"Land Density (Others = {others_pct:.2f}%)",
)
fig.show()

In [None]:
# Pie chart comparing Asia's summed 'WorldShare' with the remainder up to 100.
asia_share = df['WorldShare'].sum()

# Both slices are standalone lists, so no data frame is passed to px.pie.
# The first slice is 100 - asia_share, i.e. everything outside Asia, so it is
# labelled as the rest of the world rather than the whole world.
fig = px.pie(
    names=['Rest of the world', 'Asia'],
    values=[100 - asia_share, asia_share],
    title="Asia's population share in the world",
)
fig.show()

### I am new to data science and still learning. Feedback and reviews are welcome :)