In [4]:
#%pip install pandas
import pandas as pd # type: ignore
import datetime
import numpy as np # type: ignore

In [5]:
# dependencies
#%pip install pyarrow
#%pip install fastparquet

In [6]:
# Year labels as strings, '2000' through '2021' (the range end is exclusive).
# NOTE(review): the data frames built below run through 2022 - confirm whether
# this list is meant to stop at 2021; it is not referenced by the visible cells.
year_list = [str(year) for year in range(2000, 2022)]

In [7]:
# function - delete column - takes df and a column name (str) or list of column names
def delete_col(target_df, target_col):
    """Return ``target_df`` without the column(s) named in ``target_col``.

    Parameters
    ----------
    target_df : pandas.DataFrame
        Frame to drop columns from. It is NOT modified.
    target_col : str or list of str
        Column label, or list of labels, to remove.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the remaining columns.

    Raises
    ------
    KeyError
        If a requested label is not a column of ``target_df``.
    """
    # A non-mutating drop keeps the caller's frame intact; an in-place drop on a
    # filtered slice can also raise SettingWithCopyWarning.
    return target_df.drop(columns=target_col)

# function - change date to year in the df - takes df
def to_year(target_df):
    """Return ``target_df`` with its ``date`` column replaced by the integer year.

    Parameters
    ----------
    target_df : pandas.DataFrame
        Frame that may contain a ``date`` column parseable by
        ``pandas.to_datetime``. It is NOT modified.

    Returns
    -------
    pandas.DataFrame
        A new frame whose ``date`` column holds integer years. If the frame
        has no ``date`` column it is returned unchanged.
    """
    # Guard the whole conversion: previously only the parsing step was guarded,
    # so a frame without 'date' still failed on the following lines.
    if 'date' not in target_df.columns:
        return target_df
    # .dt.year reads the year directly; astype(int) keeps the plain int dtype
    # the earlier strftime('%Y') -> int conversion produced.
    years = pd.to_datetime(target_df['date']).dt.year.astype(int)
    return target_df.assign(date=years)

# function - change columns names - takes df and list of column names in str
def change_col_names(target_df, names_list):
    """Return ``target_df`` with its columns relabelled, by position, to ``names_list``.

    Parameters
    ----------
    target_df : pandas.DataFrame
        Frame to relabel. It is NOT modified.
    names_list : list of str
        New column labels, one per existing column, in column order.

    Returns
    -------
    pandas.DataFrame
        A new frame carrying the new column labels.

    Raises
    ------
    ValueError
        If ``names_list`` does not have one entry per column.
    """
    # set_axis returns a relabelled frame instead of overwriting the caller's
    # column index in place.
    return target_df.set_axis(names_list, axis=1)

In [8]:
URL_DATA = 'https://storage.dosm.gov.my/population/population_malaysia.parquet'

df_population = pd.read_parquet(URL_DATA)

# Keep rows where sex is 'both', ethnicity is 'overall' and age is a specific
# band (i.e. not the 'overall' row).
is_age_band_row = (
    (df_population['sex'] == 'both')
    & (df_population['ethnicity'] == 'overall')
    & (df_population['age'] != 'overall')
)
# .copy() detaches the filtered rows from the raw frame, so the helpers below
# work on an independent frame and cannot raise SettingWithCopyWarning.
df_population = df_population[is_age_band_row].copy()
df_population = delete_col(df_population, ['sex', 'ethnicity'])
df_population = to_year(df_population)
# Positional rename: assumes exactly three columns remain, in the order
# date, age, population - TODO confirm against the source schema.
df_population = change_col_names(df_population, ['Year', 'Age', 'Population'])

# Restrict to 2000-2022 inclusive and renumber the rows from 0.
df_population_00_22 = df_population[
    (df_population['Year'] > 1999) & (df_population['Year'] < 2023)
].reset_index(drop=True)

df_population_00_22.tail()

Unnamed: 0,Year,Age,Population
409,2022,65-69,974.5
410,2022,70-74,674.5
411,2022,75-79,377.1
412,2022,80-84,203.1
413,2022,85+,134.5


In [9]:
# Population of fertile adults: the four five-year age bands covering 20-39,
# summed per year.
fertile_age_bands = ['20-24', '25-29', '30-34', '35-39']
in_fertile_band = df_population_00_22['Age'].isin(fertile_age_bands)

df_population_00_22_fertile_adult = (
    df_population_00_22[in_fertile_band]
    .groupby('Year')['Population']
    .sum()
    .reset_index()
)
df_population_00_22_fertile_adult = change_col_names(
    df_population_00_22_fertile_adult,
    ['Year', 'Number of Fertile Adults (Age 20 - 39, x 1000)'],
)
df_population_00_22_fertile_adult.tail()

Unnamed: 0,Year,"Number of Fertile Adults (Age 20 - 39, x 1000)"
18,2018,11821.9
19,2019,11834.6
20,2020,11408.4
21,2021,11388.1
22,2022,11374.9


In [10]:
URL_DATA = 'https://storage.dosm.gov.my/demography/birth.parquet'

# Load the births table, reduce its date to a year and relabel the three
# columns by position.
df_birth = pd.read_parquet(URL_DATA)
df_birth = to_year(df_birth)
df_birth = change_col_names(df_birth, ['Year', 'Birth', 'Birth Rate'])

# Keep 2000-2022 inclusive, renumbering the rows from 0.
birth_in_window = (df_birth['Year'] > 1999) & (df_birth['Year'] < 2023)
df_birth_00_22 = df_birth[birth_in_window].reset_index(drop=True)

df_birth_00_22.head()

Unnamed: 0,Year,Birth,Birth Rate
0,2000,537853,22.9
1,2001,505479,21.0
2,2002,494538,20.2
3,2003,481399,19.2
4,2004,481800,18.9


In [11]:
URL_DATA = 'https://storage.dosm.gov.my/demography/marriages.parquet'

df_marriage = pd.read_parquet(URL_DATA)
df_marriage = to_year(df_marriage)
df_marriage = change_col_names(df_marriage, ['Year', 'Sex', 'Marriage Number', 'Marriage Rate'])
df_marriage = delete_col(df_marriage, ['Sex', 'Marriage Rate'])
# De-duplicate on (Year, Marriage Number) rather than on the count alone, so
# two different years that happen to share the same count are both kept.
# NOTE(review): presumably the repeats come from one row per 'Sex' value
# carrying the same count - confirm against the source data.
df_marriage = df_marriage.drop_duplicates(subset=['Year', 'Marriage Number'])

# Keep 2000-2022 inclusive, matching the frame's name and the other datasets,
# and renumber the rows from 0.
df_marriage_00_22 = df_marriage[
    (df_marriage['Year'] > 1999) & (df_marriage['Year'] < 2023)
].reset_index(drop=True)

df_marriage_00_22.tail()

Unnamed: 0,Year,Marriage Number
1,2018,206352
2,2019,203661
3,2020,184589
4,2021,215973
5,2022,214824


In [12]:
URL_DATA = 'https://storage.dosm.gov.my/demography/fertility.parquet'

df_fertility = pd.read_parquet(URL_DATA)

df_fertility = to_year(df_fertility)

# Keep only the rows whose age_group is 'tfr'. .copy() detaches them from the
# full table so the helpers below work on an independent frame and cannot
# raise SettingWithCopyWarning.
df_fertility = df_fertility[df_fertility['age_group'] == 'tfr'].copy()
df_fertility = delete_col(df_fertility, 'age_group')
df_fertility = change_col_names(df_fertility, ['Year', 'Fertility Rate'])

# Keep 2000-2022 inclusive and renumber the rows from 0.
df_fertility_00_22 = df_fertility[
    (df_fertility['Year'] > 1999) & (df_fertility['Year'] < 2023)
].reset_index(drop=True)

df_fertility_00_22.tail()

Unnamed: 0,Year,Fertility Rate
18,2018,1.84
19,2019,1.78
20,2020,1.71
21,2021,1.7
22,2022,1.63


In [13]:
# Join births with the fertile-adult totals (inner join on the columns the two
# frames share), drop the rate column and express births in thousands so both
# series use the same unit.
df_birth_vs_fertile_adult = (
    df_birth_00_22
    .merge(df_population_00_22_fertile_adult, how='inner')
    .drop(columns='Birth Rate')
    .assign(Birth=lambda merged: merged['Birth'] / 1000)
)
df_birth_vs_fertile_adult = change_col_names(
    df_birth_vs_fertile_adult,
    ['Year', 'Birth (x 1000)', 'Number of Fertile Adults (Age 20 - 39, x 1000)'],
)
df_birth_vs_fertile_adult.tail()

Unnamed: 0,Year,Birth (x 1000),"Number of Fertile Adults (Age 20 - 39, x 1000)"
18,2018,501.945,11821.9
19,2019,489.863,11834.6
20,2020,471.504,11408.4
21,2021,439.744,11388.1
22,2022,423.124,11374.9


In [14]:
URL_DATA = 'https://storage.dosm.gov.my/hies/hh_income.parquet'

# Load household income, reduce its date to a year and relabel the three
# columns by position.
df_income = change_col_names(
    to_year(pd.read_parquet(URL_DATA)),
    ['Year', 'Mean Household Income', 'Median Household Income'],
)

# Keep 2000-2022 inclusive, renumbering the rows from 0.
income_years = df_income['Year']
df_income_00_22 = df_income[(income_years > 1999) & (income_years < 2023)].reset_index(drop=True)

df_income_00_22.tail()

Unnamed: 0,Year,Mean Household Income,Median Household Income
5,2014,6141,4585
6,2016,6958,5228
7,2019,7901,5873
8,2020,7089,5209
9,2022,8479,6338


In [15]:
URL_DATA = 'https://storage.dosm.gov.my/cpi/cpi_2d.parquet'

df_price_index = pd.read_parquet(URL_DATA)
df_price_index = to_year(df_price_index)
df_price_index = delete_col(df_price_index, 'division')

# One row per year: the plain mean of every 'index' value recorded for it.
# NOTE(review): this averages across all rows of the year, whatever division
# they came from, without weighting - confirm that is the intended index.
yearly_mean_index = df_price_index.groupby('date')['index'].mean()
df_price_index = change_col_names(yearly_mean_index.reset_index(), ['Year', 'Price Index'])

# The filter spans 2000-2022 although the name says 10_22; the name is kept
# because the plotting cell refers to it. The row labels are not renumbered.
price_years = df_price_index['Year']
df_price_index_10_22 = df_price_index[(price_years > 1999) & (price_years < 2023)]
df_price_index_10_22.tail()

Unnamed: 0,Year,Price Index
8,2018,118.898214
9,2019,119.536905
10,2020,119.160714
11,2021,120.817262
12,2022,123.580952


In [16]:
URL_DATA = 'https://storage.dosm.gov.my/hies/hh_poverty.parquet'

# Load the poverty table, reduce its date to a year and drop the hardcore and
# relative poverty columns before relabelling the two that remain.
df_poverty = to_year(pd.read_parquet(URL_DATA))
df_poverty = delete_col(df_poverty, ['poverty_hardcore', 'poverty_relative'])
df_poverty = change_col_names(df_poverty, ['Year', 'Poverty Percentage'])

# Keep 2000-2022 inclusive, renumbering the rows from 0.
poverty_in_window = (df_poverty['Year'] > 1999) & (df_poverty['Year'] < 2023)
df_poverty_00_22 = df_poverty[poverty_in_window].reset_index(drop=True)

df_poverty_00_22.head()

Unnamed: 0,Year,Poverty Percentage
0,2002,5.1
1,2004,5.7
2,2007,3.6
3,2009,3.8
4,2012,1.7


In [17]:
URL_DATA = 'https://storage.dosm.gov.my/hies/hh_inequality.parquet'

# Load the inequality table, reduce its date to a year and relabel its two
# columns by position.
df_inequality = pd.read_parquet(URL_DATA)
df_inequality = change_col_names(to_year(df_inequality), ['Year', 'Income Inequality'])

# Keep 2000-2022 inclusive, renumbering the rows from 0.
inequality_years = df_inequality['Year']
df_inequality_00_22 = (
    df_inequality[(inequality_years > 1999) & (inequality_years < 2023)]
    .reset_index(drop=True)
)
df_inequality_00_22.head()

Unnamed: 0,Year,Income Inequality
0,2002,0.461
1,2004,0.462
2,2007,0.441
3,2009,0.441
4,2012,0.431


In [18]:
#%pip install matplotlib
#%pip install seaborn

In [19]:
#%pip install plotly

In [20]:
import matplotlib.pyplot as plt # type: ignore
import seaborn as sns # type: ignore
import plotly.graph_objects as go # type: ignore
import plotly.express as px # type: ignore
from plotly.subplots import make_subplots # type: ignore
#%matplotlib inline

In [21]:
#import plotly.io as pio # type: ignore
#pio.renderers.default = 'iframe' # or 'colab' or 'iframe' or 'iframe_connected' or 'sphinx_gallery'

In [22]:
#%pip install nbformat

In [96]:
# Single panel with two y-axes: births on the primary axis, fertility rate on
# the secondary axis.
fig1 = make_subplots(specs=[[{"secondary_y": True}]])

newborn_trace = go.Scatter(
    x=df_birth_00_22['Year'],
    y=df_birth_00_22['Birth'],
    mode='lines+markers',
    name='Newborn',
)
fertility_trace = go.Scatter(
    x=df_fertility_00_22['Year'],
    y=df_fertility_00_22['Fertility Rate'],
    mode='lines+markers',
    name='Fertility Rate',
)
fig1.add_trace(newborn_trace, secondary_y=False)
fig1.add_trace(fertility_trace, secondary_y=True)

# Title, horizontal legend anchored at the bottom-left, and a fixed canvas size.
fig1_legend = {"orientation": "h", "yanchor": "bottom", "y": 0, "xanchor": "left", "x": 0}
fig1_margin = {"l": 100, "r": 100, "b": 100, "t": 100}
fig1.update_layout(
    title_text="<b>Newborn and Fertility Rate over Years</b>",
    legend=fig1_legend,
    autosize=False,
    width=1200,
    height=500,
    margin=fig1_margin,
)

# Axis titles; both y-axes start at zero with a fixed upper bound.
fig1.update_xaxes(title_text="Year")
fig1.update_yaxes(title_text="Newborn Number", secondary_y=False, range=[0, 600000])
fig1.update_yaxes(title_text="Fertility Rate", secondary_y=True, range=[0, 3.5])

fig1.show()

In [91]:
# Two side-by-side panels: stacked areas of fertile adults and births on the
# left, registered marriages on the right.
fig2_3_titles = (
    "<b>Number of Newborn and Fertile Adult by Year</b>",
    "<b>Marriage Registered by Year</b>",
)
fig2_3 = make_subplots(rows=1, cols=2, subplot_titles=fig2_3_titles)

# Left panel: both traces share stackgroup 'one' and the 'legend1' legend.
fertile_adult_area = go.Scatter(
    x=df_birth_vs_fertile_adult['Year'],
    y=df_birth_vs_fertile_adult['Number of Fertile Adults (Age 20 - 39, x 1000)'],
    fill='tozeroy',
    stackgroup='one',
    name='Number of Fertile Adults (Age 20 - 39, x 1000)',
    legend='legend1',
)
newborn_area = go.Scatter(
    x=df_birth_vs_fertile_adult['Year'],
    y=df_birth_vs_fertile_adult['Birth (x 1000)'],
    fill='tonexty',
    stackgroup='one',
    name='Newborn (x 1000)',
    legend='legend1',
)
fig2_3.add_trace(fertile_adult_area, row=1, col=1)
fig2_3.add_trace(newborn_area, row=1, col=1)

# Right panel: a single line, kept out of the legend.
marriage_line = go.Scatter(
    x=df_marriage_00_22['Year'],
    y=df_marriage_00_22['Marriage Number'],
    mode='lines+markers',
    name='Marriage Registered',
    showlegend=False,
)
fig2_3.add_trace(marriage_line, row=1, col=2)

# Horizontal legend placed below the plot area, plus a fixed canvas size.
fig2_3.update_layout(
    legend1={"orientation": "h", "yanchor": "bottom", "y": -0.16, "xanchor": "left", "x": 0},
    autosize=False,
    width=1200,
    height=500,
    margin={"l": 100, "r": 150, "b": 100, "t": 100},
)

# TODO: y-axis titles for the two panels are not set yet.

fig2_3.show()

In [89]:
# Two side-by-side panels, each with a primary and a secondary y-axis.
fig4_5 = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("<b>Average Household Income and Price Index by Year</b>",
                    "<b>Poverty and Income Inequality by Year</b>"),
    specs=[[{"secondary_y": True}, {"secondary_y": True}]],
)

# One entry per trace, in drawing order:
# (frame, y column, legend label, legend id, on secondary axis?, panel column).
# Left panel: income against the price index (there is no living-cost-by-year
# dataset, so the price index stands in for it).
# Right panel: poverty against income inequality.
fig4_5_traces = [
    (df_income_00_22, 'Mean Household Income', 'Mean Household Income', 'legend1', False, 1),
    (df_price_index_10_22, 'Price Index', 'Price Index', 'legend1', True, 1),
    (df_poverty_00_22, 'Poverty Percentage', 'Poverty Percentage', 'legend2', False, 2),
    (df_inequality_00_22, 'Income Inequality',
     'Income Inequality (Gini coefficient, 1 = total inequality)', 'legend2', True, 2),
]
for frame, y_column, label, legend_id, on_secondary, panel_col in fig4_5_traces:
    fig4_5.add_trace(
        go.Scatter(
            x=frame['Year'],
            y=frame[y_column],
            mode='lines+markers',
            name=label,
            legend=legend_id,
        ),
        secondary_y=on_secondary,
        row=1,
        col=panel_col,
    )

# One horizontal legend per panel, below the plot area: the first anchored to
# the left edge, the second to the right edge.
fig4_5.update_layout(
    legend1={"orientation": "h", "yanchor": "bottom", "y": -0.16, "xanchor": "left", "x": 0},
    legend2={"orientation": "h", "yanchor": "bottom", "y": -0.16, "xanchor": "right", "x": 1},
    autosize=False,
    width=1200,
    height=500,
    margin={"l": 100, "r": 100, "b": 100, "t": 100},
)

# y-axis titles: (title, on secondary axis?, panel column).
fig4_5_axis_titles = [
    ("Mean Household Income", False, 1),
    ("Price Index", True, 1),
    ("Poverty Rate", False, 2),
    ("Income Inequality (Gini coefficient)", True, 2),
]
for axis_title, on_secondary, panel_col in fig4_5_axis_titles:
    fig4_5.update_yaxes(title_text=axis_title, secondary_y=on_secondary, row=1, col=panel_col)

fig4_5.show()

In [25]:
# @hidden_cell
# The project token is an authorization token used to access project resources (data sources, connections) and by platform APIs.
# SECURITY: the literal project id and access token that were embedded here have been redacted.
# Treat the old token as compromised and rotate it; supply the values through environment variables instead of committing them.
#import os
#from project_lib import Project
#project = Project(project_id=os.environ['WS_PROJECT_ID'], project_access_token=os.environ['WS_PROJECT_ACCESS_TOKEN'])
#pc = project.project_context

#from ibm_watson_studio_lib import access_project_or_space
#wslib = access_project_or_space({'token': os.environ['WS_PROJECT_ACCESS_TOKEN']})


In [94]:
# Export every figure into one standalone HTML dashboard file.
figs = [fig1, fig2_3, fig4_5] #, fig6, fig7

# Document skeleton: <head> is emitted before <body> (it used to be nested
# inside it) and the font URL no longer contains a raw space.
html_parts = [
    "<!DOCTYPE html>",
    "<html>",
    "<head> <meta charset='utf-8'>",
    "<link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet'>",
    "<link href='styles.css' rel='stylesheet'></head>",
    "<body>",
    "<h1>Newborn Trend in Malaysia</h1>",
]
for position, fig in enumerate(figs):
    # Only the first figure emits the plotly.js CDN script tag; the later
    # fragments reuse the library already loaded by the page.
    plotlyjs_mode = 'cdn' if position == 0 else False
    html_parts.append(fig.to_html(full_html=False, include_plotlyjs=plotlyjs_mode))
html_parts.append("</body></html>")

# The context manager closes the file even if writing fails; the explicit
# encoding keeps non-ASCII characters in the figure HTML portable across
# platforms.
with open("Dashboard (Current Progression).html", "w", encoding="utf-8") as dashboard:
    dashboard.write("\n".join(html_parts))