## Project Overview  
### ðŸ“ˆ Determinants of Fertility in Nepal (2000â€“2022)

- **Objective**  
  - Analyze and quantify how key socioeconomic and demographic variables have influenced Nepalâ€™s fertility rate over time, including:  
    - Life expectancy  
    - GDP per capita  
    - Female education  
    - Labor force participation  
    - Urbanization  

- **Goal**  
  - Use statistical and quantitative methods to identify the major drivers of fertility decline in Nepal.  
  - Provide insights into the country's demographic and socioeconomic transition.


In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import plotly.express as px
import plotly.graph_objects as go

# Optional: plotly defaults
px.defaults.width = 900
px.defaults.height = 500

In [13]:
# Loading the dataset
df = pd.read_csv('../../datasets/Region_WB_sm.csv')
# Filtering for Nepal
df_nepal = df[df['country'] == 'Nepal']
df_nepal.isnull().sum()
df_nepal.head()


Unnamed: 0,country,iso2c,iso3c,year,fertility,adolescent_fert,contraceptive,infant_mort,life_exp,gdp_pc,fem_labor,sec_school_f,urban,region
3096,Nepal,NP,NPL,2000,3.984,118.816,37.3,61.8,62.642,547.221082,21.36,27.949949,13.397,South Asia
3097,Nepal,NP,NPL,2001,3.793,118.047,39.3,58.8,63.288,564.286229,21.571,30.452579,13.947,South Asia
3098,Nepal,NP,NPL,2002,3.604,115.0,,55.9,63.507,556.438455,21.795,34.056541,14.24,South Asia
3099,Nepal,NP,NPL,2003,3.439,111.926,,53.3,64.229,570.289674,22.026,35.71677,14.538,South Asia
3100,Nepal,NP,NPL,2004,3.279,106.216,38.3,50.9,64.744,589.469929,22.285,,14.841,South Asia


In [14]:
# Filter Nepal data
df_nepal = df[df['country'] == 'Nepal'].copy()
# Handle missing values by interpolation
df_nepal['contraceptive'] = df_nepal['contraceptive'].interpolate(method='linear')
df_nepal['sec_school_f'] = df_nepal['sec_school_f'].interpolate(method='linear')
# Convert year to integer
df_nepal['year'] = df_nepal['year'].astype(int)
df_nepal

Unnamed: 0,country,iso2c,iso3c,year,fertility,adolescent_fert,contraceptive,infant_mort,life_exp,gdp_pc,fem_labor,sec_school_f,urban,region
3096,Nepal,NP,NPL,2000,3.984,118.816,37.3,61.8,62.642,547.221082,21.36,27.949949,13.397,South Asia
3097,Nepal,NP,NPL,2001,3.793,118.047,39.3,58.8,63.288,564.286229,21.571,30.452579,13.947,South Asia
3098,Nepal,NP,NPL,2002,3.604,115.0,38.966667,55.9,63.507,556.438455,21.795,34.056541,14.24,South Asia
3099,Nepal,NP,NPL,2003,3.439,111.926,38.633333,53.3,64.229,570.289674,22.026,35.71677,14.538,South Asia
3100,Nepal,NP,NPL,2004,3.279,106.216,38.3,50.9,64.744,589.469929,22.285,38.378639,14.841,South Asia
3101,Nepal,NP,NPL,2005,3.115,101.456,43.15,48.8,65.275,603.190109,22.575,41.040508,15.149,South Asia
3102,Nepal,NP,NPL,2006,2.967,95.034,48.0,46.9,65.891,617.477639,22.897,39.517818,15.462,South Asia
3103,Nepal,NP,NPL,2007,2.857,89.634,48.34,45.1,66.169,633.226129,23.253,39.880692,15.781,South Asia
3104,Nepal,NP,NPL,2008,2.737,85.221,48.68,43.4,66.442,666.865095,23.676,46.195919,16.105,South Asia
3105,Nepal,NP,NPL,2009,2.63,82.717,49.02,41.8,66.6,692.400001,24.169,47.009979,16.434,South Asia


In [15]:
# Rename columns for clarity
df_nepal.rename(columns={
    'fertility': 'fertility_rate',
    'adolescent_fert': 'adolescent_fert_rate',
    'contraceptive': 'contraceptive_use',
    'infant_mort': 'infant_mortality',
    'life_exp': 'life_expectancy',
    'gdp_pc': 'gdp_per_capita',
    'fem_labor': 'female_labor_participation',
    'sec_school_f': 'female_secondary_school'
}, inplace=True)

In [16]:
# check data info
df_nepal.info()
df_nepal.head()

#urban categories

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 3096 to 3119
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   country                     24 non-null     object 
 1   iso2c                       24 non-null     object 
 2   iso3c                       24 non-null     object 
 3   year                        24 non-null     int64  
 4   fertility_rate              24 non-null     float64
 5   adolescent_fert_rate        24 non-null     float64
 6   contraceptive_use           24 non-null     float64
 7   infant_mortality            24 non-null     float64
 8   life_expectancy             24 non-null     float64
 9   gdp_per_capita              24 non-null     float64
 10  female_labor_participation  24 non-null     float64
 11  female_secondary_school     24 non-null     float64
 12  urban                       24 non-null     float64
 13  region                      24 non-nu

Unnamed: 0,country,iso2c,iso3c,year,fertility_rate,adolescent_fert_rate,contraceptive_use,infant_mortality,life_expectancy,gdp_per_capita,female_labor_participation,female_secondary_school,urban,region
3096,Nepal,NP,NPL,2000,3.984,118.816,37.3,61.8,62.642,547.221082,21.36,27.949949,13.397,South Asia
3097,Nepal,NP,NPL,2001,3.793,118.047,39.3,58.8,63.288,564.286229,21.571,30.452579,13.947,South Asia
3098,Nepal,NP,NPL,2002,3.604,115.0,38.966667,55.9,63.507,556.438455,21.795,34.056541,14.24,South Asia
3099,Nepal,NP,NPL,2003,3.439,111.926,38.633333,53.3,64.229,570.289674,22.026,35.71677,14.538,South Asia
3100,Nepal,NP,NPL,2004,3.279,106.216,38.3,50.9,64.744,589.469929,22.285,38.378639,14.841,South Asia


In [17]:
numeric_cols = ['fertility_rate','adolescent_fert_rate','contraceptive_use','infant_mortality',
                'life_expectancy','gdp_per_capita','female_labor_participation','female_secondary_school','urban']

# Fertility Trend (Interactive)
fig = px.line(df_nepal, x='year', y='fertility_rate', 
              title='Fertility Rate Trend in Nepal (2000-2022)',
              markers=True)
fig.update_layout(xaxis_title='Year', yaxis_title='Fertility Rate')
fig.show()


In [18]:
# Socioeconomic Trends (Interactive)
fig = px.line(df_nepal, x='year', y=['life_expectancy','gdp_per_capita',
                                     'female_secondary_school','urban'],
              title='Socioeconomic Trends in Nepal (2000-2022)',
              markers=True)
fig.update_layout(xaxis_title='Year', yaxis_title='Value')
fig.show()

In [26]:

# Correlation Heatmap (Interactive)
corr_matrix = df_nepal[numeric_cols].corr()
fig = go.Figure(data=go.Heatmap(
                   z=corr_matrix.values,
                   x=corr_matrix.columns,
                   y=corr_matrix.columns,
                   colorscale='Viridis',
                   text=corr_matrix.values,
                   texttemplate="%{text:.2f}"
                ))
fig.update_layout(title='Correlation Heatmap of Nepal Socioeconomic Variables')

fig.write_image("correlation_heatmap.png")  # ðŸ”¥ this makes it appear in HTML

fig.show() 

ValueError: 
Image export using the "kaleido" engine requires the Kaleido package,
which can be installed using pip:

    $ pip install --upgrade kaleido


In [None]:
# Regression Analysis
# ===============================
# Dependent Variable: fertility_rate
# Independent Variables: life_expectancy, gdp_per_capita, female_labor_participation, female_secondary_school, urban

X = df_nepal[['life_expectancy','gdp_per_capita','female_labor_participation','female_secondary_school','urban']]
y = df_nepal['fertility_rate']

# Add constant for intercept
X = sm.add_constant(X)

# Fit OLS model
model = sm.OLS(y, X).fit()


# Regression Summary
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:         fertility_rate   R-squared:                       0.983
Model:                            OLS   Adj. R-squared:                  0.978
Method:                 Least Squares   F-statistic:                     207.2
Date:                Tue, 18 Nov 2025   Prob (F-statistic):           2.97e-15
Time:                        08:15:59   Log-Likelihood:                 27.325
No. Observations:                  24   AIC:                            -42.65
Df Residuals:                      18   BIC:                            -35.58
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

In [21]:
# Elasticity Estimation
# ===============================
# Elasticity = (beta * mean(X)) / mean(Y)
elasticity = {}
for col in ['life_expectancy','gdp_per_capita','female_labor_participation','female_secondary_school','urban']:
    beta = model.params[col]
    elasticity[col] = (beta * df_nepal[col].mean()) / df_nepal['fertility_rate'].mean()

# Rank by absolute elasticity
elasticity = dict(sorted(elasticity.items(), key=lambda x: abs(x[1]), reverse=True))
print("Elasticity of Fertility Rate with respect to predictors:")
for k,v in elasticity.items():
    print(f"{k}: {v:.3f}")


Elasticity of Fertility Rate with respect to predictors:
life_expectancy: -4.411
urban: -1.181
female_labor_participation: -1.128
gdp_per_capita: 0.836
female_secondary_school: -0.038


In [22]:
# Visualization of Regression Results (Interactive)
# ===============================
# Predicted vs Actual Fertility
df_nepal['predicted_fertility'] = model.predict(X)
fig = px.scatter(df_nepal, x='fertility_rate', y='predicted_fertility',
                 title='Actual vs Predicted Fertility Rate')
fig.add_shape(
    type='line', x0=df_nepal['fertility_rate'].min(), y0=df_nepal['fertility_rate'].min(),
    x1=df_nepal['fertility_rate'].max(), y1=df_nepal['fertility_rate'].max(),
    line=dict(color='red', dash='dash')
)
fig.update_layout(xaxis_title='Actual Fertility', yaxis_title='Predicted Fertility')
fig.show()

In [23]:
# Regression Coefficients
coef_df = pd.DataFrame({
    'Variable': model.params.index[1:],  # skip constant
    'Coefficient': model.params.values[1:]
})
fig = px.bar(coef_df, x='Coefficient', y='Variable', orientation='h',
             title='Regression Coefficients for Fertility Rate', text='Coefficient',
             color='Coefficient', color_continuous_scale='Viridis')
fig.show()

In [24]:
# Elasticity Ranking
elasticity_df = pd.DataFrame({
    'Variable': list(elasticity.keys()),
    'Elasticity': list(elasticity.values())
})
fig = px.bar(elasticity_df, x='Elasticity', y='Variable', orientation='h',
             title='Elasticity of Fertility Rate with Respect to Predictors',
             text='Elasticity', color='Elasticity', color_continuous_scale='Cividis')
fig.show()