# INFLATION AND FERTILITY
the analysis is found in the README file

Imports and set magics:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from matplotlib_venn import venn2
import pandas_datareader # install with `pip install pandas-datareader`
from dstapi import DstApi # install with `pip install git+https://github.com/alemartinello/dstapi`

import plotly.express as px
import matplotlib.pyplot as plt
plt.rcParams.update({"axes.grid":True,"grid.color":"black","grid.alpha":"0.25","grid.linestyle":"--"})
plt.rcParams.update({'font.size': 14})
# autoreload modules when code is run
%load_ext autoreload
%autoreload 2

# user written modules
import dataproject



# Read and clean data

Import your data, either through an API or manually, and load it. 

In [2]:
fert = DstApi('FERT1')
gdp = DstApi('NAN1')
fpi = DstApi ('PRIS112')

We have found our desired variables now it is time to clean the data

In [3]:
tabsum_fert= fert.tablesummary(language='en')
display(tabsum_fert)


Table FERT1: Total fertility rate (ages 15-49) by ancestry and time
Last update: 2024-02-12T08:00:00


Unnamed: 0,variable name,# values,First value,First value label,Last value,Last value label,Time variable
0,HERKOMST,6,AK,All women,DKK,Women of Danish origin,False
1,Tid,38,1986,1986,2023,2023,True


In [4]:
# The available values for a each variable: 
for variable in tabsum_fert['variable name']:
    print(variable+':')
    display(fert.variable_levels(variable, language='en'))

HERKOMST:


Unnamed: 0,id,text
0,AK,All women
1,IKV,Immigrant women from western countries
2,IKIV,Immigrant women from non-western countries
3,EKV,Descendant women from western countries
4,EKIV,Descendant women from non-western countries
5,DKK,Women of Danish origin


Tid:


Unnamed: 0,id,text
0,1986,1986
1,1987,1987
2,1988,1988
3,1989,1989
4,1990,1990
5,1991,1991
6,1992,1992
7,1993,1993
8,1994,1994
9,1995,1995


In [5]:
tabsum_gdp= gdp.tablesummary(language='en')
display(tabsum_gdp)
for var in tabsum_gdp['variable name']:
    print(var+':')
    display(gdp.variable_levels(var, language='en'))


Table NAN1: Demand and supply by transaction, price unit and time
Last update: 2024-03-27T08:00:00


Unnamed: 0,variable name,# values,First value,First value label,Last value,Last value label,Time variable
0,TRANSAKT,31,B1GQK,B.1*g Gross domestic product,EMPM_DC,"Total employment (1,000 persons)",False
1,PRISENHED,6,V_M,"Current prices, (bill. DKK.)",LAN_C,"Pr. capita, 2010-prices, chained values, (1000...",False
2,Tid,58,1966,1966,2023,2023,True


TRANSAKT:


Unnamed: 0,id,text
0,B1GQK,B.1*g Gross domestic product
1,P7K,P.7 Imports of goods and services
2,P71K,P.71 Import of goods
3,P72K,P.72 Import of services
4,TFSPR,Supply
5,P6D,P.6 Exports of goods and services
6,P61D,P.61 Export of goods
7,P62D,P.62 Export of services
8,P31S1MD,P.31 Private consumption
9,P31S14D,P.31 Household consumption expenditure


PRISENHED:


Unnamed: 0,id,text
0,V_M,"Current prices, (bill. DKK.)"
1,LAN_M,"2010-prices, chained values, (bill. DKK.)"
2,L_V,Period-to-period real growth (per cent)
3,V_C,"Pr. capita. Current prices, (1000 DKK.)"
4,L_VB,"Contribution to GDP growth, (percentage point)"
5,LAN_C,"Pr. capita, 2010-prices, chained values, (1000..."


Tid:


Unnamed: 0,id,text
0,1966,1966
1,1967,1967
2,1968,1968
3,1969,1969
4,1970,1970
5,1971,1971
6,1972,1972
7,1973,1973
8,1974,1974
9,1975,1975


In [6]:
tabsum_fpi= fpi.tablesummary(language='en')
display(tabsum_fpi)
for var in tabsum_fpi['variable name']:
    print(var+':')
    display(fpi.variable_levels(var, language='en'))


Table PRIS112: Consumer price index by main figures and time
Last update: 2024-01-10T08:00:00


Unnamed: 0,variable name,# values,First value,First value label,Last value,Last value label,Time variable
0,HOVED,2,1005,"Average, yearly",1010,Annual increase,False
1,Tid,44,1980,1980,2023,2023,True


HOVED:


Unnamed: 0,id,text
0,1005,"Average, yearly"
1,1010,Annual increase


Tid:


Unnamed: 0,id,text
0,1980,1980
1,1981,1981
2,1982,1982
3,1983,1983
4,1984,1984
5,1985,1985
6,1986,1986
7,1987,1987
8,1988,1988
9,1989,1989


Now we have a quick overview of the avaible datapoints

In [7]:
params = fert.define_base_params(language='en')
params

{'table': 'fert1',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'HERKOMST', 'values': ['*']},
  {'code': 'Tid', 'values': ['*']}]}

In [8]:
params ={'table': 'fert1',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'HERKOMST', 'values': ['AK']},
  {'code': 'Tid', 'values': ['>2008<=2023']}]}

In [9]:
fert_api = fert.get_data(params=params)
fert_api.head(15)

Unnamed: 0,HERKOMST,TID,INDHOLD
0,All women,2009,1839.6
1,All women,2010,1871.2
2,All women,2011,1752.4
3,All women,2012,1729.2
4,All women,2013,1668.7
5,All women,2014,1691.2
6,All women,2015,1713.6
7,All women,2016,1785.4
8,All women,2017,1751.9
9,All women,2018,1729.7


In [10]:
params = gdp.define_base_params(language='en')
params

{'table': 'nan1',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'TRANSAKT', 'values': ['*']},
  {'code': 'PRISENHED', 'values': ['*']},
  {'code': 'Tid', 'values': ['*']}]}

In [11]:
params ={'table': 'nan1',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'TRANSAKT', 'values': ['B1GQK']},
  {'code': 'PRISENHED', 'values': ['V_M']},
  {'code': 'Tid', 'values': ['>2008<=2023']}]}

In [12]:
gdp_api = gdp.get_data(params=params)
gdp_api.head(15)

Unnamed: 0,TRANSAKT,PRISENHED,TID,INDHOLD
0,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2009,1722.1
1,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2010,1810.9
2,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2011,1846.9
3,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2012,1895.0
4,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2013,1929.7
5,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2014,1981.2
6,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2015,2036.4
7,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2016,2107.8
8,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2017,2193.0
9,B.1*g Gross domestic product,"Current prices, (bill. DKK.)",2018,2253.3


In [13]:
params = fpi.define_base_params(language='en')
params

{'table': 'pris112',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'HOVED', 'values': ['*']},
  {'code': 'Tid', 'values': ['*']}]}

In [14]:
params = {'table': 'pris112',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'HOVED', 'values': ['1005']},
  {'code': 'Tid', 'values': ['>2008<=2023']}]}

In [15]:
fpi_api = fpi.get_data(params=params)
fpi_api.head(15)

Unnamed: 0,HOVED,TID,INDHOLD
0,"Average, yearly",2009,91.2
1,"Average, yearly",2010,93.3
2,"Average, yearly",2011,95.9
3,"Average, yearly",2012,98.2
4,"Average, yearly",2013,99.0
5,"Average, yearly",2014,99.6
6,"Average, yearly",2015,100.0
7,"Average, yearly",2016,100.3
8,"Average, yearly",2017,101.4
9,"Average, yearly",2018,102.2


We have now found the desired data where we want to merge it into one dataset.

## Merging

We want to merge our three data sets into one to draw a graph.

In [16]:
# Merge fpi_api and fert_api on 'TID'
merged_first = pd.merge(fpi_api, fert_api, on='TID', how='inner')


In [17]:
# Now merge the result with gdp_api on 'TID'
final_merged = pd.merge(merged_first, gdp_api, on='TID', how='inner')


In [18]:
merged_first.rename(columns={
    'INDHOLD_x': 'FertilityRate',
    'INDHOLD_y': 'ConsumerPriceIndex'
}, inplace=True)

# Assuming gdp_api has been loaded and contains a column that should be renamed to 'GrossDomesticProduct'
gdp_api.rename(columns={
    'INDHOLD': 'GrossDomesticProduct'
}, inplace=True)

# Merge the result with gdp_api on 'TID'
final_merged = pd.merge(merged_first, gdp_api, on='TID', how='inner')

# Select only the desired columns
final_selected = final_merged[['TID', 'FertilityRate', 'ConsumerPriceIndex', 'GrossDomesticProduct']]

print(final_selected)



     TID FertilityRate ConsumerPriceIndex GrossDomesticProduct
0   2009          91.2             1839.6               1722.1
1   2010          93.3             1871.2               1810.9
2   2011          95.9             1752.4               1846.9
3   2012          98.2             1729.2               1895.0
4   2013          99.0             1668.7               1929.7
5   2014          99.6             1691.2               1981.2
6   2015         100.0             1713.6               2036.4
7   2016         100.3             1785.4               2107.8
8   2017         101.4             1751.9               2193.0
9   2018         102.2             1729.7               2253.3
10  2019         103.0             1699.4               2311.0
11  2020         103.4             1674.7               2320.9
12  2021         105.4             1724.1               2550.6
13  2022         113.5             1552.9               2831.6
14  2023         117.2             1495.9              

In [19]:
# Convert columns to numeric, handling non-convertible values
final_selected.loc[:, 'FertilityRate'] = pd.to_numeric(final_selected['FertilityRate'], errors='coerce')
final_selected.loc[:, 'ConsumerPriceIndex'] = pd.to_numeric(final_selected['ConsumerPriceIndex'], errors='coerce')
final_selected.loc[:, 'GrossDomesticProduct'] = pd.to_numeric(final_selected['GrossDomesticProduct'], errors='coerce')

# Calculate year-over-year percentage changes
final_selected.loc[:, 'FertilityRate_Pct_Change'] = final_selected['FertilityRate'].pct_change() * 100
final_selected.loc[:, 'ConsumerPriceIndex_Pct_Change'] = final_selected['ConsumerPriceIndex'].pct_change() * 100
final_selected.loc[:, 'GrossDomesticProduct_Pct_Change'] = final_selected['GrossDomesticProduct'].pct_change() * 100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_selected.loc[:, 'FertilityRate_Pct_Change'] = final_selected['FertilityRate'].pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_selected.loc[:, 'ConsumerPriceIndex_Pct_Change'] = final_selected['ConsumerPriceIndex'].pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versu

## Drawing the Graphs

We print the actual values in a table and more importantly draw the graph

In [20]:
pct_change_df = final_selected[['TID', 'FertilityRate_Pct_Change', 'ConsumerPriceIndex_Pct_Change', 'GrossDomesticProduct_Pct_Change']]
pct_change_df = pct_change_df.iloc[1:].reset_index(drop=True)


print("Year-Over-Year Percentage Changes (Preview):")
print(pct_change_df.head(14).to_string(index=False))

Year-Over-Year Percentage Changes (Preview):
 TID  FertilityRate_Pct_Change  ConsumerPriceIndex_Pct_Change  GrossDomesticProduct_Pct_Change
2010                  2.302632                       1.717765                         5.156495
2011                  2.786710                      -6.348867                         1.987962
2012                  2.398332                      -1.323899                         2.604364
2013                  0.814664                      -3.498728                         1.831135
2014                  0.606061                       1.348355                         2.668809
2015                  0.401606                       1.324503                         2.786190
2016                  0.300000                       4.190009                         3.506187
2017                  1.096710                      -1.876330                         4.042129
2018                  0.788955                      -1.267196                         2.749658
2019 

In [21]:

# Create an interactive line plot with Plotly
fig = px.line(final_selected, x='TID', y=[
    'FertilityRate_Pct_Change', 'ConsumerPriceIndex_Pct_Change', 'GrossDomesticProduct_Pct_Change'],
    labels={
        'value': 'Percentage Change (%)',
        'variable': 'Metrics',
        'TID': 'Year'
    },
    title='Year-over-Year Percentage Change'
)

# Update layout for clearer display
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Percentage Change',
    legend_title='Metric'
)

# Show the plot
fig.show()


