# Economic Data Preparation
## Real Terms

In [1]:
# Adjust Notebook Display
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
# Hide Warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Libraries
import pandas as pd
import numpy as np

In [4]:
# Data Folder
dt_path = 'C:\\Users\\spiterisr\\OneDrive - centralbankmalta.org\\Working Papers\\Beat the Heat Hackathon\\Data\\Economic Data\\'

In [5]:
# Load Data
df_0218 = pd.read_csv(dt_path+'dataset_nuts3_2002-2018.csv')
df_1921 = pd.read_csv(dt_path+'dataset_nuts3_2019-2021.csv')
country_df = pd.read_csv(dt_path+'dataset_country_2002-2021.csv')

In [6]:
# Combine 2 Datasets
econ_df = pd.concat([df_0218, df_1921])
econ_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 580049 entries, 0 to 72906
Data columns (total 7 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   year      580049 non-null  int64  
 1   geo       580049 non-null  object 
 2   variable  580049 non-null  object 
 3   sector    521579 non-null  object 
 4   unit      580049 non-null  object 
 5   value     578233 non-null  float64
 6   namefile  580049 non-null  object 
dtypes: float64(1), int64(1), object(5)
memory usage: 35.4+ MB


In [7]:
# Create Country Column

# Create Countries Function
def nuts3_to_country(nuts3_code):
    # Code to Country Dictionary
    custom_nuts3_to_country = {
        'BE': 'Belgium',
        'BG': 'Bulgaria',
        'CZ': 'Czechia',
        'DK': 'Denmark',
        'DE': 'Germany',
        'EE': 'Estonia',
        'IE': 'Ireland',
        'EL': 'Greece',
        'ES': 'Spain',
        'FR': 'France',
        'HR': 'Croatia',
        'IT': 'Italy',
        'CY': 'Cyprus',
        'LV': 'Latvia',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'HU': 'Hungary',
        'MT': 'Malta',
        'NL': 'Netherlands',
        'AT': 'Austria',
        'PL': 'Poland',
        'PT': 'Portugal',
        'RO': 'Romania',
        'SI': 'Slovenia',
        'SK': 'Slovakia',
        'FI': 'Finland',
        'SE': 'Sweden'
    }
    # Check for partial matches
    for key in custom_nuts3_to_country:
        try:
            if nuts3_code.startswith(key):
                return custom_nuts3_to_country[key]
        except AttributeError:
            pass
    # Handle cases where no match is found
    return 'Unknown' 

# Apply Countries Function
econ_df['country'] = econ_df['geo'].apply(nuts3_to_country)
econ_df.head()

Unnamed: 0,year,geo,variable,sector,unit,value,namefile,country
0,2013,AT111,area,,KM2,701.0,area.xlsx,Austria
1,2014,AT111,area,,KM2,701.0,area.xlsx,Austria
2,2015,AT111,area,,KM2,701.0,area.xlsx,Austria
3,2016,AT111,area,,KM2,701.0,area.xlsx,Austria
4,2017,AT111,area,,KM2,701.0,area.xlsx,Austria


In [8]:
# View Variables
print('econ_df variables :', econ_df['variable'].unique())
print('country_df variables :', country_df['variable'].unique())

econ_df variables : ['area' 'gdp' 'gross value added' 'employment' 'population']
country_df variables : ['hicp' 'gdp_country']


In [9]:
# View Timeframes
for i in econ_df['variable'].unique():
    print(i)
    print(sorted(econ_df[econ_df['variable'] == i]['year'].unique()))

area
[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
gdp
[2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
gross value added
[2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
employment
[2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
population
[2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]


### Gross Value Added (GVA)

In [10]:
# Adjust GVA Data

# Subset
gva = econ_df[econ_df['variable'] == 'gross value added'].drop(['variable', 'unit', 'namefile'], axis=1)

# Seperate by NACE
counter = 1
for i in gva['sector'].unique():
    if counter == 1:
        gva_sector = gva[gva['sector'] == i].drop(['sector'], axis=1)
        gva_sector.rename(columns={'value': 'GVA [' + i + ']'}, inplace=True)
    else:
        gs = gva[gva['sector'] == i].drop(['sector', 'country'], axis=1)
        gs.rename(columns={'value': 'GVA [' + i + ']'}, inplace=True)
        gva_sector = pd.merge(gva_sector, gs, on=['year', 'geo'])
    counter += 1

gva_sector.head()

Unnamed: 0,year,geo,GVA [A],country,GVA [B-E],GVA [C],GVA [F],GVA [G-I],GVA [G-J],GVA [J],GVA [K],GVA [K-N],GVA [L],GVA [M_N],GVA [O-Q],GVA [O-U],GVA [R-U],GVA [TOTAL]
0,2003,BE100,1.2,Belgium,4647.4,2758.0,1175.6,9112.5,14310.1,5197.6,8044.6,17158.0,2912.2,6201.2,10113.3,11684.7,1571.4,48977.0
1,2004,BE100,5.3,Belgium,5043.0,3042.8,1327.0,9206.3,14471.9,5265.6,8954.6,18391.2,2853.6,6583.0,10484.7,12174.0,1689.3,51412.4
2,2005,BE100,3.9,Belgium,5185.0,3503.6,1232.6,9975.3,15221.7,5246.4,8631.4,18609.5,3231.7,6746.4,11114.3,12874.6,1760.3,53127.3
3,2006,BE100,4.4,Belgium,5176.5,3131.1,1613.8,10378.3,15731.3,5353.0,8844.2,19634.3,3540.9,7249.2,11384.1,13252.9,1868.8,55413.2
4,2007,BE100,6.1,Belgium,5081.4,3002.4,1579.8,10855.0,16344.1,5489.1,8822.1,20687.1,3721.9,8143.1,12005.6,13944.1,1938.5,57642.6


In [11]:
# Merge Prices and GVA
hicp = country_df[country_df['variable'] == 'hicp'].drop(['variable', 'sector', 'unit', 'namefile'], axis=1)
hicp.rename(columns={'value': 'HICP'}, inplace=True)
gva_sector = pd.merge(gva_sector, hicp, on=['year', 'country'])
gva_sector.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10774 entries, 0 to 10773
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         10774 non-null  int64  
 1   geo          10774 non-null  object 
 2   GVA [A]      10774 non-null  float64
 3   country      10774 non-null  object 
 4   GVA [B-E]    10774 non-null  float64
 5   GVA [C]      10774 non-null  float64
 6   GVA [F]      10774 non-null  float64
 7   GVA [G-I]    10774 non-null  float64
 8   GVA [G-J]    10774 non-null  float64
 9   GVA [J]      10774 non-null  float64
 10  GVA [K]      10774 non-null  float64
 11  GVA [K-N]    10774 non-null  float64
 12  GVA [L]      10774 non-null  float64
 13  GVA [M_N]    10774 non-null  float64
 14  GVA [O-Q]    10774 non-null  float64
 15  GVA [O-U]    10774 non-null  float64
 16  GVA [R-U]    10774 non-null  float64
 17  GVA [TOTAL]  10774 non-null  float64
 18  HICP         10774 non-null  float64
dtypes: f

In [12]:
# Calculate Real GVA Growth

# Order Dataset
gva_sector = gva_sector.sort_values(by=['geo', 'year']).reset_index(drop=True)

# Adjust HICP
gva_sector['HICP'] = gva_sector['HICP'] / 100

# Get Real Values
gva_cols = ['GVA [A]', 'GVA [B-E]', 'GVA [C]', 'GVA [F]', 'GVA [G-I]', 'GVA [G-J]', 'GVA [J]', 'GVA [K]', 'GVA [K-N]', 'GVA [L]', 'GVA [M_N]', 'GVA [O-Q]', 'GVA [O-U]', 
            'GVA [R-U]', 'GVA [TOTAL]']
for i in gva_cols:
    gva_sector[i] = gva_sector[i] / gva_sector['HICP']
    for j in range(1, len(gva_sector)):
        gva_sector.loc[j, 'Gr '+i] = (gva_sector.loc[j, i] - gva_sector.loc[j-1, i]) / gva_sector.loc[j-1, i]
        if gva_sector.loc[j, 'geo'] != gva_sector.loc[j-1, 'geo']:
            gva_sector.loc[j, 'Gr '+i] = np.nan
        gva_sector.loc[0, 'Gr '+i] = np.nan

gva_sector.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10774 entries, 0 to 10773
Data columns (total 34 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   year            10774 non-null  int64  
 1   geo             10774 non-null  object 
 2   GVA [A]         10774 non-null  float64
 3   country         10774 non-null  object 
 4   GVA [B-E]       10774 non-null  float64
 5   GVA [C]         10774 non-null  float64
 6   GVA [F]         10774 non-null  float64
 7   GVA [G-I]       10774 non-null  float64
 8   GVA [G-J]       10774 non-null  float64
 9   GVA [J]         10774 non-null  float64
 10  GVA [K]         10774 non-null  float64
 11  GVA [K-N]       10774 non-null  float64
 12  GVA [L]         10774 non-null  float64
 13  GVA [M_N]       10774 non-null  float64
 14  GVA [O-Q]       10774 non-null  float64
 15  GVA [O-U]       10774 non-null  float64
 16  GVA [R-U]       10774 non-null  float64
 17  GVA [TOTAL]     10774 non-null 

In [13]:
# Get Lags of GVA

gva_cols = ['GVA [A]', 'GVA [B-E]', 'GVA [C]', 'GVA [F]', 'GVA [G-I]', 'GVA [G-J]', 'GVA [J]', 'GVA [K]', 'GVA [K-N]', 'GVA [L]', 'GVA [M_N]', 'GVA [O-Q]', 'GVA [O-U]', 
            'GVA [R-U]', 'GVA [TOTAL]', 'Gr GVA [A]', 'Gr GVA [B-E]', 'Gr GVA [C]', 'Gr GVA [F]', 'Gr GVA [G-I]', 'Gr GVA [G-J]', 'Gr GVA [J]', 'Gr GVA [K]', 'Gr GVA [K-N]', 'Gr GVA [L]',
            'Gr GVA [M_N]', 'Gr GVA [O-Q]', 'Gr GVA [O-U]', 'Gr GVA [R-U]', 'Gr GVA [TOTAL]']
for i in gva_cols:
    for j in range(1, len(gva_sector)):
        gva_sector.loc[j, i+' L1'] = gva_sector.loc[j-1, i]
        if gva_sector.loc[j, 'geo'] != gva_sector.loc[j-1, 'geo']:
            gva_sector.loc[j, i+' L1'] = np.nan
        gva_sector.loc[0, i+' L1'] = np.nan

gva_sector.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10774 entries, 0 to 10773
Data columns (total 64 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   year               10774 non-null  int64  
 1   geo                10774 non-null  object 
 2   GVA [A]            10774 non-null  float64
 3   country            10774 non-null  object 
 4   GVA [B-E]          10774 non-null  float64
 5   GVA [C]            10774 non-null  float64
 6   GVA [F]            10774 non-null  float64
 7   GVA [G-I]          10774 non-null  float64
 8   GVA [G-J]          10774 non-null  float64
 9   GVA [J]            10774 non-null  float64
 10  GVA [K]            10774 non-null  float64
 11  GVA [K-N]          10774 non-null  float64
 12  GVA [L]            10774 non-null  float64
 13  GVA [M_N]          10774 non-null  float64
 14  GVA [O-Q]          10774 non-null  float64
 15  GVA [O-U]          10774 non-null  float64
 16  GVA [R-U]          107

### Employment Rate (EMP)

In [14]:
# Get Individual Datasets
emp = econ_df[econ_df['variable'] == 'employment'].drop(['variable', 'unit', 'namefile', 'country'], axis=1)
pop = econ_df[econ_df['variable'] == 'population'].drop(['variable', 'sector', 'unit', 'namefile', 'country'], axis=1)

In [15]:
# Seperate Employment by NACE

counter = 1
for i in emp['sector'].unique():
    if counter == 1:
        emp_sector = emp[emp['sector'] == i].drop(['sector'], axis=1)
        emp_sector.rename(columns={'value': 'EMP [' + i + ']'}, inplace=True)
    else:
        es = emp[emp['sector'] == i].drop(['sector'], axis=1)
        es.rename(columns={'value': 'EMP [' + i + ']'}, inplace=True)
        emp_sector = pd.merge(emp_sector, es, on=['year', 'geo'])
    counter += 1
    
emp_sector.head()

Unnamed: 0,year,geo,EMP [A],EMP [B-E],EMP [C],EMP [F],EMP [G-I],EMP [G-J],EMP [J],EMP [K],EMP [K-N],EMP [L],EMP [M_N],EMP [O-Q],EMP [O-U],EMP [R-U],EMP [TOTAL]
0,2003,BE100,0.1,43.0,36.9,18.5,142.4,178.7,36.3,67.5,171.3,5.8,97.9,209.2,247.3,38.1,658.8
1,2004,BE100,0.1,41.7,35.8,17.7,139.0,173.1,34.1,67.4,170.5,6.0,97.1,210.9,250.0,39.1,653.1
2,2005,BE100,0.1,40.9,35.0,17.8,139.2,173.8,34.6,65.6,172.8,5.9,101.4,215.4,254.2,38.7,659.5
3,2006,BE100,0.1,39.7,33.5,19.0,139.1,172.3,33.3,62.6,171.3,5.8,102.9,215.1,253.0,37.9,655.4
4,2007,BE100,0.1,36.9,30.5,20.0,141.0,174.2,33.2,62.6,178.1,5.9,109.6,220.0,256.9,36.9,666.2


In [16]:
# Merge Population and Employment
pop.rename(columns={'value': 'POP'}, inplace=True)
emp_sector = pd.merge(emp_sector, pop, on=['year', 'geo'])
emp_sector.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12249 entries, 0 to 12248
Data columns (total 18 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         12249 non-null  int64  
 1   geo          12249 non-null  object 
 2   EMP [A]      12249 non-null  float64
 3   EMP [B-E]    12249 non-null  float64
 4   EMP [C]      12249 non-null  float64
 5   EMP [F]      12249 non-null  float64
 6   EMP [G-I]    12249 non-null  float64
 7   EMP [G-J]    12249 non-null  float64
 8   EMP [J]      12249 non-null  float64
 9   EMP [K]      12249 non-null  float64
 10  EMP [K-N]    12249 non-null  float64
 11  EMP [L]      12249 non-null  float64
 12  EMP [M_N]    12249 non-null  float64
 13  EMP [O-Q]    12249 non-null  float64
 14  EMP [O-U]    12249 non-null  float64
 15  EMP [R-U]    12249 non-null  float64
 16  EMP [TOTAL]  12249 non-null  float64
 17  POP          12249 non-null  float64
dtypes: float64(16), int64(1), object(1)
memory usa

In [17]:
# Calculate Employment Rates

# Order Dataset
emp_sector = emp_sector.sort_values(by=['geo', 'year']).reset_index(drop=True)

# Get Employment Rates
emp_cols = ['EMP [A]', 'EMP [B-E]', 'EMP [C]', 'EMP [F]', 'EMP [G-I]', 'EMP [G-J]', 'EMP [J]', 'EMP [K]', 'EMP [K-N]', 'EMP [L]', 'EMP [M_N]', 'EMP [O-Q]', 'EMP [O-U]', 
            'EMP [R-U]', 'EMP [TOTAL]']
for i in emp_cols:
    emp_sector[i] = emp_sector[i] / emp_sector['POP']
    for j in range(1, len(emp_sector)):
        emp_sector.loc[j, 'Gr '+i] = (emp_sector.loc[j, i] - emp_sector.loc[j-1, i]) / emp_sector.loc[j-1, i]
        if emp_sector.loc[j, 'geo'] != emp_sector.loc[j-1, 'geo']:
            emp_sector.loc[j, 'Gr '+i] = np.nan
        emp_sector.loc[0, 'Gr '+i] = np.nan

emp_sector.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12249 entries, 0 to 12248
Data columns (total 33 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   year            12249 non-null  int64  
 1   geo             12249 non-null  object 
 2   EMP [A]         12151 non-null  float64
 3   EMP [B-E]       12209 non-null  float64
 4   EMP [C]         12151 non-null  float64
 5   EMP [F]         12171 non-null  float64
 6   EMP [G-I]       12151 non-null  float64
 7   EMP [G-J]       12151 non-null  float64
 8   EMP [J]         12151 non-null  float64
 9   EMP [K]         12151 non-null  float64
 10  EMP [K-N]       12151 non-null  float64
 11  EMP [L]         12151 non-null  float64
 12  EMP [M_N]       12151 non-null  float64
 13  EMP [O-Q]       12189 non-null  float64
 14  EMP [O-U]       12189 non-null  float64
 15  EMP [R-U]       12151 non-null  float64
 16  EMP [TOTAL]     12209 non-null  float64
 17  POP             12249 non-null 

### Gross Domestic Product (GDP)

In [18]:
# Prepare GDP Dataset
gdp = econ_df[econ_df['variable'] == 'gdp'].drop(['variable', 'sector', 'unit', 'namefile'], axis=1)
gdp.rename(columns={'value': 'GDP'}, inplace=True)

In [19]:
# Get Real GDP
gdp = pd.merge(gdp, hicp, on=['year', 'country'])
gdp['HICP'] = gdp['HICP'] / 100
gdp['GDP'] = gdp['GDP'] / gdp['HICP']
gdp.head()

Unnamed: 0,year,geo,GDP,country,HICP
0,2002,AT111,804.049206,Austria,0.7804
1,2002,AT112,3651.217324,Austria,0.7804
2,2002,AT113,2205.381855,Austria,0.7804
3,2002,AT121,6264.364428,Austria,0.7804
4,2002,AT122,6377.793439,Austria,0.7804


In [20]:
# Calculate GDP Growth
gdp = gdp.sort_values(by=['geo', 'year']).reset_index(drop=True)
for j in range(1, len(gdp)):
    gdp.loc[j, 'Gr GDP'] = (gdp.loc[j, 'GDP'] - gdp.loc[j-1, 'GDP']) / gdp.loc[j-1, 'GDP']
    if gdp.loc[j, 'geo'] != gdp.loc[j-1, 'geo']:
        gdp.loc[j, 'Gr GDP'] = np.nan
    gdp.loc[0, 'Gr GDP'] = np.nan
gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22560 entries, 0 to 22559
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   year     22560 non-null  int64  
 1   geo      22560 non-null  object 
 2   GDP      22560 non-null  float64
 3   country  22560 non-null  object 
 4   HICP     22560 non-null  float64
 5   Gr GDP   21361 non-null  float64
dtypes: float64(3), int64(1), object(2)
memory usage: 1.0+ MB


In [21]:
# Get Real GDP and GDP Growth by Country

# Prepare Dataframe
gdp_country = country_df[country_df['variable'] == 'gdp_country'].drop(['variable', 'sector', 'unit', 'namefile'], axis=1)
gdp_country.rename(columns={'value': 'GDP [CNT]'}, inplace=True)

# Get Real GDP
gdp_country = pd.merge(gdp_country, hicp, on=['year', 'country'])
gdp_country['HICP'] = gdp_country['HICP'] / 100
gdp_country = gdp_country.sort_values(by=['country', 'year']).reset_index(drop=True)
gdp_country['GDP [CNT]'] = gdp_country['GDP [CNT]'] / gdp_country['HICP']

# Get Real GDP Growth
for j in range(1, len(gdp_country)):
    gdp_country.loc[j, 'Gr GDP [CNT]'] = (gdp_country.loc[j, 'GDP [CNT]'] - gdp_country.loc[j-1, 'GDP [CNT]']) / gdp_country.loc[j-1, 'GDP [CNT]']
    if gdp_country.loc[j, 'country'] != gdp_country.loc[j-1, 'country']:
        gdp_country.loc[j, 'Gr GDP [CNT]'] = np.nan
    gdp_country.loc[0, 'Gr GDP [CNT]'] = np.nan
    
gdp_country.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          540 non-null    int64  
 1   country       540 non-null    object 
 2   GDP [CNT]     540 non-null    float64
 3   HICP          540 non-null    float64
 4   Gr GDP [CNT]  513 non-null    float64
dtypes: float64(3), int64(1), object(1)
memory usage: 21.2+ KB


### Combine Economic Variables in 1 Dataset

In [22]:
# Set GVA Dataset as Base
country_move = gva_sector.pop('country')
gva_sector.insert(1, 'country', country_move)
gva_sector = gva_sector.drop(['HICP'], axis=1)
gva_sector.head()

Unnamed: 0,year,country,geo,GVA [A],GVA [B-E],GVA [C],GVA [F],GVA [G-I],GVA [G-J],GVA [J],...,Gr GVA [G-J] L1,Gr GVA [J] L1,Gr GVA [K] L1,Gr GVA [K-N] L1,Gr GVA [L] L1,Gr GVA [M_N] L1,Gr GVA [O-Q] L1,Gr GVA [O-U] L1,Gr GVA [R-U] L1,Gr GVA [TOTAL] L1
0,2003,Belgium,BE100,1.515917,5870.894391,3484.08287,1485.093482,11511.495705,18077.4381,6565.942395,...,,,,,,,,,,
1,2004,Belgium,BE100,6.573236,6254.495845,3773.781471,1645.789408,11417.958576,17948.530324,6530.571747,...,,,,,,,,,,
2,2005,Belgium,BE100,4.717552,6271.924519,4238.054917,1490.988267,12066.408613,18412.60433,6346.195718,...,-0.007131,-0.005387,0.092826,0.052331,-0.037987,0.042215,0.017823,0.02288,0.055429,0.030587
3,2006,Belgium,BE100,5.200946,6118.794326,3701.06383,1907.565012,12267.49409,18594.917258,6327.423168,...,0.025856,-0.028233,-0.059879,-0.0131,0.104553,-0.000467,0.033891,0.031452,0.016316,0.007856
4,2007,Belgium,BE100,7.082317,5899.68652,3485.893417,1834.20411,12603.041913,18976.082666,6373.040752,...,0.009902,-0.002958,0.001278,0.030999,0.070681,0.050015,0.000908,0.0059,0.037418,0.019232


In [23]:
# Combine Seperate Datasets
df = pd.merge(gva_sector, emp_sector.drop(['POP'], axis=1), on=['year', 'geo'])
df = pd.merge(df, gdp.drop(['country', 'HICP'], axis=1), on=['year', 'geo'])
df = pd.merge(df, gdp_country.drop(['HICP'], axis=1), on=['year', 'country'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10642 entries, 0 to 10641
Data columns (total 97 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   year               10642 non-null  int64  
 1   country            10642 non-null  object 
 2   geo                10642 non-null  object 
 3   GVA [A]            10642 non-null  float64
 4   GVA [B-E]          10642 non-null  float64
 5   GVA [C]            10642 non-null  float64
 6   GVA [F]            10642 non-null  float64
 7   GVA [G-I]          10642 non-null  float64
 8   GVA [G-J]          10642 non-null  float64
 9   GVA [J]            10642 non-null  float64
 10  GVA [K]            10642 non-null  float64
 11  GVA [K-N]          10642 non-null  float64
 12  GVA [L]            10642 non-null  float64
 13  GVA [M_N]          10642 non-null  float64
 14  GVA [O-Q]          10642 non-null  float64
 15  GVA [O-U]          10642 non-null  float64
 16  GVA [R-U]          106

In [24]:
# Save Dataset
df.to_csv('economic_variables.csv', index=False)