In [754]:
import numpy as np
import pandas as pd
import plotly.express as plt
import plotly.express as px
import plotly.graph_objects as go

In [755]:
# Load the raw GVA table: one row per country, one column per year.
eustat = pd.read_csv(filepath_or_buffer='GVA.csv')

In [756]:
# Preview the wide table (up to the first 50 rows).
eustat.head(n=50)

Unnamed: 0,Country,1975,1976,1977,1978,1979,1980,1981,1982,1983,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Belgium,:,:,:,:,:,:,:,:,:,...,18713.5,19008.0,19294.0,19678.4,20149.9,21767.4,22628.0,22019.80,24298.90,26484.10
1,Bulgaria,:,:,:,:,:,:,:,:,:,...,1725.5,1651.3,1726.5,1646.6,1866.1,2034.6,2366.8,2614.30,2338.40,2636.40
2,Czechia,:,:,:,:,:,:,:,:,:,...,8215.7,8048.3,8636.5,8721.3,9547.7,10630.7,11489.5,11088.60,12110.80,14423.30
3,Denmark,2045.30,2390.60,2522.10,2602.50,2518.70,2790.80,2564.20,2627.70,2775.10,...,10145.2,10532.7,11749.9,13380.4,14328.8,14828.7,14716.5,14860.20,16275.10,19183.40
4,Germany,:,:,:,:,:,:,:,:,:,...,112226.0,119583.0,124907.0,132548.0,137994.0,148044.0,154323.0,167329.00,179802.00,211102.00
5,Estonia,:,:,:,:,:,:,:,:,:,...,1163.6,1079.1,1098.5,1229.5,1379.7,1561.8,1669.8,1651.80,1845.80,2198.50
6,Ireland,:,:,:,:,:,:,:,:,:,...,3477.5,4427.4,4908.8,5734.5,7478.1,8055.2,8778.9,8178.60,8717.80,11051.30
7,Greece,:,:,:,:,:,:,:,:,:,...,5153.9,3738.4,3493.4,3484.2,2259.1,2431.2,2573.7,2681.30,2925.10,3945.10
8,Spain,:,:,:,:,:,:,:,:,:,...,53763.0,53254.0,56422.0,59362.0,62061.0,64459.0,70821.0,61880.00,60865.00,63162.00
9,France,20036.70,21680.90,22869.30,25723.60,28587.40,33291.00,34214.40,36563.70,36565.60,...,111927.0,110116.0,107884.0,108362.0,113137.0,117386.0,124114.0,108963.00,125454.00,129235.00


In [757]:
# Reshape from wide (one column per year) to long: one row per
# (Country, Year) pair, with the cell value stored in 'GVA'.
melted_eustat = pd.melt(eustat, id_vars='Country', var_name='Year', value_name='GVA')

# Order the rows by country, then by year. 'Year' still holds the original
# column labels here, so this is a label sort; it matches chronological order
# as long as the labels are four-digit years.
# The sort must read from `melted_eustat` and use the 'Country' column: no
# other frame and no 'Countries' column is created anywhere in this notebook.
melted_eustat = melted_eustat.sort_values(['Country', 'Year']).reset_index(drop=True)

# Preview the long-format table.
melted_eustat.head(50)

Unnamed: 0,Country,Year,GVA
0,Albania,1975,:
1,Albania,1976,:
2,Albania,1977,:
3,Albania,1978,:
4,Albania,1979,:
5,Albania,1980,:
6,Albania,1981,:
7,Albania,1982,:
8,Albania,1983,:
9,Albania,1984,:


### Why Gross Value Added for the construction sector?
Gross Value Added (GVA) for the construction sector measures the value created by the industry: the difference between the value of the goods and services it produces and the cost of the inputs used in the construction process. GVA is a vital indicator of economic performance and productivity within the construction sector. It helps determine the sector's contribution to the overall economy and provides insights into its growth and health. Because the cost of materials and services is subtracted from the total value of output, what remains is the value the sector itself adds through production, which is paid out as wages, profits, and other factor incomes.

In [758]:
# Load the tab-separated lookup table of country names and ISO codes.
country_codes = pd.read_csv('country_codes.tsv', delimiter='\t')

In [759]:
# Attach the ISO codes to every (Country, Year) row. A left join keeps all GVA
# rows; countries with no match in the lookup table get NaN codes.
eustat = melted_eustat.merge(country_codes, on='Country', how='left')
eustat.head(n=50)


Unnamed: 0,Country,Year,GVA,Alpha-2 code,Alpha-3 code,Numeric
0,Albania,1975,:,AL,ALB,8
1,Albania,1976,:,AL,ALB,8
2,Albania,1977,:,AL,ALB,8
3,Albania,1978,:,AL,ALB,8
4,Albania,1979,:,AL,ALB,8
5,Albania,1980,:,AL,ALB,8
6,Albania,1981,:,AL,ALB,8
7,Albania,1982,:,AL,ALB,8
8,Albania,1983,:,AL,ALB,8
9,Albania,1984,:,AL,ALB,8


In [760]:
# Only the Alpha-3 code is used later (as the map location key), so discard
# the other two code columns.
unused_code_columns = ['Alpha-2 code', 'Numeric']
eustat = eustat.drop(columns=unused_code_columns)
eustat.head(n=50)

Unnamed: 0,Country,Year,GVA,Alpha-3 code
0,Albania,1975,:,ALB
1,Albania,1976,:,ALB
2,Albania,1977,:,ALB
3,Albania,1978,:,ALB
4,Albania,1979,:,ALB
5,Albania,1980,:,ALB
6,Albania,1981,:,ALB
7,Albania,1982,:,ALB
8,Albania,1983,:,ALB
9,Albania,1984,:,ALB


In [761]:
# The source data marks missing observations with a ':' placeholder.
# Replace it with a real NaN (np.nan, not the string 'NaN') so pandas treats
# those cells as missing values.
eustat['GVA'] = eustat['GVA'].replace(':', np.nan)

eustat.head(50)


Unnamed: 0,Country,Year,GVA,Alpha-3 code
0,Albania,1975,,ALB
1,Albania,1976,,ALB
2,Albania,1977,,ALB
3,Albania,1978,,ALB
4,Albania,1979,,ALB
5,Albania,1980,,ALB
6,Albania,1981,,ALB
7,Albania,1982,,ALB
8,Albania,1983,,ALB
9,Albania,1984,,ALB


In [762]:
# Turn the year labels (e.g. '1975') into integer years.
# Casting to str and giving an explicit '%Y' format keeps this cell safe to
# re-run: without them, a second run would hand integer years to
# pd.to_datetime, which reads bare integers as nanoseconds since the epoch
# and would silently turn every year into 1970.
eustat['Year'] = pd.to_datetime(eustat['Year'].astype(str), format='%Y').dt.year

eustat.head(50)



Unnamed: 0,Country,Year,GVA,Alpha-3 code
0,Albania,1975,,ALB
1,Albania,1976,,ALB
2,Albania,1977,,ALB
3,Albania,1978,,ALB
4,Albania,1979,,ALB
5,Albania,1980,,ALB
6,Albania,1981,,ALB
7,Albania,1982,,ALB
8,Albania,1983,,ALB
9,Albania,1984,,ALB


In [763]:
# GVA values were read as text because the raw table mixes numbers with a ':'
# placeholder. Coerce to numeric so the column can drive a continuous colour
# scale; anything that cannot be parsed becomes NaN.
eustat['GVA'] = pd.to_numeric(eustat['GVA'], errors='coerce')

# Fix the colour range to the extremes over all countries and years, so a
# given colour means the same GVA in every animation frame.
min_value = eustat['GVA'].min()
max_value = eustat['GVA'].max()

# One animation frame per year; countries are located by their Alpha-3 code.
fig = px.choropleth(
    eustat,
    locations='Alpha-3 code',
    hover_name='Country',
    color='GVA',
    color_continuous_scale='plasma',
    animation_frame='Year',
    title='Gross Value Added for Construction Sector',
    range_color=(min_value, max_value),
)

fig.update_layout(
    geo=dict(
        scope='europe',  # restrict the map to Europe
        projection_type='natural earth',
    )
)

fig.show()
