In [32]:
# Import the necessary libraries
import pandas as pd

In [33]:
# Load the forest-area table from the CSV file (path is relative to the working directory)
forest = pd.read_csv(filepath_or_buffer='Forest Area.csv')

In [34]:
# Show the column labels of `forest`; as the last expression of the cell,
# the resulting Index is rendered as the cell output
forest.columns

Index(['CountryID', 'Country and Area', 'Forest Area, 1990 (1000 ha)',
       'Forest Area, 2000 (1000 ha)', 'Forest Area, 2010 (1000 ha)',
       'Forest Area, 2015 (1000 ha)', 'Forest Area, 2020 (1000 ha)',
       'Total Land Area, 2020 (1000 ha)',
       'Forest Area as a  Proportion of (%)\nTotal Land Area, 2020',
       'Deforestation, \n2015-2020 (1000 ha/year)',
       'Total Forest Area \nAffected by Fire, 2015 (1000 ha)',
       'European Region', 'Unnamed: 12'],
      dtype='object')

In [35]:
# Columns to retain: the country name, the five forest-area snapshots and the region label
columns_to_keep = [
    'Country and Area',
    'Forest Area, 1990 (1000 ha)',
    'Forest Area, 2000 (1000 ha)',
    'Forest Area, 2010 (1000 ha)',
    'Forest Area, 2015 (1000 ha)',
    'Forest Area, 2020 (1000 ha)',
    'European Region',
]

# Rows that carry a value in 'European Region' (rows where it is missing are dropped)
has_region = forest['European Region'].notna()

# Apply the row filter and the column selection in one step
forest = forest.loc[has_region, columns_to_keep]
forest.head()

Unnamed: 0,Country and Area,"Forest Area, 1990 (1000 ha)","Forest Area, 2000 (1000 ha)","Forest Area, 2010 (1000 ha)","Forest Area, 2015 (1000 ha)","Forest Area, 2020 (1000 ha)",European Region
13,Austria,3775.67,3838.14,3863.2,3881.19,3899.15,Western Europe
20,Belgium,677.4,667.3,689.87,689.3,689.3,Western Europe
32,Bulgaria,3327.0,3375.0,3737.0,3833.0,3893.0,Eastern Europe
50,Croatia,1850.0,1885.0,1920.0,1922.0,1939.11,Southern Europe
53,Cyprus,161.11,171.61,172.84,172.71,172.53,Southern Europe


In [36]:
# Reshape the table from wide to long format: every column that is not listed in id_vars
# (here, the per-year 'Forest Area, ...' columns) is stacked into two columns:
#   - 'Year'                  -> the original column header
#   - 'Forest Area (1000 ha)' -> the value that was stored under that header
# The id_vars columns ('Country and Area', 'European Region') are repeated on every row.
forest_melted = forest.melt(id_vars=['Country and Area', 'European Region'],
                            var_name='Year',
                            value_name='Forest Area (1000 ha)')

# After the melt, 'Year' holds the original headers (e.g. 'Forest Area, 1990 (1000 ha)').
# Keep only the year: the pattern captures the first run of four digits ('1990');
# str.extract returns the first match only, so the later '1000' is never picked up.
#   - The pattern is a raw string so that '\d' reaches the regex engine unchanged; in a
#     normal string literal '\d' is an invalid escape sequence and triggers a warning.
#   - expand=False yields a Series for the single capture group instead of a
#     one-column DataFrame, which is the natural thing to assign to a column.
# The extracted years are still strings.
forest_melted['Year'] = forest_melted['Year'].str.extract(r'(\d{4})', expand=False)

# Display the first 10 rows of the newly reshaped DataFrame
forest_melted.head(10)

Unnamed: 0,Country and Area,European Region,Year,Forest Area (1000 ha)
0,Austria,Western Europe,1990,3775.67
1,Belgium,Western Europe,1990,677.4
2,Bulgaria,Eastern Europe,1990,3327.0
3,Croatia,Southern Europe,1990,1850.0
4,Cyprus,Southern Europe,1990,161.11
5,Czechia,Eastern Europe,1990,2629.42
6,Denmark,Northern Europe,1990,531.44
7,Estonia,Northern Europe,1990,2205.9
8,Finland,Northern Europe,1990,21875.33
9,France,Western Europe,1990,14436.0


In [37]:
# Show the dtype of every column of the long-format table `forest_melted`
forest_melted.dtypes

Country and Area         object
European Region          object
Year                     object
Forest Area (1000 ha)    object
dtype: object

In [38]:
# Write the long-format table to 'forest_by_year.xlsx' (relative path), omitting the row index
forest_melted.to_excel(excel_writer='forest_by_year.xlsx', index=False)