# Merge the CSV files

The data is provided in two separate CSV files: `yield_tables.csv` and `yield_tables_meta.csv`. The first file contains the actual yield tables, while the second contains metadata about each yield table, including its title and tree type. We merge the two files on the `id` column, which is common to both, using a left join so that every yield table row is kept and only the `title` and `tree_type` columns are added from the metadata.

In [14]:
import pandas as pd

In [15]:
# Load the raw yield tables and their metadata (both files are semicolon-delimited).
yield_tables = pd.read_csv("../data/raw/yield_tables.csv", sep=";")
yield_tables_meta = pd.read_csv("../data/raw/yield_tables_meta.csv", sep=";")

# Attach the title and tree type of each yield table to its rows.
# how="left" keeps every yield-table row even when no metadata matches.
# validate="many_to_one" makes pandas raise a MergeError if an `id` occurs
# more than once in the metadata, instead of silently duplicating rows.
merged_yield_tables = pd.merge(
    yield_tables,
    yield_tables_meta[["id", "title", "tree_type"]],
    on="id",
    how="left",
    validate="many_to_one",
)

# Persist the merged table to the interim data folder, omitting the row index.
merged_yield_tables.to_csv("../data/interim/merged_yield_tables.csv", index=False)

merged_yield_tables.head()

Unnamed: 0,id,yield_class,age,dominant_height,average_height,dbh,taper,trees_per_ha,basal_area,volume_per_ha,average_annual_age_increment,total_growth_performance,current_annual_increment,mean_annual_increment,title,tree_type
0,1,15.0,20,5.9,5.3,11.5,0.396,2585.0,26.8,54.0,2.7,63.0,,3.2,Fichte Hochgebirge,coniferous
1,1,15.0,30,12.2,10.6,16.7,0.458,1708.0,37.5,180.0,6.0,208.0,14.5,6.9,Fichte Hochgebirge,coniferous
2,1,15.0,40,18.3,15.7,21.6,0.46,1266.0,46.3,334.0,8.4,397.0,18.9,9.9,Fichte Hochgebirge,coniferous
3,1,15.0,50,23.4,20.5,26.1,0.456,1003.0,53.5,499.0,10.0,605.0,20.8,12.1,Fichte Hochgebirge,coniferous
4,1,15.0,60,27.6,24.6,30.2,0.451,830.0,59.4,659.0,11.0,815.0,21.0,13.6,Fichte Hochgebirge,coniferous


In [16]:
# Structural overview of the merged table: per-column non-null counts and dtypes.
# Handy for spotting sparsely populated columns before further processing.
merged_yield_tables.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14398 entries, 0 to 14397
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            14398 non-null  int64  
 1   yield_class                   14398 non-null  float64
 2   age                           14398 non-null  int64  
 3   dominant_height               8825 non-null   float64
 4   average_height                9340 non-null   float64
 5   dbh                           9113 non-null   float64
 6   taper                         4425 non-null   float64
 7   trees_per_ha                  9255 non-null   float64
 8   basal_area                    9511 non-null   float64
 9   volume_per_ha                 14282 non-null  float64
 10  average_annual_age_increment  4882 non-null   float64
 11  total_growth_performance      5155 non-null   float64
 12  current_annual_increment      8691 non-null   float64
 13  m

In [17]:
# Summary statistics (count, mean, std, quartiles, min/max) for the merged table.
# NOTE(review): the recorded output shows extreme maxima, e.g. dominant_height
# 1919.0 against a 75th percentile of 28.8 -- possibly data-entry outliers; verify.
# NOTE(review): mean_annual_increment does not appear in the recorded output;
# check whether it was parsed as a non-numeric dtype -- TODO confirm.
merged_yield_tables.describe()

Unnamed: 0,id,yield_class,age,dominant_height,average_height,dbh,taper,trees_per_ha,basal_area,volume_per_ha,average_annual_age_increment,total_growth_performance,current_annual_increment
count,14398.0,14398.0,14398.0,8825.0,9340.0,9113.0,4425.0,9255.0,9511.0,14282.0,4882.0,5155.0,8691.0
mean,125.767954,7.89959,79.438741,22.114483,20.367438,26.350253,0.455306,1166.835475,33.763197,361.887531,6.69852,682.758487,8.265104
std,65.677997,5.677468,48.892472,22.425561,8.747435,14.274192,0.073344,2194.385171,13.796179,239.798271,7.437787,457.677307,5.534094
min,1.0,0.0,4.0,0.0,0.0,0.4,0.066,0.0,0.0,0.0,0.1,0.0,-0.06
25%,89.0,4.0,40.0,15.0,13.8,16.1,0.435,403.0,23.5,176.0,3.1,306.0,4.4
50%,148.0,6.0,70.0,21.9,20.0,24.9,0.452986,668.0,32.38,316.0,4.85,600.0,7.0
75%,180.0,11.0,110.0,28.8,26.6,35.06,0.482,1249.0,43.6,505.0,7.5,993.5,10.8
max,213.0,36.0,300.0,1919.0,74.0,514.0,4.33,77650.0,80.3,1421.0,70.0,2462.0,44.9
