# Prerequisites

- Python 3.10.4

> Warning: Installing packages into the conda environment may take a few minutes.

Configuring conda environment
```cmd
conda create -n ca2_env python=3.10.4
conda activate ca2_env
conda install -c anaconda openpyxl pandas numpy
```

Installing Jupyter Notebook in the ca2_env environment
```cmd
conda install jupyter notebook
python -m ipykernel install --name ca2_env
```

Run Jupyter
```cmd
jupyter notebook
```

How crops and cereals contribute to the agricultural value-added indices in Ireland and Europe.

In [52]:
import pandas as pd
import numpy as np

# Lookup table: short dataset key -> relative path of the source file.
# Keys starting with "fao-" point at CSV files, keys starting with "fadn-" at
# Excel workbooks.
crop_datasets = {
    # FAO CSV files
    "fao-crop-residues": "../data/fao/EU-Crop-Residues.csv",
    "fao-crop-production-idx": "../data/fao/EU-Crops-Production-Indices.csv",
    "fao-employment-indicators-rural": "../data/fao/EU-Employment-Indicators-Rural-All.csv",
    "fao-employment-indicators-agri-hours": "../data/fao/EU-Employment-Indicators-Agricultural-Working-Hours.csv",
    "fao-cereals-producer-prices": "../data/fao/EU-Producer-Prices-Cereals.csv",
    "fao-land-use": "../data/fao/EU-Land-Use.csv",
    "fao-total-energy-use": "../data/fao/EU-Total-Energy-Use.csv",
    "fao-cereals-export-import-idx": "../data/fao/EU-Trade-Indices-Cereals-Export-Import.csv",
    "fao-agriculture-value-added": "../data/fao/EU-Value-Added-Agriculture.csv",
    # FADN Excel workbooks
    "fadn-subsides": "../data/fadn/fadn-subsides-year-ms-region-crops.xlsx",
    "fadn-summary": "../data/fadn/fadn-custom-summary.xlsx",
    "fadn-rented-land": "../data/fadn/sumary-year-ms-region-so.xlsx",
}


In [63]:
def init_df():
    """Build the base grid holding one row per country code and year.

    Returns:
        pandas.DataFrame: two columns, ``country`` (two-letter code) and
        ``year`` (integer, 2000 through 2021). Rows are ordered by country
        first and by year second, with a default integer index.
    """
    # NOTE(review): Greece is listed as "EL". If the FAO files identify it as
    # "GR" in "Area Code (ISO2)", Greek rows will never match in the later
    # merges — confirm against the data.
    member_states = [
        "BE", "BG", "CY", "CZ", "DK", "DE", "EL", "ES", "EE",
        "FR", "HR", "HU", "IE", "IT", "LT", "LV", "LU", "MT",
        "NL", "AT", "PL", "PT", "RO", "FI", "SE", "SK", "SI",
    ]
    year_range = np.arange(2000, 2022)
    # Cartesian product of both lists; the first level (country) varies slowest.
    grid = pd.MultiIndex.from_product(
        [member_states, year_range], names=["country", "year"]
    )
    return grid.to_frame(index=False).astype({"year": int})

def get_fao_dataset(dataset_name, query, value_col_name, aggfunc="mean", usecols=["Area Code (ISO2)","Year","Element Code","Item","Value"]):
    """Load one FAO CSV and reduce it to a single value per country and year.

    Args:
        dataset_name: key into ``crop_datasets`` naming the CSV file to read.
        query: ``DataFrame.query`` expression selecting the rows to keep.
        value_col_name: name given to the aggregated value column.
        aggfunc: aggregation applied to "Value" within each
            ("Area Code (ISO2)", "Year") group.
        usecols: columns read from the CSV; must include the grouping columns,
            "Value" and every column referenced by ``query``.

    Returns:
        pandas.DataFrame: columns ``country``, ``year`` and ``value_col_name``.
    """
    group_keys = ["Area Code (ISO2)", "Year"]

    raw = pd.read_csv(crop_datasets[dataset_name], usecols=usecols)
    selected = raw.query(query)
    per_country_year = selected.groupby(group_keys)["Value"].agg(aggfunc)

    result = per_country_year.reset_index()
    result.columns = ["country", "year", value_col_name]
    return result

def get_fand_dataset(dataset_name, columns):
    """Load an FADN workbook and keep only its field-crop rows.

    Args:
        dataset_name: key into ``crop_datasets`` naming the Excel file to read.
        columns: new names for the columns that remain once the descriptive
            columns are dropped. They are assigned by position, so the list
            needs one entry per remaining column, in sheet order.

    Returns:
        pandas.DataFrame: rows whose "8 Types of Farming" equals
        "(1) Fieldcrops", without the "Member State", "8 Types of Farming",
        "Region" and "(SYS03) Sample farms" columns, with "-" cells replaced
        by NaN and the columns renamed to ``columns``.

    Raises:
        KeyError: if ``dataset_name`` is unknown or an expected column is
            missing from the sheet.
        ValueError: if ``columns`` does not match the number of remaining
            columns.
    """
    raw = pd.read_excel(crop_datasets[dataset_name])
    fieldcrops = raw[raw["8 Types of Farming"] == "(1) Fieldcrops"]
    fieldcrops = fieldcrops.drop(
        columns=["Member State", "8 Types of Farming", "Region", "(SYS03) Sample farms"]
    )
    # "-" marks a missing figure. np.nan is used because the np.NaN alias was
    # removed in NumPy 2.0. infer_objects() turns columns that held a mix of
    # numbers and "-" back into numeric dtype, which newer pandas versions no
    # longer do inside replace().
    cleaned = fieldcrops.replace("-", np.nan).infer_objects()
    # Positional rename: any earlier rename by label would be overwritten here.
    cleaned.columns = columns
    return cleaned

# Start from the full country/year grid; each dataset is then attached to it with a left merge.
agriculture_df = init_df()

## FAO Datasets

In [64]:
# Column selection for the employment files, which are filtered on
# "Indicator Code" and "Sex" rather than on "Element Code".
employment_usecols = ["Area Code (ISO2)", "Year", "Indicator Code", "Sex", "Value"]

# One entry per FAO series, in the order the columns are added:
# (output column, key in crop_datasets, row filter, extra get_fao_dataset arguments).
fao_series = [
    ("crop_residues_kg", "fao-crop-residues", "`Element Code` == 72392", {}),
    ("crop_production_idx", "fao-crop-production-idx", "`Element Code` == 432", {}),
    ("cereals_produce_price_usd_tonne", "fao-cereals-producer-prices", "`Element Code` == 5532", {}),
    ("employment_ratio_rural_areas_pct", "fao-employment-indicators-rural",
     "`Indicator Code` == 21069 & Sex == 'Total'", {"usecols": employment_usecols}),
    ("female_employment_ratio_rural_areas_pct", "fao-employment-indicators-rural",
     "`Indicator Code` == 21069 & Sex == 'Female'", {"usecols": employment_usecols}),
    ("male_employment_ratio_rural_areas_pct", "fao-employment-indicators-rural",
     "`Indicator Code` == 21069 & Sex == 'Male'", {"usecols": employment_usecols}),
    ("mean_weekly_working_hours", "fao-employment-indicators-agri-hours",
     "`Indicator Code` == 21150 & Sex == 'Total'", {"usecols": employment_usecols}),
    ("female_mean_weekly_working_hours", "fao-employment-indicators-agri-hours",
     "`Indicator Code` == 21150 & Sex == 'Female'", {"usecols": employment_usecols}),
    ("male_mean_weekly_working_hours", "fao-employment-indicators-agri-hours",
     "`Indicator Code` == 21150 & Sex == 'Male'", {"usecols": employment_usecols}),
    ("crop_land_use_1000ha", "fao-land-use", "`Element Code` == 5110", {}),
    ("agri_energy_use_tj", "fao-total-energy-use", "`Element Code` == 72184", {}),
    ("avg_import_idx", "fao-cereals-export-import-idx", "`Element Code` == 465", {}),
    ("avg_export_idx", "fao-cereals-export-import-idx", "`Element Code` == 495", {}),
    ("value_added_agriculture", "fao-agriculture-value-added", "`Element Code` == 6110", {}),
]

# Left merges keep every country/year row of the grid; pairs without data get NaN.
for series_col, series_dataset, series_query, series_kwargs in fao_series:
    agriculture_df = agriculture_df.merge(
        get_fao_dataset(
            dataset_name=series_dataset,
            query=series_query,
            value_col_name=series_col,
            **series_kwargs,
        ),
        how="left",
    )


### FADN Dataset

In [69]:
# Attach the FADN field-crop subsidy figures. get_fand_dataset assigns these
# names by position, so their order has to follow the remaining sheet columns
# (presumably year first, then country — verify against the workbook).
agriculture_df = agriculture_df.merge(
    get_fand_dataset(
        "fadn-subsides",
        [
            "year",
            "country",
            "subsidies_on_crops_€",
            "compensatory_payments_area_€",
            "set_aside_premiums_€",
            "other_crops_subsidies_€",
            "farms_represented",
        ],
    ),
    how="left",
)

In [73]:
# Keep the years after 2003 (2004 onwards).
# NOTE(review): the earlier comment said FADN data is available "from 2003",
# yet `year > 2003` also excludes 2003 itself — confirm whether
# `year >= 2003` was intended.
agriculture_df = (
    agriculture_df
    .query("year > 2003")
    # country/year become the index, so only value columns count below
    .set_index(["country", "year"])
    # Drop rows with no values at all. dropna works row by row; dropping by
    # index label would also remove non-empty rows that share a
    # (country, year) label with an empty one.
    .dropna(how="all")
)

In [104]:
# Number of missing values in each column.
agriculture_df.isna().sum()

crop_residues_kg                            90
crop_production_idx                         92
cereals_produce_price_usd_tonne             83
employment_ratio_rural_areas_pct           220
female_employment_ratio_rural_areas_pct    222
male_employment_ratio_rural_areas_pct      221
mean_weekly_working_hours                   64
female_mean_weekly_working_hours            83
male_mean_weekly_working_hours              64
crop_land_use_1000ha                       214
agri_energy_use_tj                          90
avg_import_idx                              64
avg_export_idx                              64
value_added_agriculture                    326
subsidies_on_crops_€                       174
compensatory_payments_area_€               174
set_aside_premiums_€                       174
other_crops_subsidies_€                    174
farms_represented                           41
dtype: int64