# Importing Libraries

Import the necessary libraries for the project such as pandas, numpy, matplotlib, pytorch and sklearn.

In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch as pyt
import sklearn as sk
from IPython import display

# Loading the Data

Load the data from the csv file and check for missing values.

In [66]:
# Reading Consumer prices indicators data
consumer_prices_indicators = pd.read_csv('Data/Consumer prices indicators - FAOSTAT_data_en_2-22-2024.csv')
print("Consumer Prices:\n",consumer_prices_indicators.isnull().sum())

# Reading Crops production indicators data
crops_production_indicators = pd.read_csv('Data/Crops production indicators - FAOSTAT_data_en_2-22-2024.csv')
print("Crop Production:\n", crops_production_indicators.isnull().sum())

# Reading Emissions data
emissions = pd.read_csv('Data/Emissions - FAOSTAT_data_en_2-27-2024.csv')
print("Emissions:\n", emissions.isnull().sum())

# Reading Employment data
employment = pd.read_csv('Data/Employment - FAOSTAT_data_en_2-27-2024.csv')
print("Employment:\n", employment.isnull().sum())

# Reading Exchange rate data
exchange_rate = pd.read_csv('Data/Exchange rate - FAOSTAT_data_en_2-22-2024.csv')
print("Exchange Rate:\n", exchange_rate.isnull().sum())

# Reading Fertilizers use data
fertilizers_use = pd.read_csv('Data/Fertilizers use - FAOSTAT_data_en_2-27-2024.csv')
print("Fertilizers use:\n", fertilizers_use.isnull().sum())

# Reading Food balances indicators data
food_balances_indicators = pd.read_csv('Data/Food balances indicators - FAOSTAT_data_en_2-22-2024.csv')
print("Food Bal:\n", food_balances_indicators.isnull().sum())

# Reading Food security indicators data
food_security_indicators = pd.read_csv('Data/Food security indicators  - FAOSTAT_data_en_2-22-2024.csv')
print("Food Sec:\n", food_security_indicators.isnull().sum())

# Reading Food trade indicators data
food_trade_indicators = pd.read_csv('Data/Food trade indicators - FAOSTAT_data_en_2-22-2024.csv')
print("Food Trade:\n", food_trade_indicators.isnull().sum())

# Reading Foreign direct investment data
foreign_direct_investment = pd.read_csv('Data/Foreign direct investment - FAOSTAT_data_en_2-27-2024.csv')
print("Foreign Invest:\n", foreign_direct_investment.isnull().sum())

# Reading Land temperature change data
land_temperature_change = pd.read_csv('Data/Land temperature change - FAOSTAT_data_en_2-27-2024.csv')
print("Land T Change:\n", land_temperature_change.isnull().sum())

# Reading Land use data
land_use = pd.read_csv('Data/Land use - FAOSTAT_data_en_2-22-2024.csv', low_memory=False)
print("Land Use:\n", land_use.isnull().sum())

# Reading Pesticides use data
pesticides_use = pd.read_csv('Data/Pesticides use - FAOSTAT_data_en_2-27-2024.csv')
print("Pesticides:\n", pesticides_use.isnull().sum())

Consumer Prices:
 Domain Code             0
Domain                  0
Area Code (M49)         0
Area                    0
Year Code               0
Year                    0
Item Code               0
Item                    0
Months Code             0
Months                  0
Element Code            0
Element                 0
Unit                57663
Value                   0
Flag                    0
Flag Description        0
Note                55227
dtype: int64
Crop Production:
 Domain Code             0
Domain                  0
Area Code (M49)         0
Area                    0
Element Code            0
Element                 0
Item Code (CPC)         0
Item                    0
Year Code               0
Year                    0
Unit                    0
Value                   0
Flag                    0
Flag Description        0
Note                41649
dtype: int64
Emissions:
 Domain Code             0
Domain                  0
Area Code (M49)         0
Area            

# Preprocessing the Data

Preprocess the data by removing the columns that are not needed, separating datasets into manageable categories, filling or removing the missing values and normalizing the data.

In [67]:
# Drop 'Note' and columns from Consumer prices indicators data
consumer_prices_indicators.drop(columns=['Note'], inplace=True)
# Separate inflation data from indices data
inflation_data = consumer_prices_indicators[consumer_prices_indicators['Unit'] == '%']
indices_data = consumer_prices_indicators[consumer_prices_indicators['Unit'].isnull()]
# Removes Unit column from indices data
indices_data.drop(columns=['Unit'], inplace=True)

# Drop 'Note' column from Emissions data
emissions.drop(columns=['Note'], inplace=True)

# Drop 'Note' column from Employment data
employment.drop(columns=['Note'], inplace=True)
# Separates ILO estimates from mean working hours data
ilo_estimates = employment[employment['Indicator Code'] == '21144']
mean_working_hours = employment[employment['Indicator Code'] == '21150']

# Drop 'Unit' column from Exchange rate data
exchange_rate.drop(columns=['Unit'], inplace=True)

food_security_indicators.drop(columns=['Note'], inplace=True)
# Drop 'Note' column from Food trade indicators data
food_trade_indicators.drop(columns=['Note'], inplace=True)
# Drop 'Note' column from Land use data
land_use.drop(columns=['Note'], inplace=True)

# Drop 'Note' column from Pesticides use data
pesticides_use.drop(columns=['Note'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  indices_data.drop(columns=['Unit'], inplace=True)


In [68]:
# Impute missing values in land_temperature_change using mean of the country
land_temperature_change_cleaned = land_temperature_change
land_temperature_change_cleaned['Value'] = land_temperature_change_cleaned.groupby('Area')['Value'].transform(lambda x: x.fillna(x.mean()))
# Handle remaining missing values by filling with overall mean as no country mean is available
overall_mean = land_temperature_change_cleaned['Value'].mean()
land_temperature_change = land_temperature_change_cleaned['Value'].fillna(overall_mean)

In [ ]:
# Create a dataframe for the exported crop prices based on the 
