## Create a livestock emission table

In [None]:
import pandas as pd
import numpy as np 
import requests 
import zipfile
import psycopg2
import sqlalchemy

import _functions_sql as fs
import _functions_data_files as fdf

# Location of the raw download; both values are passed to fdf.get_path() further down
# File name of the normalized livestock emissions csv
source_file = 'Emissions_livestock_E_All_Data_(Normalized).csv'
# Directory that holds the csv files of this download
source_dir = 'faostat_emi_live'

### Import & examine livestock emissions data from csv

In [None]:
# Load the raw csv into a pandas dataframe
df_emis_livestock = pd.read_csv(
    fdf.get_path(source_file, source_dir),
    encoding='latin-1',
    # Parsing 'Note' as str handles the DtypeWarning without 'low_memory=False'
    converters={'Note': str},
)

In [None]:
# Preview the first five rows of the raw data
df_emis_livestock.head(n=5)

In [None]:
# Print column names, dtypes and non-null counts of the raw data
df_emis_livestock.info()

In [None]:
# Count rows that are (True) / are not (False) exact repeats of an earlier row
df_emis_livestock.duplicated(keep='first').value_counts()

In [None]:
# Count the distinct per-row patterns of missing values across all columns
df_emis_livestock.isna().value_counts()

In [None]:
# Print the distinct values found in the 'Note' column
print(pd.unique(df_emis_livestock['Note']))

In [None]:
# Print the distinct values found in the 'Source' column
print(pd.unique(df_emis_livestock['Source']))

In [None]:
# Remove the columns that are not needed downstream, in a single drop call
df_emis_livestock = df_emis_livestock.drop(
    columns=[
        'Area Code (M49)',
        'Item Code (CPC)',
        'Year Code',
        'Flag',
        'Note',
        'Source Code',
    ]
)

In [None]:
# Switch the remaining column names to snake_case
df_emis_livestock = df_emis_livestock.rename(
    columns={
        'Area Code': 'area_code',
        'Area': 'area',
        'Item Code': 'item_code',
        'Item': 'item',
        'Element Code': 'element_code',
        'Element': 'element',
        'Year': 'year',
        'Unit': 'unit',
        'Value': 'value',
        'Source': 'source',
    }
)

In [None]:
# Preview the first five rows after dropping and renaming columns
df_emis_livestock.head(n=5)

In [None]:
# Load the AreaCodes lookup table (shipped in the same download zip)
source_file = 'Emissions_livestock_E_AreaCodes.csv'
df_emis_livestock_areacodes = pd.read_csv(
    fdf.get_path(source_file, source_dir),
    encoding='latin-1',
)

In [None]:
# Preview the first five rows of the AreaCodes table
df_emis_livestock_areacodes.head(n=5)

In [None]:
# Number of fully duplicated rows in the AreaCodes table (expected: 0)
df_emis_livestock_areacodes.duplicated().sum()

In [None]:
# Left-join the AreaCodes table onto the emissions data so that
# potentially redundant columns can be inspected side by side
df_emis_livestock_temp = pd.merge(
    df_emis_livestock,
    df_emis_livestock_areacodes,
    how='left',
    left_on='area_code',
    right_on='Area Code',
)

In [None]:
# Preview the first five rows of the merged frame
df_emis_livestock_temp.head(n=5)

In [None]:
# List the distinct (area, Area) pairs where the original name and the
# name brought in by the merge do not match
df_emis_livestock_temp.loc[
    df_emis_livestock_temp['area'].ne(df_emis_livestock_temp['Area']),
    ['area', 'Area'],
].drop_duplicates()

In [None]:
# Replace ';' with ',' in the merged 'Area' names, then list the
# distinct (area, Area) pairs that still do not match
df_emis_livestock_temp['Area'] = df_emis_livestock_temp['Area'].str.replace(';', ',')
df_emis_livestock_temp.loc[
    df_emis_livestock_temp['area'].ne(df_emis_livestock_temp['Area']),
    ['area', 'Area'],
].drop_duplicates()

In [None]:
# 'area_code' contains no additional info beyond 'area', so remove it
df_emis_livestock = df_emis_livestock.drop(columns='area_code')

In [None]:
# Load the ItemCodes lookup table (shipped in the same download zip)
source_file = 'Emissions_livestock_E_ItemCodes.csv'
df_emis_livestock_itemcodes = pd.read_csv(
    fdf.get_path(source_file, source_dir),
    encoding='latin-1',
)

In [None]:
# Preview the first five rows of the ItemCodes table
df_emis_livestock_itemcodes.head(n=5)

In [None]:
# Number of fully duplicated rows in the ItemCodes table (expected: 0)
df_emis_livestock_itemcodes.duplicated().sum()

In [None]:
# Left-join the ItemCodes table onto the emissions data so that
# potentially redundant columns can be inspected side by side
df_emis_livestock_temp = pd.merge(
    df_emis_livestock,
    df_emis_livestock_itemcodes,
    how='left',
    left_on='item_code',
    right_on='Item Code',
)

In [None]:
# Preview the first five rows of the merged frame
df_emis_livestock_temp.head(n=5)

In [None]:
# List the distinct (item, Item) pairs where the original name and the
# name brought in by the merge do not match
df_emis_livestock_temp.loc[
    df_emis_livestock_temp['item'].ne(df_emis_livestock_temp['Item']),
    ['item', 'Item'],
].drop_duplicates()

In [None]:
# Compare divergent item values after replacing ';' with ','
df_emis_livestock_temp['Item'] = df_emis_livestock_temp['Item'].str.replace(';', ',')
# The mask has to compare the original 'item' column with the merged 'Item'
# column; comparing 'Item' with itself only flags NaN rows and never shows
# a real mismatch between the two names.
df_emis_livestock_temp[['item', 'Item']][df_emis_livestock_temp['item'] != df_emis_livestock_temp['Item']].drop_duplicates()

In [None]:
# Show every distinct (element_code, element, unit) combination, ordered by element
(
    df_emis_livestock[['element_code', 'element', 'unit']]
    .drop_duplicates()
    .sort_values(by='element')
)

In [None]:
# 'element_code' stands for a unique combination of element and unit,
# so it carries no extra information and can be removed
df_emis_livestock = df_emis_livestock.drop(columns='element_code')

In [None]:
# Preview the first five rows after removing 'element_code'
df_emis_livestock.head(n=5)

In [None]:
# Put the columns into their final order
df_emis_livestock = df_emis_livestock.loc[
    :,
    ['area', 'year', 'item', 'item_code', 'element', 'unit', 'value', 'source'],
]

In [None]:
# Preview the first five rows of the final column layout
df_emis_livestock.head(n=5)

In [None]:
# Hand the cleaned dataframe to the project helper under the name 'fao_emis_livestock_source'
# NOTE(review): presumably this writes a database table of that name — confirm in _functions_sql
fs.write_dataframe(df_emis_livestock, 'fao_emis_livestock_source')