In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

import openpyxl

In [2]:
# Read the three CSV inputs, in this order, into dfFao, dfCountries and dfConsumption.
dfFao, dfCountries, dfConsumption = (
    pd.read_csv(csv_path)
    for csv_path in ("FAOSTAT.csv", "iso-country-codes.csv", "oecd_meat_consumption.csv")
)


## Pasos para limpiar dfCountries
1. Seleccionar solo aquellas columnas necesarias.
2. Renombrar las columnas para que coincidan con los nombres usados en los otros datasets y así poder hacer los joins.

In [3]:
# Reduce dfCountries to the alpha-3 code and the English short name, exposed as
# 'LOCATION' and 'Area'.
# The rename runs BEFORE the column selection so the cell can be re-executed:
# DataFrame.rename ignores labels that are not present, so on a second run (when
# the columns are already renamed) it is a no-op instead of a KeyError on the
# original header names.
dfCountries = dfCountries.rename(
    columns={'Alpha-3 code': 'LOCATION', 'English short name lower case': 'Area'}
)[['LOCATION', 'Area']]
dfCountries.head(5)

Unnamed: 0,LOCATION,Area
0,ZWE,Zimbabwe
1,ZMB,Zambia
2,YEM,Yemen
3,ESH,Western Sahara
4,WLF,Wallis and Futuna


## Unión de dfConsumption con dfCountries y limpieza de dfConsumption
Pasos:
1. Unir los datos de Consumo con los países.
2. Quitar las filas cuyo código 'LOCATION' es un agregado propio del análisis de la OECD y no un país.
3. Quitar columnas innecesarias.

### 1. Unir datos con dataframe de países

In [4]:
# Left-join the country names onto the consumption rows using 'LOCATION' as key.
# indicator=True adds a '_merge' column recording whether each row found a match.
dfConsumption = pd.merge(
    dfConsumption,
    dfCountries,
    how='left',
    on='LOCATION',
    indicator=True,
)
dfConsumption.head(5)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes,Area,_merge
0,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1990,0.0,,Australia,both
1,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1991,27.942,,Australia,both
2,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1992,26.405,,Australia,both
3,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1993,26.37,,Australia,both
4,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1994,25.662,,Australia,both


In [5]:
# Distinct LOCATION codes of the rows that did not match any row of dfCountries.
dfConsumption.loc[dfConsumption['_merge'].ne('both'), 'LOCATION'].unique()

array(['WLD', 'OECD', 'BRICS'], dtype=object)

### 2. Remover Location extras

Remover filas cuando el 'Location' es 'WLD', 'OECD' o 'BRICS'.

Esos códigos no son países, sino agregados (totales) que la OECD usa para comparaciones, y no son necesarios para nuestro análisis.

In [6]:
# Keep only the rows whose LOCATION was matched in the merge ('_merge' is 'both').
dfConsumption = dfConsumption.loc[dfConsumption['_merge'].eq('both')]
dfConsumption.head(3)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes,Area,_merge
0,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1990,0.0,,Australia,both
1,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1991,27.942,,Australia,both
2,AUS,MEATCONSUMP,BEEF,KG_CAP,A,1992,26.405,,Australia,both


### 3. Columnas innecesarias
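Primero identificamos cuáles columnas son innecesarias: una columna que tiene un único valor (o solo NaN) no aporta información y puede eliminarse.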

In [7]:
# Inspect the distinct values of 'Flag Codes' to decide whether the column is worth keeping.
dfConsumption.loc[:, 'Flag Codes'].unique()

array([nan])

In [8]:
# Inspect the distinct values of 'FREQUENCY' to decide whether the column is worth keeping.
dfConsumption.loc[:, 'FREQUENCY'].unique()

array(['A'], dtype=object)

In [9]:
# Keep only the columns listed below, in this order; 'FREQUENCY', 'Flag Codes'
# and '_merge' are left out.
dfConsumption = dfConsumption.loc[
    :, ['LOCATION', 'INDICATOR', 'SUBJECT', 'MEASURE', 'TIME', 'Value', 'Area']
]
dfConsumption.head(3)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,TIME,Value,Area
0,AUS,MEATCONSUMP,BEEF,KG_CAP,1990,0.0,Australia
1,AUS,MEATCONSUMP,BEEF,KG_CAP,1991,27.942,Australia
2,AUS,MEATCONSUMP,BEEF,KG_CAP,1992,26.405,Australia
