In [1]:
import numpy as np
import pandas as pd

# used for suppressing warning
import warnings 
warnings.filterwarnings('ignore') # suppress the warnings

In [2]:
# Data source: FAOSTAT, QCL domain
# https://www.fao.org/faostat/en/#data/QCL
#
# The file is read as Latin-1. The previously used 'unicode_escape' codec
# also decodes bytes as Latin-1, but additionally interprets backslash escape
# sequences (it is meant for Python source literals), which would silently
# corrupt any field containing a literal backslash. Plain Latin-1 yields the
# same text without that side effect.
df = pd.read_csv('Production_Crops_Livestock_E_All_Data_NOFLAG.csv', encoding='latin-1')

In [3]:
# Preview the first five rows to check how the columns were parsed.
df.head(n=5)

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1961,Y1962,Y1963,...,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019,Y2020
0,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,ha,,,,...,13469.0,13490.0,14114.0,13703.0,14676.0,19481.0,19793.0,20053.0,29203.0,22134.0
1,2,Afghanistan,221,"Almonds, with shell",5419,Yield,hg/ha,,,,...,45000.0,45960.0,29910.0,19996.0,16521.0,16859.0,13788.0,17161.0,13083.0,17759.0
2,2,Afghanistan,221,"Almonds, with shell",5510,Production,tonnes,,,,...,60611.0,62000.0,42215.0,27400.0,24246.0,32843.0,27291.0,34413.0,38205.0,39307.0
3,2,Afghanistan,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,,,,...,19500.0,18500.0,18500.0,30000.0,25000.0,24500.0,26500.0,25333.0,25444.0,25759.0
4,2,Afghanistan,711,"Anise, badian, fennel, coriander",5419,Yield,hg/ha,,,,...,6414.0,6757.0,6757.0,7167.0,7200.0,7075.0,7149.0,7142.0,7123.0,7138.0


In [4]:
# Flag rows that carry no data at all: every yearly value is NaN or 0.
# The yearly columns are selected by name (Y1961 ... Y2020) so that identifier
# columns such as "Area Code" never enter the check and do not have to be
# subtracted back out of a row total afterwards.
year_columns = [col for col in df.columns
                if str(col).startswith("Y") and str(col)[1:].isdigit()]
yearly_values = df[year_columns]

# Test each value rather than the row total, so a row is flagged only when it
# is genuinely empty and not merely because its values happen to sum to zero.
has_data = yearly_values.fillna(0).ne(0).any(axis = 1)

# Helper column: the row total where data exists, NaN for rows to be removed.
df["Remove0orNaN"] = (yearly_values.sum(axis = 1, skipna = True, numeric_only = True)
                        .where(has_data))
print(df.isnull().sum())
print(df.shape)

# drop the flagged rows (NaN in the helper column) and the helper column itself
df = df.dropna(subset = ["Remove0orNaN"]).drop(columns = ["Remove0orNaN"])
print(df.shape)

Area Code           0
Area                0
Item Code           0
Item                0
Element Code        0
                ...  
Y2017            7326
Y2018            8069
Y2019            8256
Y2020           12345
Remove0orNaN     2569
Length: 68, dtype: int64
(77910, 68)
(75341, 67)


In [5]:
# List every distinct value of the Item column.
df["Item"].unique()

array(['Almonds, with shell', 'Anise, badian, fennel, coriander',
       'Apples', 'Apricots', 'Asses', 'Barley', 'Berries nes',
       'Butter and ghee, sheep milk', 'Butter, cow milk', 'Camels',
       'Cattle', 'Cheese, goat milk', 'Cheese, sheep milk', 'Chickens',
       'Cotton lint', 'Cottonseed', 'Eggs, hen, in shell', 'Fat, camels',
       'Fat, cattle', 'Fat, goats', 'Fat, sheep', 'Figs',
       'Fruit, citrus nes', 'Fruit, fresh nes', 'Fruit, stone nes',
       'Goats', 'Grapes', 'Hides, cattle, fresh', 'Honey, natural',
       'Horses', 'Linseed', 'Maize', 'Meat, camel', 'Meat, cattle',
       'Meat, chicken', 'Meat, game', 'Meat, goat', 'Meat, sheep',
       'Melons, other (inc.cantaloupes)', 'Milk, skimmed cow',
       'Milk, whole fresh camel', 'Milk, whole fresh cow',
       'Milk, whole fresh goat', 'Milk, whole fresh sheep', 'Millet',
       'Molasses', 'Mules', 'Nuts nes', 'Offals, edible, camels',
       'Offals, edible, cattle', 'Offals, edible, goats',
       'Offa

In [6]:
def find_items(frame, keywords):
    """Flag the rows of ``frame`` whose ``Item`` text contains any keyword.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with an ``Item`` column of strings.
    keywords : list of str
        Case-sensitive substrings to look for.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with ``frame``; True where at least one keyword
        occurs in the row's ``Item`` value.
    """
    mask = frame["Item"].apply(
        lambda name: any(keyword in name for keyword in keywords)
    )
    # astype(bool) keeps the mask usable for indexing even when frame is empty
    return mask.astype(bool)


def summarize_items(frame):
    """Print the shape of ``frame`` and its distinct Item and Element values."""
    print(frame.shape)
    print(frame.Item.unique()) # display unique items in Item column
    print(frame.Element.unique()) # display unique items in Element column


# Keywords are matched case-sensitively, hence both 'milk' and 'Milk'.
dairy = ['milk', 'Milk', 'Cream', 'Butter', 'Cheese', 'Yoghurt']

findDairy = find_items(df, dairy)
dfDairy = df[findDairy]

summarize_items(dfDairy)

livestock = ['Meat', 'Buffaloes', 'Camels', 'Cattle', 'Chickens', 
             'Fat', 'Goats', 'Hides', 'Horses', 'Lard', 'Mules', 
             'Offals', 'Pigs', 'Birds', 'Rabbits', 'Sheep', 'Skins']

findLivestock = find_items(df, livestock)
dfLivestock = df[findLivestock]

summarize_items(dfLivestock)

(4304, 67)
['Butter and ghee, sheep milk' 'Butter, cow milk' 'Cheese, goat milk'
 'Cheese, sheep milk' 'Milk, skimmed cow' 'Milk, whole fresh camel'
 'Milk, whole fresh cow' 'Milk, whole fresh goat'
 'Milk, whole fresh sheep' 'Butter and Ghee' 'Cheese (All Kinds)'
 'Milk, Total' 'Cheese, whole cow milk' 'Milk, whole fresh buffalo'
 'Cheese, skimmed cow milk' 'Cream fresh' 'Ghee, butteroil of cow milk'
 'Milk, skimmed dried' 'Milk, whole condensed' 'Milk, whole dried'
 'Milk, whole evaporated' 'Evaporat&Condensed Milk'
 'Skim Milk&Buttermilk,Dry' 'Milk, dry buttermilk'
 'Milk, skimmed condensed' 'Butter, buffalo milk' 'Ghee, buffalo milk'
 'Yoghurt' 'Milk, skimmed evaporated' 'Cheese, buffalo milk']
['Production' 'Yield' 'Milk Animals']
(16849, 67)
['Camels' 'Cattle' 'Chickens' 'Fat, camels' 'Fat, cattle' 'Fat, goats'
 'Fat, sheep' 'Goats' 'Hides, cattle, fresh' 'Horses' 'Meat, camel'
 'Meat, cattle' 'Meat, chicken' 'Meat, game' 'Meat, goat' 'Meat, sheep'
 'Mules' 'Offals, edible, camel