In [1]:
# *************************************************************************************
# *                          © 2019 Roshini Saravanakumar                             *
# *************************************************************************************

import pandas as pd
import numpy as np
import seaborn as sns
from collections import Counter

# Read the raw CSV, drop its 'Row Number' column, keep only the fields used in
# this analysis, and give three of them more descriptive names.
df = (
    pd.read_csv('data/raw_data.csv')
    .drop(columns='Row Number')
    .loc[:, ['Area', 'Item', 'Year', 'Production', 'Import Quantity', 'Export Quantity']]
    .rename(columns={
        'Area': 'Country',
        'Item': 'Produce',
        'Production': 'Production Quantity',
    })
)

# preview the first rows of the prepared frame
df.head()

Unnamed: 0,Country,Produce,Year,Production Quantity,Import Quantity,Export Quantity
0,Armenia,Potatoes,2000,290260.0,390.0,300.0
1,Armenia,Potatoes,2001,363834.0,2290.0,0.0
2,Armenia,Potatoes,2002,374263.0,1918.0,0.0
3,Armenia,Potatoes,2003,507518.0,2853.0,7.0
4,Armenia,Potatoes,2004,576427.0,1553.0,0.0


In [2]:
def calculateConsumption(entry):
    """Return the consumption for one entry: production plus imports, minus exports.

    `entry` must be indexable by 'Production Quantity', 'Import Quantity'
    and 'Export Quantity' (for example a DataFrame row).
    """
    total_supply = entry['Production Quantity'] + entry['Import Quantity']
    return total_supply - entry['Export Quantity']

# keep only Peru's 2006 records, ordered from largest to smallest production
df = (
    df[(df['Country'] == 'Peru') & (df['Year'] == 2006)]
    .sort_values(by='Production Quantity', ascending=False)
)

# consumption of each entry: (production + imports) - exports
df['Consumption'] = df.apply(calculateConsumption, axis=1)

# show the consumption of the five crops Peru produced the most of in 2006
df.head()[['Produce', 'Consumption']]

Unnamed: 0,Produce,Consumption
37221,Potatoes,3352637.0
37773,Plantains and others,1721051.0
37967,Maize,2760717.0
37651,"Onions, dry",524764.0
37787,Oranges,353041.0


In [3]:
def calculatePercentConsumed(entry):
    """Return consumption as a percentage of total supply (production + imports).

    `entry` must be indexable by 'Consumption', 'Production Quantity' and
    'Import Quantity' (for example a DataFrame row).

    Returns NaN when a single entry has zero total supply, because the
    percentage is undefined there; previously that case raised
    ZeroDivisionError for plain Python numbers and aborted the whole
    row-wise `apply`.
    """
    supply = entry['Production Quantity'] + entry['Import Quantity']
    # np.ndim(...) == 0 restricts the guard to scalar input, so calling this
    # with whole columns keeps its previous element-wise behaviour.
    if np.ndim(supply) == 0 and supply == 0:
        return float('nan')
    return (entry['Consumption'] / supply) * 100

# for each crop, the percentage of its total supply (production + imports)
# that is consumed
df['Percent Consumed'] = df.apply(calculatePercentConsumed, axis=1)

# show that percentage for the first five rows
df.head()[['Produce', 'Percent Consumed']]

Unnamed: 0,Produce,Percent Consumed
37221,Potatoes,99.998538
37773,Plantains and others,96.788364
37967,Maize,99.772534
37651,"Onions, dry",90.999643
37787,Oranges,99.750793


In [4]:
# Danger zone cutoff, in percent: a crop whose consumption percentage falls
# below this value is treated as an environmental hazard.
# TODO: the value 20 is provisional and still needs to be justified.
threshold = 20

# list every Peru 2006 crop that falls inside the danger zone
df.loc[df['Percent Consumed'] < threshold]

Unnamed: 0,Country,Produce,Year,Production Quantity,Import Quantity,Export Quantity,Consumption,Percent Consumed
38130,Peru,"Coffee, green",2006,273178.0,3.0,237537.0,35644.0,13.04776
38198,Peru,"Chillies and peppers, dry",2006,52195.0,30.0,49834.0,2391.0,4.578267
38326,Peru,"Tobacco, unmanufactured",2006,1431.0,0.0,1770.0,-339.0,-23.689727
38246,Peru,Ginger,2006,0.0,1.0,83.0,-82.0,-8200.0


In [5]:
# calculate percent exported
def calculatePercentExported(entry):
    """Return exports as a percentage of total supply (production + imports).

    `entry` must be indexable by 'Export Quantity', 'Production Quantity'
    and 'Import Quantity' (for example a DataFrame row).

    Returns NaN when a single entry has zero total supply, because the
    percentage is undefined there; previously that case raised
    ZeroDivisionError for plain Python numbers and aborted the whole
    row-wise `apply`.
    """
    supply = entry['Production Quantity'] + entry['Import Quantity']
    # np.ndim(...) == 0 restricts the guard to scalar input, so calling this
    # with whole columns keeps its previous element-wise behaviour.
    if np.ndim(supply) == 0 and supply == 0:
        return float('nan')
    return (entry['Export Quantity'] / supply) * 100

# for each crop, the percentage of its total supply (production + imports)
# that is exported
df['Percent Exported'] = df.apply(calculatePercentExported, axis=1)

# show the danger-zone crops once more, now with the export percentage included
df.loc[df['Percent Consumed'] < threshold]

Unnamed: 0,Country,Produce,Year,Production Quantity,Import Quantity,Export Quantity,Consumption,Percent Consumed,Percent Exported
38130,Peru,"Coffee, green",2006,273178.0,3.0,237537.0,35644.0,13.04776,86.95224
38198,Peru,"Chillies and peppers, dry",2006,52195.0,30.0,49834.0,2391.0,4.578267,95.421733
38326,Peru,"Tobacco, unmanufactured",2006,1431.0,0.0,1770.0,-339.0,-23.689727,123.689727
38246,Peru,Ginger,2006,0.0,1.0,83.0,-82.0,-8200.0,8300.0
