In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score, KFold
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Empty placeholder frame; the final cell rebinds `fbs_predict` to the forecast rows.
fbs_predict = pd.DataFrame(data=None)

In [3]:
# Read the CSV stored under assets/ into `fbs` and preview its first rows.
fbs_path = Path('assets') / 'fbs_1961_2013.csv'
fbs = pd.read_csv(fbs_path)
fbs.head()

Unnamed: 0,Country Code,Country,Item Code,Item,Domestic supply quantity,Food,Export Quantity,Import Quantity,Losses,Production,Feed,Stock Variation,Year,Population,Land Use
0,21,Brazil,2919,Fruits - Excluding Wine,6569.0,5290.0,388.0,70.0,1016.0,6887.0,0.0,0.0,1961,74988.0,28396.0
1,21,Brazil,2918,Vegetables,2067.0,1833.0,2.0,18.0,228.0,2050.0,0.0,0.0,1961,74988.0,28396.0
2,21,Brazil,2919,Fruits - Excluding Wine,7059.0,5735.0,345.0,83.0,1087.0,7322.0,0.0,0.0,1962,77287.0,30213.0
3,21,Brazil,2918,Vegetables,2199.0,1952.0,1.0,24.0,241.0,2176.0,0.0,0.0,1962,77287.0,30213.0
4,21,Brazil,2919,Fruits - Excluding Wine,7601.0,6211.0,432.0,96.0,1169.0,7937.0,0.0,0.0,1963,79643.0,32211.0


In [4]:
# Countries for which a forecast is produced (matched against fbs['Country']).
predicted_countries = [
    "Australia",
    "Brazil",
    "Canada",
    "China",
    "France",
    "Germany",
    "India",
    "Japan",
    "Mexico",
    "Republic of Korea",
    "United Kingdom",
    "United States of America",
]

# Commodities for which a forecast is produced (matched against fbs['Item']).
predicted_commodities = [
    "Vegetables",
    "Fruits - Excluding Wine",
]

In [5]:
def get_predicted_losses(pred_pop):
    """Predict 'Losses' for one population value.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Losses' on 'Population' and 'Population Squared'
    on the training part of each of 10 shuffled K-fold splits, and averages
    the per-fold predictions for ``pred_pop``.

    Parameters
    ----------
    pred_pop : numeric
        Population value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Population', 'Population Squared']
    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Losses']  # target

    # Seeded (same seed as the export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
        },
        columns=feature_cols,
    )

    predicted_loss_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_loss_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_loss_values))

In [6]:
# train the model to get the fitted Production value
def get_predicted_production(pred_pop, pred_loss):
    """Predict 'Production' from population and losses.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Production' on population and losses (each with
    its square) on the training part of each of 10 shuffled K-fold splits,
    and averages the per-fold predictions.

    Parameters
    ----------
    pred_pop : numeric
        Population value to predict for.
    pred_loss : numeric
        Losses value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Population', 'Population Squared', 'Losses', 'Losses Squared']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Production']  # target

    # Seeded (same seed as the export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
        },
        columns=feature_cols,
    )

    predicted_production_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_production_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_production_values))

In [7]:
#train the model to get the fitted Food value
def get_predicted_food_value(pred_pop, pred_loss, pred_prod):
    """Predict 'Food' from population, production and losses.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Food' on population, production and losses (each
    with its square) on the training part of each of 10 shuffled K-fold
    splits, and averages the per-fold predictions.

    Parameters
    ----------
    pred_pop : numeric
        Population value to predict for.
    pred_loss : numeric
        Losses value to predict for.
    pred_prod : numeric
        Production value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Population', 'Population Squared',
                    'Production', 'Production Squared',
                    'Losses', 'Losses Squared']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Food']  # target

    # Seeded (same seed as the export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Production': [pred_prod],
            'Production Squared': [pred_prod**2],
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
        },
        columns=feature_cols,
    )

    predicted_food_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_food_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_food_values))


In [8]:
# train the model to get the fitted Feed value
def get_predicted_feed(pred_food, pred_pop, pred_loss, pred_prod):
    """Predict 'Feed' from food, population, production and losses.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Feed' on food, population, production and losses
    (each with its square) on the training part of each of 10 shuffled K-fold
    splits, and averages the per-fold predictions.

    Parameters
    ----------
    pred_food : numeric
        Food value to predict for.
    pred_pop : numeric
        Population value to predict for.
    pred_loss : numeric
        Losses value to predict for.
    pred_prod : numeric
        Production value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Food', 'Food Squared',
                    'Population', 'Population Squared',
                    'Production', 'Production Squared',
                    'Losses', 'Losses Squared']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Feed']  # target

    # Seeded (same seed as the export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Food': [pred_food],
            'Food Squared': [pred_food**2],
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Production': [pred_prod],
            'Production Squared': [pred_prod**2],
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
        },
        columns=feature_cols,
    )

    predicted_feed_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_feed_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_feed_values))


In [9]:
# train the model to get the fitted Import value
def get_predicted_import_value(pred_food,pred_pop,pred_prod,pred_feed,pred_loss):
    """Predict 'Import Quantity' with a linear model.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Import Quantity' on food, population, production,
    feed and losses (each with its square) on the training part of each of 10
    shuffled K-fold splits, and averages the per-fold predictions.

    Parameters
    ----------
    pred_food : numeric
        Food value to predict for.
    pred_pop : numeric
        Population value to predict for.
    pred_prod : numeric
        Production value to predict for.
    pred_feed : numeric
        Feed value to predict for.
    pred_loss : numeric
        Losses value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Food', 'Food Squared',
                    'Population', 'Population Squared',
                    'Production', 'Production Squared',
                    'Feed', 'Feed Squared',
                    'Losses', 'Losses Squared']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Import Quantity']  # target

    # Seeded (same seed as the export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # Built from this function's own arguments only: 'Population' and
    # 'Losses' must come from pred_pop / pred_loss, not from whatever
    # notebook-level variables happen to exist when the function is called.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Food': [pred_food],
            'Food Squared': [pred_food**2],
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Production': [pred_prod],
            'Production Squared': [pred_prod**2],
            'Feed': [pred_feed],
            'Feed Squared': [pred_feed**2],
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
        },
        columns=feature_cols,
    )

    predicted_import_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_import_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_import_values))


In [10]:
def get_predicted_import_value_forests(pred_food,pred_pop,pred_prod,pred_feed,pred_loss):
    """Predict 'Import Quantity' with a random forest.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``RandomForestRegressor`` of 'Import Quantity' on food, population,
    production, feed and losses (each with its square) on the training part of
    each of 10 shuffled K-fold splits, and averages the per-fold predictions.

    Parameters
    ----------
    pred_food : numeric
        Food value to predict for.
    pred_pop : numeric
        Population value to predict for.
    pred_prod : numeric
        Production value to predict for.
    pred_feed : numeric
        Feed value to predict for.
    pred_loss : numeric
        Losses value to predict for.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Food', 'Food Squared',
                    'Population', 'Population Squared',
                    'Production', 'Production Squared',
                    'Feed', 'Feed Squared',
                    'Losses', 'Losses Squared']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Import Quantity']  # target

    # Seeded (same seed as the linear export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # Built from this function's own arguments only: 'Population' and
    # 'Losses' must come from pred_pop / pred_loss, not from whatever
    # notebook-level variables happen to exist when the function is called.
    # `columns=` pins the column order to the order the model was fitted with.
    rfreg_predict = pd.DataFrame(
        data={
            'Food': [pred_food],
            'Food Squared': [pred_food**2],
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Production': [pred_prod],
            'Production Squared': [pred_prod**2],
            'Feed': [pred_feed],
            'Feed Squared': [pred_feed**2],
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
        },
        columns=feature_cols,
    )

    predicted_import_values = []
    for train_indices, _ in kf.split(X, y):
        # Original author's note: max_features=5 is best and n_estimators=150
        # is sufficiently large.
        rfreg = RandomForestRegressor(n_estimators=150, max_features=5, oob_score=True, random_state=1)
        rfreg.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_import_values.append(rfreg.predict(rfreg_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_import_values))

        

In [12]:
#train the model to get the fitted Export value
def get_predicted_export_value(pred_loss,pred_feed,pred_import,pred_pop,pred_food):
    """Predict 'Export Quantity' with a linear model.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``LinearRegression`` of 'Export Quantity' on losses, feed and population
    (each with its square), import quantity, and the 'Food*Import' interaction
    on the training part of each of 10 shuffled K-fold splits, and averages
    the per-fold predictions.

    Parameters
    ----------
    pred_loss : numeric
        Losses value to predict for.
    pred_feed : numeric
        Feed value to predict for.
    pred_import : numeric
        Import quantity to predict for.
    pred_pop : numeric
        Population value to predict for.
    pred_food : numeric
        Food value; used only in the 'Food*Import' interaction term.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Losses', 'Losses Squared',
                    'Feed', 'Feed Squared',
                    'Import Quantity',
                    'Population', 'Population Squared',
                    'Food*Import']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Export Quantity']  # target

    kf = KFold(n_splits=10, random_state=21, shuffle=True)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    lr_predict = pd.DataFrame(
        data={
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
            'Feed': [pred_feed],
            'Feed Squared': [pred_feed**2],
            'Import Quantity': [pred_import],
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Food*Import': [pred_food*pred_import],
        },
        columns=feature_cols,
    )

    predicted_export_values = []
    for train_indices, _ in kf.split(X, y):
        lr = LinearRegression()
        lr.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_export_values.append(lr.predict(lr_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_export_values))


In [13]:
def get_predicted_export_value_forests(pred_loss,pred_feed,pred_import,pred_pop,pred_food):
    """Predict 'Export Quantity' with a random forest.

    Reads the module-level ``fbs_country_commodity`` frame, fits a
    ``RandomForestRegressor`` of 'Export Quantity' on losses, feed and
    population (each with its square), import quantity, and the 'Food*Import'
    interaction on the training part of each of 10 shuffled K-fold splits, and
    averages the per-fold predictions.

    Parameters
    ----------
    pred_loss : numeric
        Losses value to predict for.
    pred_feed : numeric
        Feed value to predict for.
    pred_import : numeric
        Import quantity to predict for.
    pred_pop : numeric
        Population value to predict for.
    pred_food : numeric
        Food value; used only in the 'Food*Import' interaction term.

    Returns
    -------
    int
        Mean of the 10 per-fold predictions, truncated by ``int()``.
    """
    feature_cols = ['Losses', 'Losses Squared',
                    'Feed', 'Feed Squared',
                    'Import Quantity',
                    'Population', 'Population Squared',
                    'Food*Import']

    X = fbs_country_commodity.loc[:, feature_cols]  # features
    y = fbs_country_commodity.loc[:, 'Export Quantity']  # target

    # Seeded (same seed as the linear export model in this notebook) so that
    # Restart & Run All reproduces the same forecast.
    kf = KFold(n_splits=10, shuffle=True, random_state=21)

    # The row to predict is identical for every fold, so build it once.
    # `columns=` pins the column order to the order the model was fitted with.
    rfreg_predict = pd.DataFrame(
        data={
            'Losses': [pred_loss],
            'Losses Squared': [pred_loss**2],
            'Feed': [pred_feed],
            'Feed Squared': [pred_feed**2],
            'Import Quantity': [pred_import],
            'Population': [pred_pop],
            'Population Squared': [pred_pop**2],
            'Food*Import': [pred_food*pred_import],
        },
        columns=feature_cols,
    )

    predicted_export_values = []
    for train_indices, _ in kf.split(X, y):
        rfreg = RandomForestRegressor(n_estimators=150, max_features=5, oob_score=True, random_state=1)
        rfreg.fit(X.iloc[train_indices, :], y.iloc[train_indices])
        predicted_export_values.append(rfreg.predict(rfreg_predict))

    # Return only after every fold has contributed; returning from inside the
    # loop would make the "mean" the prediction of the first fold alone.
    return int(np.mean(predicted_export_values))

        

In [14]:
# Forecast Losses -> Production -> Food -> Feed -> Import -> Export for every
# (country, commodity) pair and every year 2014..2050, writing each value back
# into `fbs`. Each model consumes the values predicted before it.
for predicted_country in predicted_countries:
    for predicted_commodity in predicted_commodities:
        # All rows (every year) of this country/commodity series.
        series_rows = (fbs["Country"] == predicted_country) & (fbs["Item"] == predicted_commodity)

        # Training data: the series up to and including 2013. The
        # get_predicted_* functions read this frame through the module-level
        # name `fbs_country_commodity`, so the name must not change.
        # .copy() makes it an independent frame, so adding the derived columns
        # below is not an assignment onto a slice of `fbs`.
        fbs_country_commodity = fbs.loc[series_rows & (fbs["Year"] <= 2013), :].copy()
        fbs_country_commodity['Population Squared'] = fbs_country_commodity['Population']**2
        fbs_country_commodity['Production Squared'] = fbs_country_commodity['Production']**2
        fbs_country_commodity['Food Squared'] = fbs_country_commodity['Food']**2
        fbs_country_commodity['Feed Squared'] = fbs_country_commodity['Feed']**2
        fbs_country_commodity['Losses Squared'] = fbs_country_commodity['Losses']**2
        fbs_country_commodity['Import Quantity Squared'] = fbs_country_commodity['Import Quantity']**2
        fbs_country_commodity['Food*Import'] = fbs_country_commodity['Food'] * fbs_country_commodity['Import Quantity']

        for predicted_year in range(2014, 2051):
            # Row(s) of `fbs` that receive this year's predictions.
            year_rows = series_rows & (fbs["Year"] == predicted_year)

            # Assumes `fbs` already holds a row with a Population value for
            # every forecast year; .iloc[0] raises IndexError when it does not.
            predicted_population = fbs.loc[year_rows, "Population"].iloc[0]

            # Every predicted value is kept as a plain float scalar (the
            # helpers return int) and passed on as a scalar, so no helper ever
            # receives a one-element Series.
            predicted_loss = float(get_predicted_losses(predicted_population))
            fbs.loc[year_rows, "Losses"] = predicted_loss

            predicted_prod = float(get_predicted_production(predicted_population, predicted_loss))
            fbs.loc[year_rows, "Production"] = predicted_prod

            predicted_food = float(get_predicted_food_value(predicted_population, predicted_loss, predicted_prod))
            fbs.loc[year_rows, "Food"] = predicted_food

            predicted_feed = float(get_predicted_feed(predicted_food, predicted_population, predicted_loss, predicted_prod))
            fbs.loc[year_rows, "Feed"] = predicted_feed

            predicted_import = float(get_predicted_import_value(predicted_food, predicted_population, predicted_prod, predicted_feed, predicted_loss))
            fbs.loc[year_rows, "Import Quantity"] = predicted_import

            predicted_export = float(get_predicted_export_value(predicted_loss, predicted_feed, predicted_import, predicted_population, predicted_food))
            fbs.loc[year_rows, "Export Quantity"] = predicted_export

# Keep only the forecast years and write them next to the input file.
years_rows = fbs.loc[:,'Year'] >= 2014
fbs_predict = fbs.loc[years_rows,:]
fbs_predict_path = Path('assets','fbs_2014_2050.csv')
fbs_predict.to_csv(fbs_predict_path)