In [1]:
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.options.display.max_rows = 4000
import numpy as np
from numpy import loadtxt
from numpy import arange
from numpy import interp
import math
import os
import timeit
from timeit import default_timer as timer
from datetime import datetime
import time
import psutil

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from sklearn.preprocessing import StandardScaler
from scipy import interpolate
from scipy.interpolate import interp1d
from sklearn.metrics import r2_score
from joblib import dump, load

from keras.models import Sequential
from keras.layers.core import Dense
import keras.backend as K

def _readCsvNormalised(path):
    """Read a CSV file and replace spaces in its column names with underscores."""
    frame = pd.read_csv(path)
    frame.columns = [c.replace(' ', '_') for c in frame.columns]
    return frame

#Indicator table filtered by Country_Name / Indicator_Name in getRawData
df = _readCsvNormalised('02_Data/WDIData.csv')

#Country lookup table (read through its Country / Code / OECD / Income columns)
countrycode = _readCsvNormalised('02_Data/COUNTRYCODES.csv')

#Per-indicator projected rates, one column per SSP
predictions = _readCsvNormalised('02_Data/PREDICTIONS.csv')

#SSP scenario database (REGION / VARIABLE / MODEL / SSP plus year columns)
SSP_Data = _readCsvNormalised('02_Data/SSP.csv')

#Same file as `df`: copy the parsed frame instead of parsing the CSV a second time
WDI_Data = df.copy()

#IPython.display is the supported import location (IPython.core.display is deprecated)
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

Using TensorFlow backend.


In [2]:
#Column labels applied to the per-country indicator frame in getRawData;
#the first eight are the model inputs and CO2 is the target (see getAllData)
titles = ["ENR", "FDI", "GDP", "IND", "RND", "POP", "TRD", "URB", "CO2"]
#Maximum number of training epochs passed to model.fit (early stopping may end sooner)
epochs = 1000
#Mini-batch size passed to model.fit
batchs = 32
#Country subsets. Only top30Countries and top6Countries are referenced by code
#visible in this notebook; the others are presumably used in ad-hoc runs
testCountries = ["United Kingdom"]
top30Countries = ["China", "United States","India", "Russian Federation", "Japan", 
                  "Canada", "Germany", "Korea, Rep.", "Brazil", "France", "Saudi Arabia", 
                  "United Kingdom", "Pakistan", "Mexico", "Iran, Islamic Rep.", "Turkey", "Italy", "Spain", 
                  "Indonesia", "Australia", "South Africa", "Vietnam", "Egypt, Arab Rep.", 
                  "Thailand", "Argentina","Nigeria","Poland", "Malaysia", "Venezuela, RB", "Congo, Dem. Rep."]
#The last thirteen entries of top30Countries (from "Spain" onwards)
secondbatch = ["Spain", 
                  "Indonesia", "Australia", "South Africa", "Vietnam", "Egypt, Arab Rep.", 
                  "Thailand", "Argentina","Nigeria","Poland", "Malaysia", "Venezuela, RB", "Congo, Dem. Rep."]
countryAdditions = ["Venezuela, RB", "Congo, Dem. Rep.","Nigeria", "Pakistan", "Argentina"]
updatedCountries = ["Brazil", "China"]
#Countries used for the history check and for the structure comparison CSVs
top6Countries = ["China", "United States", "India", "Russian Federation", "Japan", "United Kingdom"]

def getRawData(country):
    """Return the nine model indicators for one country, one row per year.

    Reads the module-level WDI frame `df`, replaces missing values with 0 and
    returns a frame indexed by the year column labels with the columns named
    by the module-level `titles` list (ENR ... CO2). Values keep the object
    dtype produced by the transpose.

    country: value matched against the 'Country_Name' column.
    """
    #Indicator names, in the same order as `titles`
    #Alternatives noted by the author:
    #Total greenhouse gas emissions (kt of CO2 equivalent)
    #Fossil fuel energy consumption (% of total)
    indicatorNames = [
        "Energy use (kg of oil equivalent per capita)",               #ENR
        "Foreign direct investment, net inflows (% of GDP)",          #FDI
        "GDP per capita (current US$)",                               #GDP
        "Industry (including construction), value added (% of GDP)",  #IND
        "Trademark applications, total",                              #RND
        "Population, total",                                          #POP
        "Trade (% of GDP)",                                           #TRD
        "Urban population (% of total population)",                   #URB
        "CO2 intensity (kg per kg of oil equivalent energy use)",     #CO2
    ]

    country_data = df.loc[df['Country_Name'] == country]
    country_data = country_data.fillna(0)
    #`columns=` keyword: the positional axis argument of drop() was removed in pandas 2
    country_data = country_data.drop(columns=['Country_Code','Country_Name','Unnamed:_64','Indicator_Code'])

    #One filter per indicator (not a single isin) so the row order follows `titles`
    ALL = [country_data.loc[country_data['Indicator_Name'] == name] for name in indicatorNames]
    INPUT = pd.concat(ALL)
    INPUT = INPUT.transpose()
    INPUT.columns = titles
    #After the transpose the first row holds the Indicator_Name strings; drop it
    INPUT = INPUT.iloc[1:]
    return INPUT

def getAllData(country, modelVersion, noZeroes, dumpScaler):
    """Build the quarterly training/test data for one country.

    country:      country name passed to getRawData.
    modelVersion: only used to build the scaler output paths.
    noZeroes:     when true, zeros that lie between non-zero values are replaced
                  by linear interpolation (leading/trailing zeros stay 0).
    dumpScaler:   when true, the fitted input and output StandardScalers are
                  written under 'Countries/<country>/Models/<modelVersion>/'.

    Returns (df, newNP, newDF, train_df, test_df, input_NP, input_df,
    output_NP, output_df) where
      df        - the 35 annual rows after reset_index() (years in column 'index'),
      newNP     - the interpolation buffer; row 0 and column 0 are unused zeros,
      newDF     - quarterly frame indexed by 'Year' with the `titles` columns,
      train_df / test_df   - an 85% / 15% split of newDF (random_state=0),
      input_df / input_NP  - training inputs, unscaled / standardised,
      output_df / output_NP - training CO2 column, unscaled / standardised.
    """
    #get the initial dataset: skip the first 20 year rows, keep the next 35
    #(these are interpolated against 1980-2014 below)
    df = getRawData(country)
    df = df[20:]
    df = df.head(35)
    
    if noZeroes:
        df = df.astype(float)
        df.index = df.index.map(int)

        for column in df:
            if(df[column] == 0).any():
                #Assign the result back instead of using inplace=True on df[column]:
                #the chained in-place form does not modify df under pandas copy-on-write
                gapFilled = df[column].replace(0, np.nan)
                gapFilled = gapFilled.interpolate(method="linear", limit_direction="both", limit_area="inside")
                df[column] = gapFilled.fillna(0)
    
    #start interpolation
    df = df.reset_index()
    years = df["index"].to_numpy().reshape(-1).astype(np.int16)

    #adjust years for where annual number should end up e.g. 0.5 = midpoint of year
    adjustedYears = years.astype(np.float32) + np.float32(0.5)

    #get array for interpolated years with quarterly steps
    newYears = np.arange(start=1980, stop=2015, step=0.25)
    newYears = newYears.astype(np.float32)
    #one spare leading row and column (left as zeros), as in the original 141 x 10 buffer
    newNP = np.zeros((len(newYears) + 1, len(titles) + 1))

    #interpolation for the annual average indices
    for i, x in enumerate(titles):
        indicator = df[[str(x)]].to_numpy().reshape(-1)
        thisInterpolation = interpolate.interp1d(adjustedYears, indicator, kind="quadratic", fill_value='extrapolate')
        #evaluate all quarterly points in one call instead of one call per point
        newNP[1:, i+1] = thisInterpolation(newYears)

    #create DF
    newDF = pd.DataFrame(data=newNP[1:,1:])
    newDF["Year"] = newYears
    newDF = newDF.set_index("Year")
    newDF.columns = titles

    train_df = newDF.sample(frac=0.85,random_state=0)
    test_df = newDF.drop(train_df.index)

    #scalers are fitted on the training split only
    input_df = train_df.drop('CO2', axis=1)
    scaler = StandardScaler()
    input_NP = input_df.to_numpy()
    input_NP = scaler.fit_transform(input_NP)
    if dumpScaler:
        dump(scaler, 'Countries/{}/Models/{}/Input_Scaler.bin'.format(country, modelVersion), compress=True)

    output_df = train_df.drop(["ENR", "FDI", "GDP", "IND", "RND", "POP", "TRD", "URB"], axis=1)
    scaler = StandardScaler()
    output_NP = output_df.to_numpy()
    output_NP = scaler.fit_transform(output_NP)
    if dumpScaler:
        dump(scaler, 'Countries/{}/Models/{}/Output_Scaler.bin'.format(country, modelVersion), compress=True)
    
    return df, newNP, newDF, train_df, test_df, input_NP, input_df, output_NP, output_df

def build_model():
    """Create and compile the 8-5-1 feed-forward network.

    Eight inputs feed one hidden ReLU layer of five units and a single linear
    output. The model is compiled with MSE loss, RMSprop (rate 0.001) and the
    'mae' and 'mse' metrics.
    """
    hiddenLayer = layers.Dense(5, activation="relu", input_dim=8)
    outputLayer = layers.Dense(1)
    network = keras.Sequential([hiddenLayer, outputLayer])

    network.compile(
        loss="mse",
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae','mse'],
    )
    return network

def checkTest(country, testData, model, modelVersion):
    """Score a trained model on held-out rows.

    country, modelVersion: locate the saved Input/Output scalers on disk.
    testData: DataFrame holding the eight input columns plus 'CO2'. It is not
              modified, so the same frame can be passed again.
    model:    fitted Keras model.

    Returns (test_labels, finalOutput, error, normed_test_data): the true CO2
    values, the predictions converted back to original units, their difference
    (prediction minus truth) and the scaled inputs fed to the model.

    Raises ValueError when testData has no 'CO2' column (the original code
    failed later with a NameError in that case).
    """
    if 'CO2' not in testData.columns:
        raise ValueError("testData must contain a 'CO2' column to compute the test error")
    test_labels = testData['CO2']
    #drop() returns a new frame; the original pop() removed CO2 from the caller's data
    testInputs = testData.drop(columns='CO2')

    scaler = load('Countries/{}/Models/{}/Input_Scaler.bin'.format(country, modelVersion))
    normed_test_data  = scaler.transform(testInputs)

    test_predictions = model.predict(normed_test_data).flatten()
    outputScaler = load('Countries/{}/Models/{}/Output_Scaler.bin'.format(country, modelVersion))
    #scikit-learn scalers expect a 2-D (samples, features) array; flatten afterwards
    #so callers still receive a 1-D vector
    finalOutput = outputScaler.inverse_transform(test_predictions.reshape(-1, 1)).flatten()

    error = finalOutput - test_labels
    
    return test_labels, finalOutput, error, normed_test_data

def createANN(chosenCountry, printTraining, printR, snsPlot, modelVersion, txtLog):
    """Train one network for a country and evaluate it on the held-out rows.

    chosenCountry: country name (also used in the output paths).
    printTraining: save the MSE/MAE training curves when true.
    printR:        save the predicted-vs-actual scatter and error histogram when true.
    snsPlot:       draw a seaborn pairplot of the training inputs when true.
    modelVersion:  sub-folder name for this model's artefacts.
    txtLog:        open, writable text file; a line is written when R2 is negative.

    Returns (model, r2Val, history, input_df, epochsRun).
    """
    os.makedirs('Countries/{}/Models/{}'.format(chosenCountry, modelVersion), exist_ok=True)
    
    #getAllData takes four arguments; the call previously passed only three and raised
    #TypeError. noZeroes=True as before; dumpScaler=True because checkTest() below
    #reloads both scalers from disk.
    allData, all_NP, all_df, trainingDataFull, testData, input_NP, input_df, output_NP, output_df = getAllData(chosenCountry, modelVersion, True, True)
    
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model=build_model()
    
    history = model.fit(
        input_NP, 
        output_NP,
        epochs=epochs,
        batch_size = batchs,
        validation_split = 0.15,
        verbose = 0,
        callbacks=[early_stop]
    )
    
    epochsRun = len(history.history['loss'])
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    hist.to_csv('Countries/{}/Models/{}/history.csv'.format(chosenCountry, modelVersion))
    
    
    actualVal, predictedVal, errorVal, normed_test_data = checkTest(chosenCountry, testData, model, modelVersion)
    #NOTE(review): ceil rounds R2 *up* to two decimals; kept so stored statistics stay comparable
    r2Val = math.ceil(r2_score(actualVal, predictedVal)*100)/100
    
    if printTraining:
        printTrainingGraph(history, chosenCountry, modelVersion)

    if printR:
        printRGraph(actualVal, predictedVal, errorVal, r2Val, modelVersion, chosenCountry)

    if snsPlot:
        sns.pairplot(input_df)
    
    stamp = str(datetime.now().strftime('%H:%M:%S'))
    
    #Negative R2: log it and keep the raw actual/predicted pairs for inspection
    if r2Val < 0:
        txtLog.write(stamp + " :: NEGATIVE R2: "+ str(r2Val)+" in "+str(chosenCountry)+" Model "+ str(modelVersion) +" \n")
        r2values = pd.DataFrame({'Actual': actualVal, 'Predicted': predictedVal}, columns=['Actual', 'Predicted'])
        r2values.to_csv('Countries/{}/Models/{}/r2Values.csv'.format(chosenCountry, modelVersion))
        
    return model, r2Val, history, input_df, epochsRun

def printTrainingGraph(history, chosenCountry, modelVersion):
    """Save the smoothed MSE and MAE training curves of one model as PNG files.

    Both images are written to 'Countries/<chosenCountry>/Models/<modelVersion>/'.
    """
    plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

    #(figure number, metric, y-axis limits, y-axis label, output path template)
    panels = [
        (1, "mse", [0, 2], 'CO2 (MSE)', 'Countries/{}/Models/{}/mse_trained.png'),
        (2, "mae", [0, 0.5], 'CO2 (MAE)', 'Countries/{}/Models/{}/mae_trained.png'),
    ]

    for figureNumber, metricName, yLimits, yLabel, target in panels:
        plt.figure(figureNumber)
        plotter.plot({'Basic':history}, metric=metricName)
        plt.ylim(yLimits)
        plt.ylabel(yLabel)
        plt.savefig(target.format(chosenCountry, modelVersion))
        plt.close()
    
def printRGraph(actual, predicted, error, r2Val, modelVersion, chosenCountry):
    """Save the predicted-vs-actual scatter plot and the error histogram.

    Both images are written to 'Countries/<chosenCountry>/Models/<modelVersion>/'.
    The scatter plot is titled with r2Val and drawn on fixed 0-4 axes with a
    diagonal reference line.
    """
    outputFolder = 'Countries/{}/Models/{}/'.format(chosenCountry, modelVersion)

    #Scatter of predictions against true values
    plt.figure(3)
    scatterAxes = plt.axes(aspect='equal')
    plt.scatter(actual, predicted)
    plt.xlabel('True Values [CO2]')
    plt.ylabel('Predictions [CO2]')
    axisRange = [0, 4]
    plt.xlim(axisRange)
    plt.ylim(axisRange)
    plt.plot(axisRange, axisRange)
    scatterAxes.set_title('R2: ' + str(r2Val))
    plt.savefig(outputFolder + 'r2.png')
    plt.close()

    #Distribution of the prediction errors
    plt.figure(4)
    plt.hist(error, bins = 25)
    plt.xlabel("Prediction Error [CO2]")
    plt.ylabel("Count")
    plt.savefig(outputFolder + 'error_histogram.png')
    plt.close()
    
def createAllModels(country, iterations, txtLog):
    """Train `iterations` independent networks for one country.

    Each model's weights are saved in its own numbered folder. The per-model
    R2 / epoch counts are written to '<country>ModelStats.csv' and their
    describe() summary to '<country>ModelChar.csv'.

    Returns the per-model statistics frame (columns 'R2' and 'Epochs').
    """
    data = []
    
    for modelVersion in range(iterations):
        trainedModel, R2, trainedHistory, originalInput, epochsRun = createANN(country, False, True, False, modelVersion, txtLog)
        trainedModel.save_weights('Countries/{}/Models/{}/checkpoint_test'.format(country, modelVersion))
        data.append([R2, epochsRun])
        
    modelStats = pd.DataFrame(data, columns=['R2', 'Epochs'])
    modelStats.to_csv('Countries/{}/Models/{}ModelStats.csv'.format(country, country))
    
    #Summary statistics come from the same frame; no need to rebuild it
    modelCharacteristics = modelStats.describe()
    modelCharacteristics.to_csv('Countries/{}/Models/{}ModelChar.csv'.format(country , country))
    
    return modelStats

#Produces csv for all of the countries data for post analysis
def produceAllCountryCSV(inputData):
    """Merge every country's ModelStats.csv into 'Countries/allCountryData.csv'.

    inputData: iterable of country names; each must already have
               'Countries/<country>/Models/<country>ModelStats.csv'.

    The output has the columns R2, Epochs and Country and keeps each file's own
    row labels. An empty inputData writes a header-only file.
    """
    summaryColumns = ["R2", "Epochs", "Country"]
    frames = []

    for country in inputData:
        thisData = pd.read_csv('Countries/{}/Models/{}ModelStats.csv'.format(country, country)) 
        thisData["Country"] = country
        #`columns=` keyword: the positional axis argument of drop() was removed in pandas 2
        frames.append(thisData.drop(columns=['Unnamed: 0']))

    if frames:
        #single concat instead of re-copying the accumulated frame on every iteration
        allCountryData = pd.concat(frames)
        extraColumns = [c for c in allCountryData.columns if c not in summaryColumns]
        allCountryData = allCountryData.reindex(columns=summaryColumns + extraColumns)
    else:
        allCountryData = pd.DataFrame(columns=summaryColumns)

    allCountryData.to_csv('Countries/allCountryData.csv')

#---------------------------------------------------------------------------------------
#Main process to start training the cohort of neural networks
#---------------------------------------------------------------------------------------

def runTraining(inputData):
    """Train 100 models for every country in inputData and log progress.

    Progress and timings are written to 'Countries/log.txt' (overwritten on
    each run). Afterwards the per-country statistics are merged with
    produceAllCountryCSV. On Windows the log is opened in Notepad once
    training has finished.
    """
    start = timer()

    #`with` closes (and flushes) the log even if training raises part-way through
    with open("Countries/log.txt","w+") as txtLog:
        startStamp = str(datetime.now().strftime('%H:%M:%S'))
        txtLog.write("Time started: " + startStamp +"\n \n")

        for country in inputData:
            stamp = str(datetime.now().strftime('%H:%M:%S'))
            txtLog.write(stamp + " :: ================================================= \n")
            txtLog.write(stamp + " :: " + country + " started \n \n")

            createAllModels(country, 100, txtLog)

            stamp = str(datetime.now().strftime('%H:%M:%S'))
            txtLog.write("\n \n")
            txtLog.write(stamp + " :: " + country + " completed\n")
            print(stamp + " :: " + country + " completed")
            txtLog.write(stamp + " :: ================================================= \n")
            txtLog.write("  \n")

        end = timer()
        secondsDuration = (end - start)
        duration = time.strftime('%H:%M:%S', time.gmtime(secondsDuration))
        endStamp = str(datetime.now().strftime('%H:%M:%S'))
        txtLog.write("Time completed: " + endStamp +"\n")
        txtLog.write("Time taken: " + duration +"\n")

    print("\nCompleted")

    #notepad.exe only exists on Windows; elsewhere the command would just fail
    if os.name == "nt":
        osCommandString = "notepad.exe Countries/log.txt"
        os.system(osCommandString)
    
    produceAllCountryCSV(inputData)

#---------------------------------------------------------------------------------------
#Create alternate models
#---------------------------------------------------------------------------------------

def build_model_retest():
    """Create and compile the alternate 8-5-1 network trained with Adam.

    Same layers, loss and metrics as build_model(); only the optimiser differs
    (Adam with learning rate 0.001).
    """
    model = keras.Sequential([
        layers.Dense(5, activation="relu", input_dim=8),
        layers.Dense(1)
    ])
    
    #`learning_rate` is the supported keyword; the `lr` alias is deprecated in TF2
    #and rejected by newer Keras releases
    optimiser = tf.keras.optimizers.Adam(learning_rate=0.001)
    
    model.compile(loss="mse",
                 optimizer=optimiser,
                 metrics=['mae','mse'])
    return model

def ANNRetest(chosenCountry, printTraining, printR, snsPlot, modelVersion, structure, structureVal1, structureVal2):
    """Train one alternate (Adam) network for a country and evaluate it.

    chosenCountry, modelVersion: select the data and the artefact folders.
    printTraining / printR / snsPlot: enable the corresponding plots.
    structure, structureVal1, structureVal2: accepted for the caller's
        bookkeeping; they are not used here (build_model_retest() always
        builds the same layers).

    Returns (model, r2Val, history, input_df, epochsRun).

    NOTE(review): the scalers and any plots are written under
    'Countries/<country>/Models/<modelVersion>/' because getAllData, checkTest
    and the plot helpers hard-code that location.
    """
    os.makedirs('Countries_restoredWeights/{}/{}'.format(chosenCountry, modelVersion), exist_ok=True)
    #getAllData() dumps the scalers here and checkTest() reloads them, so it must exist too
    os.makedirs('Countries/{}/Models/{}'.format(chosenCountry, modelVersion), exist_ok=True)
    
    #getAllData takes four arguments; the call previously passed only two and raised
    #TypeError. noZeroes=True / dumpScaler=True mirror createANN - confirm this is the
    #intended pre-processing for the retest.
    allData, all_NP, all_df, trainingDataFull, testData, input_NP, input_df, output_NP, output_df = getAllData(chosenCountry, modelVersion, True, True)
    
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, restore_best_weights=True)
    model=build_model_retest()

    history = model.fit(
        input_NP, 
        output_NP,
        epochs=epochs,
        batch_size = batchs,
        validation_split = 0.15,
        verbose = 0,
        callbacks=[early_stop]
    )
    
    epochsRun = len(history.history['loss'])

    actualVal, predictedVal, errorVal, normed_test_data = checkTest(chosenCountry, testData, model, modelVersion)
    #NOTE(review): ceil rounds R2 *up* to two decimals, as in createANN
    r2Val = math.ceil(r2_score(actualVal, predictedVal)*100)/100
    
    if printTraining:
        printTrainingGraph(history, chosenCountry, modelVersion)

    if printR:
        printRGraph(actualVal, predictedVal, errorVal, r2Val, modelVersion, chosenCountry)

    if snsPlot:
        sns.pairplot(input_df)
    
    return model, r2Val, history, input_df, epochsRun

def createAllModelsRetest(country, iterations, structureVal1, structureVal2):
    """Train `iterations` alternate networks for one country.

    Weights are saved under 'Countries_restoredWeights/<country>/<n>/'. The
    per-model R2 / epoch counts go to '<country>_ModelStats.csv' and their
    describe() summary to '<country>_ModelChar.csv' in the country folder.

    structureVal1, structureVal2: only used to build the "8-a-b-1" label that
    is passed through to ANNRetest.

    Returns the per-model statistics frame (columns 'R2' and 'Epochs').
    """
    structure ="8-{}-{}-1".format(structureVal1, structureVal2)
    
    data = []
    
    for modelVersion in range(iterations):
        trainedModel, R2, trainedHistory, originalInput, epochsRun = ANNRetest(country, False, True, False, modelVersion, structure, structureVal1, structureVal2)
        trainedModel.save_weights('Countries_restoredWeights/{}/{}/checkpoint_test'.format(country, modelVersion))
        data.append([R2, epochsRun])
        
    modelStats = pd.DataFrame(data, columns=['R2', 'Epochs'])
    modelStats.to_csv('Countries_restoredWeights/{}/{}_ModelStats.csv'.format(country, country))
    
    #Summary statistics come from the same frame; no need to rebuild it
    modelCharacteristics = modelStats.describe()
    modelCharacteristics.to_csv('Countries_restoredWeights/{}/{}_ModelChar.csv'.format(country, country))
    
    return modelStats

#---------------------------------------------------------------------------------------
#Data checks
#---------------------------------------------------------------------------------------

def showData(country):
    """Return the 35 annual rows of raw indicator data used for modelling.

    This is the getRawData() frame with its first 20 rows skipped and the
    following 35 rows kept.
    """
    return getRawData(country)[20:].head(35)

def zeroCheck(inputData):
    """Count the zero values left in each country's annual data.

    inputData: iterable of country names.

    Returns a frame with one row per country: 'Country', one zero-count column
    per column of the annual frame returned by getAllData (including its
    'index' year column), and 'Total', sorted ascending by 'Total'.
    """
    rows = []
    for country in inputData:
        #only the annual frame (first return value) is needed; scalers are not dumped
        df = getAllData(country, "0", True, False)[0]
        zero_count = (df == 0).astype(int).sum(axis=0)
        zero_count = zero_count.to_frame().T
        zero_count.insert(0, "Country", country)
        rows.append(zero_count)

    if not rows:
        return pd.DataFrame(columns=["Country", "Total"])

    #DataFrame.append was removed in pandas 2; build the rows first and concat once
    newDF = pd.concat(rows)

    #Sum the numeric count columns only (the Country names are excluded).
    #NOTE(review): 270 is the original divisor; confirm it matches the number of
    #cells being checked.
    totals = newDF.drop(columns="Country").sum(axis=1)
    newDF['Total'] = ((totals/270)*100).round(2).to_numpy()
    newDF = newDF.sort_values(by=['Total'])
    
    return newDF

#---------------------------------------------------------------------------------------
#Predictions using models
#---------------------------------------------------------------------------------------

def prediction(country, predictionInput, modelVersion):
    """Predict CO2 with one saved model of a country.

    country, modelVersion: locate the saved scalers and weight checkpoint.
    predictionInput: 2-D array-like with eight features per row, in the
                     ENR ... URB order used for training.

    Returns a 1-D array with one prediction per input row, converted back to
    the original CO2 units.
    """
    scaler = load('Countries/{}/Models/{}/Input_Scaler.bin'.format(country, modelVersion))
    normalised_test = scaler.transform(predictionInput).astype('float32')
    
    model=build_model()
    model.load_weights('Countries/{}/Models/{}/checkpoint_test'.format(country, modelVersion)).expect_partial()
    
    #Call the model directly rather than model.predict(), as the original did
    test_predictions = model(normalised_test).numpy().flatten()
    
    #Release the Keras graph state; this function is called once per saved model
    K.clear_session()
    
    outputScaler = load('Countries/{}/Models/{}/Output_Scaler.bin'.format(country, modelVersion))
    
    #scikit-learn scalers expect a 2-D (samples, features) array; flatten afterwards
    #so callers still receive a 1-D vector
    return outputScaler.inverse_transform(test_predictions.reshape(-1, 1)).flatten()

def predictAll(country, predictionInput, modelCount=100):
    """Run one input row through every saved model of a country.

    country:         country whose saved models are used.
    predictionInput: a single row of eight features as a 2-D array-like.
    modelCount:      number of saved models (versions 0 .. modelCount-1);
                     defaults to the 100 used throughout the notebook.

    Returns a DataFrame with one column (label 0) and modelCount + 1 rows.
    Rows 0 .. modelCount-1 hold the predictions; the last row is left as NaN
    so callers can store an extra value there.

    Raises ValueError when a model returns anything other than one prediction.
    """
    results = []
    for x in range(modelCount):
        modelOutput = np.ravel(prediction(country, predictionInput, x))
        if modelOutput.size != 1:
            raise ValueError("predictAll expects a single input row, got {} predictions".format(modelOutput.size))
        results.append(modelOutput[0])

    #spare final row, as in the original index of modelCount + 1 entries
    results.append(np.nan)

    #Build the frame in one step instead of assigning arrays cell by cell
    return pd.DataFrame({0: results}, index=range(modelCount + 1), dtype="float64")

def getYearData(country, year):
    """Return one year's inputs and CO2 value for a country.

    year: index label of the wanted row (the callers in this notebook pass the
          year as a string such as "2010").

    Returns (inputs, co2): inputs is a (1, 8) array holding the 'ENR' to 'URB'
    columns and co2 is that row's 'CO2' value.
    """
    modelWindow = getRawData(country)[20:].head(35)
    inputs = modelWindow.loc[year, 'ENR':'URB'].to_numpy().reshape(1, -1)
    co2 = modelWindow.loc[year, "CO2"]
    return inputs, co2

def compareActualHistory(country):
    """Predict every year 1980-2014 with all saved models and store the result.

    Each column of the returned frame is one year. Rows 0-99 hold the model
    predictions and row 100 holds the recorded CO2 value for that year. The
    frame is also written to
    'Projections/History_Check/<country>_HistoryCheck.csv'.
    """
    yearsTest = range(1980, 2015)
    yearlyResults = []
    
    for n in yearsTest:
        inputData, CO2 = getYearData(country, str(n))
        result = predictAll(country, inputData)
        #the spare last row of predictAll's frame carries the actual value
        result.loc[100,:] = CO2
        yearlyResults.append(result)
        
        print("Completed " + str(n) + " ")
        
    #single concat instead of growing a frame (plus placeholder column) each iteration
    resultsDF = pd.concat(yearlyResults, axis=1)
    resultsDF.columns = yearsTest

    os.makedirs('Projections/History_Check', exist_ok=True)
    resultsDF.to_csv('Projections/History_Check/{}_HistoryCheck.csv'.format(country))
    print("\n " + str(datetime.now().strftime('%H:%M:%S')) + " Completed " + country + "\n")
    return resultsDF

#---------------------------------------------------------------------------------------
#Produces histogram to show ranges by country for different structures
#---------------------------------------------------------------------------------------

def directoryLooperbyCountry():
    """Collect the R2 values of every structure, one column per country.

    For each sub-folder of 'Structure/' the model statistics of the
    top6Countries are read and laid side by side, preceded by a column "0"
    holding the structure name. The blocks of all structures are stacked and
    written to 'Structure/Structure Comparison.csv'.

    NOTE(review): directoryLooper() writes to the same output file.
    """
    structureBlocks = []
    
    for thisStructure in next(os.walk('Structure/'))[1]:
    
        #100-row label column, matching the 100 models trained per country
        labelColumn = pd.DataFrame({"0": " {}".format(thisStructure)}, index=range(100))
        countryColumns = [labelColumn]

        for country in top6Countries:

            thisModelData = pd.read_csv('Structure/{}/{}/{}_ModelStats.csv'.format(thisStructure, country, thisStructure))
            thisModelData.columns = [c.replace(' ', '_') for c in thisModelData.columns]
            #drop the first and third columns, leaving the second one (R2 in the
            #ModelStats files this notebook writes)
            thisModelData = thisModelData.drop(thisModelData.columns[[0,2]], axis=1)

            thisModelData.columns = [country]
            countryColumns.append(thisModelData)

        structureBlocks.append(pd.concat(countryColumns, axis=1))

    mainResults = pd.concat(structureBlocks) if structureBlocks else pd.DataFrame()
    
    #The path has no placeholders, so the former .format(country, country) did nothing
    #except depend on the loop variable having been set
    mainResults.to_csv('Structure/Structure Comparison.csv')

#---------------------------------------------------------------------------------------
#Produces histogram to show ranges for different structures only
#---------------------------------------------------------------------------------------

def directoryLooper():
    """Collect the R2 values of every structure, pooled over countries.

    For each sub-folder of 'Structure/' the model statistics of the
    top6Countries are stacked into a single 'Results' column, followed by a
    column "0" holding the structure name. The blocks of all structures are
    stacked and written to 'Structure/Structure Comparison.csv'.

    NOTE(review): directoryLooperbyCountry() writes to the same output file.
    """
    structureBlocks = []
    
    for thisStructure in next(os.walk('Structure/'))[1]:
        
        countryRows = []

        for country in top6Countries:

            thisModelData = pd.read_csv('Structure/{}/{}/{}_ModelStats.csv'.format(thisStructure, country, thisStructure))
            thisModelData.columns = [c.replace(' ', '_') for c in thisModelData.columns]
            #drop the first and third columns, leaving the second one (R2 in the
            #ModelStats files this notebook writes)
            thisModelData = thisModelData.drop(thisModelData.columns[[0,2]], axis=1)

            thisModelData.columns = ["Results"]
            countryRows.append(thisModelData)
            
        thisResults = pd.concat(countryRows) if countryRows else pd.DataFrame()
        thisResults["0"]=" {}".format(thisStructure)
        
        structureBlocks.append(thisResults)

    mainResults = pd.concat(structureBlocks) if structureBlocks else pd.DataFrame()

    #The path has no placeholders, so the former .format(country, country) did nothing
    #except depend on the loop variable having been set
    mainResults.to_csv('Structure/Structure Comparison.csv')
    
#---------------------------------------------------------------------------------------
#SSP Projections
#---------------------------------------------------------------------------------------
    
#Scenario labels: matched against the 'SSP' column of SSP_Data and used as
#column names when reading the predictions table
SSPS = ["SSP1","SSP2","SSP3","SSP4","SSP5"]       
#Years projected for each scenario; kept as strings because they are used as
#column labels of SSP_Data
projectionYears = ["2020", "2030", "2040", "2050"]

def _countryAttribute(country, column):
    """Return `column` of the first countrycode row whose 'Country' equals `country`.

    Raises IndexError (the type the bare .iloc[0] raised before) with a
    readable message when the country is not listed.
    """
    matches = countrycode.loc[countrycode["Country"]==country, column]
    if matches.empty:
        raise IndexError("Country '{}' not found in the country code table".format(country))
    return matches.iloc[0]

def countryConverter(country):
    """Return the 'Code' entry for a country (matched against SSP_Data's REGION column)."""
    return _countryAttribute(country, "Code")

def countryOECD(country):
    """Return the 'OECD' entry for a country (matched against predictions' TYPE column)."""
    return _countryAttribute(country, "OECD")

def countryIncome(country):
    """Return the 'Income' entry for a country (matched against predictions' TYPE column)."""
    return _countryAttribute(country, "Income")

def interpolater(year1, year2, y1, y2, year):
    """Linearly interpolate between (year1, y1) and (year2, y2) at `year`.

    Values are not extrapolated: for a year outside [year1, year2] the nearer
    end value is returned (numpy.interp behaviour).
    """
    return np.interp(year, [year1, year2], [y1, y2])

def getGDPInfo(country, value):
    """Convert an SSP GDP figure into current-US$ terms using WDI ratios.

    value: GDP figure in billions (it is multiplied by 1e9 first).

    Steps, each using the country's WDI_Data rows:
      1. multiply by the 2005 PPP conversion factor (LCU per international $),
      2. divide by 2005 current-LCU GDP and multiply by 2005 constant-LCU GDP,
      3. divide by 2010 constant-LCU GDP and multiply by 2010 current-US$ GDP.
    NOTE(review): presumably this rebases a 2005 PPP value to 2010 US$ - confirm
    against the SSP database units.
    """
    gdpIndicators = ["GDP (constant LCU)","GDP (current US$)", "GDP (current LCU)", "PPP conversion factor, GDP (LCU per international $)","GDP deflator: linked series (base year varies by country)"]
    countryRows = WDI_Data.loc[(WDI_Data['Country_Name'] == country) & WDI_Data['Indicator_Name'].isin(gdpIndicators)]

    def reading(indicatorName, yearLabel):
        #first matching row's value for the given year column
        return countryRows.loc[(countryRows['Indicator_Name'] == indicatorName)][yearLabel].iloc[0]

    inLocalCurrency = (value*1000000000) * reading("PPP conversion factor, GDP (LCU per international $)", "2005")
    inConstantTerms = (inLocalCurrency/reading("GDP (current LCU)", "2005")) * reading("GDP (constant LCU)", "2005")
    return (inConstantTerms/reading("GDP (constant LCU)", "2010")) * reading("GDP (current US$)", "2010")

def industryProjector(country, year, income, thisSSP):
    """Project the industry share of GDP for a country and year.

    country: country name matched against WDI_Data['Country_Name'].
    year:    target year (string or number).
    income:  value matched against the predictions 'TYPE' column for 'IND'.
    thisSSP: name of the predictions column holding the scenario rate.

    The 1990-2014 series is evaluated at `year` with numpy.interp, which
    returns the 2014 value for any later year, then scaled by a factor that
    changes linearly with the year and equals 1 + rate/100 in 2050.
    """
    historyYears = arange(1990,2015)
    industryRow = WDI_Data.loc[(WDI_Data['Country_Name'] == country) & (WDI_Data['Indicator_Name'] == "Industry (including construction), value added (% of GDP)")]
    #Select the year columns by label. The original selected the same columns by
    #position (dropping the first 34 and keeping 25) and relied on the positional
    #axis argument of drop(), which pandas 2 no longer accepts.
    industryData = industryRow[[str(y) for y in historyYears]].to_numpy().reshape(-1)

    #convert explicitly: callers pass the year as a string such as "2020"
    indPrediction = interp(float(year), historyYears, industryData)
    rate = predictions[(predictions['INDICATOR'] == "IND")& (predictions['TYPE'] == income)][thisSSP].iloc[0]
    #fraction of the 2010-2050 span elapsed, applied to the percentage rate
    newRate = ( ( (rate/40)*( 40-(2050-int(year)) ) )/100)+1
    
    return indPrediction * newRate

def createInputData(country, year, thisSSP):
    """Assemble one projected input row for a country, year and SSP scenario.

    year:    projection year as a string (used as an SSP_Data column label).
    thisSSP: scenario label, e.g. "SSP1".

    Returns [[enr, fdi, gdp, ind, rnd, population, trade, urbanisation]], the
    feature order the models were trained with. The 2010 row from
    getYearData() is the baseline for the trade, FDI, RND and ENR values.
    """
    code = countryConverter(country)
    income = countryIncome(country)
    OECD = countryOECD(country)

    latest, _ = getYearData(country, "2010")
    baseline = latest[0]
    yearsAhead = int(year)-2010

    thisCountry = SSP_Data.loc[SSP_Data['REGION'] == code]

    def sspValue(variable, sourceModel):
        #scenario value for this country, variable and source model in `year`
        rows = thisCountry[(thisCountry['VARIABLE'] == variable) & (thisCountry['MODEL'] == sourceModel) & (thisCountry['SSP'] == thisSSP)]
        return rows[year].iloc[0]

    def groupRate(indicator):
        #predictions entry for the indicator and this country's OECD group
        rows = predictions[(predictions['INDICATOR'] == indicator)& (predictions['TYPE'] == OECD)]
        return rows[thisSSP].iloc[0]

    #SSP population is scaled by 1e6
    population = sspValue("Population", "OECD Env-Growth")*1000000
    urbanisation = sspValue("Population|Urban|Share", "NCAR")

    #trade and FDI move linearly from the 2010 value towards the 2050 scenario value
    tradeTarget = predictions[(predictions['INDICATOR'] == "TRD")][thisSSP].iloc[0]
    trade = interpolater(2010, 2050, baseline[6], tradeTarget, int(year))
    fdi = interpolater(2010, 2050, baseline[1], groupRate("FDI"), int(year))

    #RND and ENR grow linearly (not compounded) by the scenario percentage per year
    rnd = baseline[4]+((baseline[4]*((groupRate("RND")/100)))*yearsAhead)
    enr = baseline[0]+((baseline[0]*((groupRate("ENR")/100)))*yearsAhead)

    #GDP per capita: converted total GDP divided by the projected population
    gdp = (getGDPInfo(country, sspValue("GDP|PPP", "OECD Env-Growth")))/population
    ind = industryProjector(country, year, income, thisSSP)

    return [[enr, fdi, gdp, ind, rnd, population, trade, urbanisation]]

def SSP_Projection(country):
    """Project CO2 for every SSP scenario and projection year of a country.

    For each scenario, all saved models are run for every year in
    projectionYears. The per-model predictions followed by their describe()
    summary are written to 'Projections/Individual/<country>/<SSP>.csv'. The
    lower / mean / upper rows of all scenarios are written to 'Summary.csv' in
    the same folder. The elapsed time per scenario is printed.
    """
    os.makedirs('Projections/Individual/{}/'.format(country), exist_ok=True)
    
    summaryParts = []
    
    for SSP in SSPS:
        start = timer()

        #one single-column frame of model predictions per projection year
        yearlyPredictions = [predictAll(country, createInputData(country, year, SSP)) for year in projectionYears]
        SSPResults = pd.concat(yearlyPredictions, axis=1)
        SSPResults.columns = projectionYears
        characteristics = SSPResults.describe()

        #95% interval of the mean: 1.96 * std / sqrt(n). With the 100 models used
        #here sqrt(n) is 10, the divisor that was hard-coded before.
        meanValues = characteristics.loc["mean"]
        SSPStd = characteristics.loc["std"]
        ConfInt = (SSPStd/np.sqrt(characteristics.loc["count"]))*1.96
        lowerInt = meanValues - ConfInt
        upperInt = meanValues + ConfInt
        
        plotting = pd.concat([lowerInt,meanValues,upperInt], axis=1)
        plotting.columns = [SSP+" lower", SSP+" mean", SSP+" upper"]
        summaryParts.append(plotting.T)
        
        SSPResults = pd.concat([SSPResults, characteristics])
        SSPResults.to_csv('Projections/Individual/{}/{}.csv'.format(country, SSP))

        end = timer()
        secondsDuration = (end - start)
        duration = time.strftime('%H:%M:%S', time.gmtime(secondsDuration))
        print(country + " " + SSP +  ": " + duration)
    
    countrySummary = pd.concat(summaryParts) if summaryParts else pd.DataFrame()
    countrySummary.to_csv('Projections/Individual/{}/Summary.csv'.format(country))

In [None]:
#Compare the actual vs network projections for the six reference countries
startedAt = str(datetime.now().strftime('%H:%M:%S'))
print(startedAt + " Started")
for country in top6Countries:
    compareActualHistory(country)

In [None]:
#Hand-entered single-row inputs for years outside the training window.
#Presumably in the training feature order (ENR, FDI, GDP, IND, RND, POP, TRD, URB) - verify.
UK2019 = [[2630.53, 0.0790593814371999, 42328.9002575769, 17.4206873962023, 105674, 66836327, 64.2877735623301, 83.652]]
UK2018 = [[2757.69, 2.83706174602294, 43043.22782, 17.5188312851163, 94953, 66460344, 62.6190596109507, 83.398]]
UK2015 = [[2764.516671, 1.547962, 44974.831877, 18.141621, 57891.0, 65116219.0, 56.683096, 82.626]]
USA2017 = [[7547.57012, 1.820076, 59957.725851, 18.20794, 448211.0, 324985539.0, 27.14232, 82.058]]

#Run the UK 2018 row through every saved UK model and store the predictions
predictionSet = predictAll("United Kingdom", UK2018)
predictionSet.to_csv('Projections/History_Check/Adjusted_Input/UK2018.csv')

In [3]:
#Run the SSP projections for every country, printing a numbered start time for each
for x, country in enumerate(top30Countries):
    startStamp = str(datetime.now().strftime('%H:%M:%S'))
    position = str(x+1)
    print(startStamp + ": " + position + ". " + country)
    SSP_Projection(country)
    print("\n")
    

15:29:37: 1. China
China SSP1: 00:00:25
China SSP2: 00:00:24
China SSP3: 00:00:24
China SSP4: 00:00:23
China SSP5: 00:00:23


15:31:38: 2. United States
United States SSP1: 00:00:25
United States SSP2: 00:00:23
United States SSP3: 00:00:24
United States SSP4: 00:00:26
United States SSP5: 00:00:25


15:33:43: 3. India
India SSP1: 00:00:25
India SSP2: 00:00:23
India SSP3: 00:00:24
India SSP4: 00:00:24
India SSP5: 00:00:25


15:35:46: 4. Russian Federation
Russian Federation SSP1: 00:00:40
Russian Federation SSP2: 00:00:22
Russian Federation SSP3: 00:00:24
Russian Federation SSP4: 00:00:22
Russian Federation SSP5: 00:00:22


15:38:00: 5. Japan
Japan SSP1: 00:00:25
Japan SSP2: 00:00:22
Japan SSP3: 00:00:22
Japan SSP4: 00:00:22
Japan SSP5: 00:00:23


15:39:57: 6. Canada
Canada SSP1: 00:00:24
Canada SSP2: 00:00:22
Canada SSP3: 00:00:22
Canada SSP4: 00:00:22
Canada SSP5: 00:00:22


15:41:52: 7. Germany
Germany SSP1: 00:00:26
Germany SSP2: 00:00:22
Germany SSP3: 00:00:22
Germany SSP4: 00:00:22

[]

In [6]:
def scalerTest(country, predictionInput, modelVersion):
    """Debug helper: show how the saved input scaler transforms an input row.

    Follows the same steps as prediction() (scaler, model, weights, forward
    pass, output scaler) but returns (scaled input as float32, the original
    predictionInput) instead of the prediction.
    """
    inputScaler = load('Countries/{}/Models/{}/Input_Scaler.bin'.format(country, modelVersion))
    scaledInput = inputScaler.transform(predictionInput)

    network = build_model()
    network.load_weights('Countries/{}/Models/{}/checkpoint_test'.format(country, modelVersion)).expect_partial()

    scaledInput = scaledInput.astype('float32')

    #The forward pass result is not returned; it is computed as in prediction()
    rawOutput = network(scaledInput)
    rawOutput = rawOutput.numpy().flatten()

    K.clear_session()

    #Loaded as in prediction(); not used for the returned values
    outputScaler = load('Countries/{}/Models/{}/Output_Scaler.bin'.format(country, modelVersion))

    return scaledInput, predictionInput

#Single UK input row for 2015; presumably in the training feature order
#(ENR, FDI, GDP, IND, RND, POP, TRD, URB) - verify
UK2015 = [[2764.516671, 1.547962, 44974.831877, 18.141621, 57891.0, 65116219.0, 56.683096, 82.626]]

In [7]:
#Show the scaled version of the UK 2015 row for saved model 1 next to the raw input
scalerTest("United Kingdom", UK2015, 1)

(array([[-2.8919036 , -0.66108465,  1.3425224 ,  0.23228668,  3.043277  ,
          2.4578793 ,  1.1025933 ,  2.6209843 ]], dtype=float32),
 [[2764.516671,
   1.547962,
   44974.831877,
   18.141621,
   57891.0,
   65116219.0,
   56.683096,
   82.626]])