In [1]:
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.options.display.max_rows = 4000
import numpy as np
from numpy import loadtxt
from numpy import arange
from numpy import interp
import math
import os, psutil
import timeit
from timeit import default_timer as timer
from datetime import datetime
import time
import memory_profiler
from memory_profiler import profile

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from sklearn.preprocessing import StandardScaler
from scipy import interpolate
from scipy.interpolate import interp1d
from sklearn.metrics import r2_score
from joblib import dump, load

from keras.models import Sequential
from keras.layers.core import Dense
import keras.backend as K

import warnings


def _read_clean_csv(path):
    """Read a CSV file and replace spaces in its column names with underscores."""
    frame = pd.read_csv(path)
    frame.columns = [c.replace(' ', '_') for c in frame.columns]
    return frame


# Source tables used by the functions below. Column names are normalised so
# that e.g. "Country Name" becomes "Country_Name".
WDI = _read_clean_csv('02_Data/WDIData.csv')
hdi_data = _read_clean_csv('02_Data/HDI.csv')
pv_data = _read_clean_csv('02_Data/PV.csv')
hyd_data = _read_clean_csv('02_Data/HYD.csv')
countrycode = _read_clean_csv('02_Data/COUNTRYCODES.csv')
predictions = _read_clean_csv('02_Data/PREDICTIONS.csv')
SSP_Data = _read_clean_csv('02_Data/SSP.csv')

# WDI_Data holds the same table as WDI; take an independent copy instead of
# parsing the (large) CSV file a second time.
WDI_Data = WDI.copy()

# `np.warnings` was only an alias of the stdlib `warnings` module and no longer
# exists in recent NumPy releases; the warning class itself moved to
# `np.exceptions` in NumPy 2, so look it up in both places.
_visibleDeprecation = getattr(np, "VisibleDeprecationWarning", None)
if _visibleDeprecation is None:
    _visibleDeprecation = np.exceptions.VisibleDeprecationWarning
warnings.filterwarnings('ignore', category=_visibleDeprecation)

# `IPython.display` is the public import location for these helpers.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

Using TensorFlow backend.


In [4]:
# ---- Run configuration -------------------------------------------------
# Directory under which models, scalers and logs for this grouping are stored.
currentPath = "Groupings/IMF/Emerging"

# Column names given to the nine WDI-derived series assembled in getRawData.
titles = ["ENR", "FDI", "GDP", "IND", "RND", "POP", "TRD", "URB", "CO2"]

# Training settings passed to model.fit in createANN.
epochs = 1000
batchs = 32

testCountries = ["United Kingdom", "France", "Germany"]

top30Countries = [
    "China",
    "United States",
    "India",
    "Russian Federation",
    "Japan",
    "Canada",
    "Germany",
    "Korea, Rep.",
    "Brazil",
    "France",
    "Saudi Arabia",
    "United Kingdom",
    "Pakistan",
    "Mexico",
    "Iran, Islamic Rep.",
    "Turkey",
    "Italy",
    "Spain",
    "Indonesia",
    "Australia",
    "South Africa",
    "Vietnam",
    "Egypt, Arab Rep.",
    "Thailand",
    "Argentina",
    "Nigeria",
    "Poland",
    "Malaysia",
    "Venezuela, RB",
    "Congo, Dem. Rep.",
]

UNDeveloped = [
    "United States",
    "Russian Federation",
    "Japan",
    "Canada",
    "Germany",
    "Korea, Rep.",
    "France",
    "Saudi Arabia",
    "United Kingdom",
    "Turkey",
    "Italy",
    "Spain",
    "Australia",
    "Argentina",
    "Poland",
]

IMFAdvanced = [
    "United States",
    "Japan",
    "Canada",
    "Germany",
    "Korea, Rep.",
    "France",
    "United Kingdom",
    "Italy",
    "Spain",
    "Australia",
]

# Complementary groups: every top-30 country that is not in the listed group,
# in top30Countries order.
UNDeveloping = [name for name in top30Countries if name not in UNDeveloped]
IMFEmerging = [name for name in top30Countries if name not in IMFAdvanced]

def getPower(inputData, country):
    """Return the ``power`` value of the first row whose ``country`` matches.

    Raises IndexError when no row of ``inputData`` has that country.
    """
    isCountry = inputData['country'] == country
    firstMatch = inputData.loc[isCountry].iloc[0]
    return firstMatch["power"]

def getHDI(inputData, country):
    """Return one country's HDI values as a single-column frame named ``HDI``.

    The ``Country`` column of ``inputData`` is left-stripped IN PLACE (the
    caller's frame is modified) before matching. The matching row is
    transposed and rows at positions 1..25 are kept, i.e. the 25 columns that
    follow the first column of ``inputData``.
    """
    inputData["Country"] = inputData["Country"].str.lstrip()
    countryRow = inputData.loc[inputData['Country'] == country]
    # After the transpose the original column labels become the index.
    values = countryRow.transpose().iloc[1:26]
    values.columns = ["HDI"]
    return values

def getArea(country):
    """Return the country's "Surface area (sq. km)" value at year position 20.

    Reads the module-level ``WDI`` table. Missing values are replaced by 0
    before the lookup. The result is a one-element Series (label ``Country``)
    holding the 21st yearly value of the indicator row.
    NOTE(review): which calendar year position 20 corresponds to depends on
    the first year column of WDIData.csv - confirm against the data file.
    """
    country_data = WDI.loc[WDI['Country_Name'] == country]
    country_data = country_data.fillna(0)
    # `axis` must be passed by keyword: pandas 2.0 made it keyword-only.
    country_data = country_data.drop(['Country_Code','Country_Name','Unnamed:_64','Indicator_Code'], axis=1)

    AREA = country_data.loc[country_data['Indicator_Name'] == "Surface area (sq. km)"]
    AREA = AREA.transpose()
    # Row 0 after the transpose is the Indicator_Name label; skip it.
    AREA = AREA.iloc[1:].astype(float)
    AREA.columns = ["Country"]

    return AREA.iloc[[20]].iloc[0]
    
def getRawData(country):
    """Assemble the per-year feature table for one country.

    Reads the module-level ``WDI``, ``pv_data``, ``hyd_data`` and ``hdi_data``
    tables. The returned float frame has one row per year (rows at positions
    31..55 of the transposed indicator table) and the columns listed in
    ``titles`` followed by ``PV``, ``HYD`` and ``HDI``.

    Notes:
      * Missing WDI values are replaced by 0.
      * The column labelled ``POP`` actually holds population / surface area.
      * ``HYD`` is the country's hydro ``power`` value divided by surface area.
    """
    country_data = WDI.loc[WDI['Country_Name'] == country]
    country_data = country_data.fillna(0)
    # `axis` must be passed by keyword: pandas 2.0 made it keyword-only.
    country_data = country_data.drop(['Country_Code','Country_Name','Unnamed:_64','Indicator_Code'], axis=1)

    def indicatorRows(name):
        """Rows of this country's data for a single WDI indicator name."""
        return country_data.loc[country_data['Indicator_Name'] == name]

    #Total greenhouse gas emissions (kt of CO2 equivalent)
    #Fossil fuel energy consumption (% of total)
    ENR = indicatorRows("Energy use (kg of oil equivalent per capita)")
    FDI = indicatorRows("Foreign direct investment, net inflows (% of GDP)")
    GDP = indicatorRows("GDP per capita (current US$)")
    IND = indicatorRows("Industry (including construction), value added (% of GDP)")
    RND = indicatorRows("Trademark applications, total")
    POP = indicatorRows("Population, total")
    TRD = indicatorRows("Trade (% of GDP)")
    URB = indicatorRows("Urban population (% of total population)")
    CO2 = indicatorRows("CO2 intensity (kg per kg of oil equivalent energy use)")

    AREA = indicatorRows("Surface area (sq. km)")

    # Transpose to one row per year and drop the leading Indicator_Name row so
    # the two series can be divided element-wise.
    AREA = AREA.transpose()
    AREA = AREA.iloc[1:].astype(float)
    AREA.columns = ["Country"]
    POP = POP.transpose()
    POP = POP.iloc[1:].astype(float)
    POP.columns = ["Country"]
    DEN = POP/AREA
    DEN = DEN.transpose()

    ALL = [ENR, FDI, GDP, IND, RND, DEN, TRD, URB, CO2]
    INPUT = pd.concat(ALL)
    INPUT = INPUT.transpose()
    INPUT.columns = titles
    # Copy the slice so the column assignments below act on an independent
    # frame rather than on a view of the transposed table.
    INPUT = INPUT.iloc[31:56].copy()

    INPUT["PV"] = getPower(pv_data, country)
    INPUT["HYD"] = getPower(hyd_data, country)

    RAWHYD = INPUT["HYD"]
    # Division aligns on the year labels shared by INPUT and AREA.
    HYD = RAWHYD/AREA["Country"]

    INPUT["HYD"] = HYD
    INPUT["HDI"] = getHDI(hdi_data, country)
    INPUT = INPUT.astype(float)
    return INPUT

def getAllData(modelVersion, noZeroes, dumpScaler, dataInputs):
    """Build the scaled training set (and the held-out test set) for one model.

    Parameters:
        modelVersion: identifier used in the scaler file paths.
        noZeroes: when true, zeros are treated as missing values and filled
            by linear interpolation between surrounding values; zeros that
            cannot be interpolated stay 0.
        dumpScaler: when true, the fitted input/output scalers are written to
            ``{currentPath}/Models/{modelVersion}/``.
        dataInputs: iterable of country names passed to ``getRawData``.

    Returns the tuple ``(df, newNP, mainResults, train_df, test_df, input_NP,
    input_df, output_NP, output_df)`` where ``df`` is the raw frame of the
    LAST country and ``newNP`` is a 1x1 zero array kept for compatibility.

    Raises ValueError when ``dataInputs`` is empty.
    """
    newNP = np.zeros((1,1))

    # Collect every country's frame first and concatenate once, instead of
    # growing the frame inside the loop.
    countryFrames = [getRawData(country) for country in dataInputs]
    if not countryFrames:
        raise ValueError("dataInputs must contain at least one country")
    df = countryFrames[-1]
    mainResults = pd.concat(countryFrames)

    #return mainResults

    if noZeroes:
        mainResults = mainResults.astype(float)
        mainResults.index = mainResults.index.map(int)

        # NOTE(review): interpolation runs over the concatenated frame, so a
        # gap at a country boundary is filled using the neighbouring country's
        # values. Left unchanged to keep trained models reproducible.
        for column in mainResults.columns:
            if (mainResults[column] == 0).any():
                # Assign the result back: an in-place replace on the selected
                # column does not update the frame under pandas copy-on-write.
                filled = mainResults[column].replace(0, np.nan)
                filled = filled.interpolate(method="linear", limit_direction="both", limit_area="inside")
                mainResults[column] = filled.fillna(0)

    mainResults = mainResults.reset_index(drop=True)
    # Fixed random_state keeps the 85/15 split identical across models.
    train_df = mainResults.sample(frac=0.85, random_state=0)
    test_df = mainResults.drop(train_df.index)

    input_df = train_df.drop('CO2', axis=1)
    inputScaler = StandardScaler()
    input_NP = inputScaler.fit_transform(input_df.to_numpy())
    if dumpScaler:
        dump(inputScaler, '{}/Models/{}/Input_Scaler.bin'.format(currentPath, modelVersion), compress=True)

    # Everything except the CO2 target column is dropped for the output set.
    output_df = train_df.drop(["ENR", "FDI", "GDP", "IND", "RND", "POP", "TRD", "URB", "HDI", "HYD", "PV"], axis=1)
    outputScaler = StandardScaler()
    output_NP = outputScaler.fit_transform(output_df.to_numpy())
    if dumpScaler:
        dump(outputScaler, '{}/Models/{}/Output_Scaler.bin'.format(currentPath, modelVersion), compress=True)

    return df, newNP, mainResults, train_df, test_df, input_NP, input_df, output_NP, output_df

def build_model():
    """Create and compile the regression network used for every model version.

    Architecture: 11 inputs -> Dense(5, relu) -> Dense(1). Compiled with MSE
    loss, RMSprop (learning rate 0.001) and MAE/MSE metrics.
    """
    hiddenLayer = layers.Dense(5, activation="relu", input_dim=11)
    outputLayer = layers.Dense(1)
    network = keras.Sequential([hiddenLayer, outputLayer])

    network.compile(
        loss="mse",
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return network

def checkTest(testData, model, modelVersion):
    """Evaluate ``model`` on the held-out rows and return the comparison data.

    Parameters:
        testData: DataFrame of test rows including the ``CO2`` target column.
            The column is popped, so the caller's frame is modified.
        model: trained model exposing ``predict``.
        modelVersion: identifier used to locate the saved scalers.

    Returns ``(test_labels, finalOutput, error, normed_test_data)``: the true
    CO2 values, the predictions mapped back through the output scaler, their
    difference, and the scaled inputs.

    Raises KeyError when ``testData`` has no ``CO2`` column. Previously that
    case only failed after prediction, with an unbound-variable error.
    """
    if 'CO2' not in testData.columns:
        raise KeyError("testData must contain a 'CO2' column holding the true labels")
    test_labels = testData.pop('CO2')

    scaler = load('{}/Models/{}/Input_Scaler.bin'.format(currentPath, modelVersion))
    normed_test_data = scaler.transform(testData)

    test_predictions = model.predict(normed_test_data).flatten()
    outputScaler = load('{}/Models/{}/Output_Scaler.bin'.format(currentPath, modelVersion))
    finalOutput = outputScaler.inverse_transform(test_predictions)

    error = finalOutput - test_labels

    return test_labels, finalOutput, error, normed_test_data

def createANN(printTraining, printR, snsPlot, modelVersion, txtLog, inputData):
    """Train one network, evaluate it on the held-out rows and save artefacts.

    Parameters:
        printTraining: save the MSE/MAE training curves when true.
        printR: save the R2 scatter plot and error histogram when true.
        snsPlot: draw a seaborn pairplot of the training inputs when true.
        modelVersion: identifier used for the model's output directory.
        txtLog: open text file; a line is written when R2 is negative.
        inputData: country names forwarded to ``getAllData``.

    Returns ``(model, r2Val, history, input_df, epochsRun)``. ``r2Val`` is the
    test-set R2 rounded UP to two decimals.
    """
    modelDir = '{}/Models/{}'.format(currentPath, modelVersion)
    if not os.path.exists(modelDir):
        os.makedirs(modelDir)

    # Only the test rows and the scaled/unscaled training inputs are needed here.
    (_lastCountry, _seedArray, _combined, _trainingRows, testData,
     input_NP, input_df, output_NP, _output_df) = getAllData(modelVersion, True, True, inputData)

    stopper = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model = build_model()

    history = model.fit(
        input_NP,
        output_NP,
        epochs=epochs,
        batch_size=batchs,
        validation_split=0.15,
        verbose=0,
        callbacks=[stopper],
    )

    epochsRun = len(history.history['loss'])
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    hist.to_csv('{}/Models/{}/history.csv'.format(currentPath, modelVersion))

    actualVal, predictedVal, errorVal, _normedTest = checkTest(testData, model, modelVersion)
    r2Val = math.ceil(r2_score(actualVal, predictedVal)*100)/100

    if printTraining:
        printTrainingGraph(history, modelVersion)
    if printR:
        printRGraph(actualVal, predictedVal, errorVal, r2Val, modelVersion)
    if snsPlot:
        sns.pairplot(input_df)

    if r2Val < 0:
        # Log poorly performing models and keep their predictions for inspection.
        stamp = str(datetime.now().strftime('%H:%M:%S'))
        txtLog.write(stamp + " :: NEGATIVE R2: "+ str(r2Val)+" in model "+ str(modelVersion) +" \n")
        r2values = pd.DataFrame({'Actual': actualVal, 'Predicted': predictedVal}, columns=['Actual', 'Predicted'])
        r2values.to_csv('{}/Models/{}/r2Values.csv'.format(currentPath, modelVersion))

    return model, r2Val, history, input_df, epochsRun

def printTrainingGraph(history, modelVersion):
    """Save the smoothed MSE and MAE training curves of one model as PNGs."""
    plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

    # (figure number, metric, y-axis limits, y-axis label, output path template)
    panels = [
        (1, "mse", [0, 2], 'CO2 (MSE)', '{}/Models/{}/mse_trained.png'),
        (2, "mae", [0, 0.5], 'CO2 (MAE)', '{}/Models/{}/mae_trained.png'),
    ]
    for figureNumber, metric, yLimits, yLabel, pathTemplate in panels:
        plt.figure(figureNumber)
        plotter.plot({'Basic': history}, metric=metric)
        plt.ylim(yLimits)
        plt.ylabel(yLabel)
        plt.savefig(pathTemplate.format(currentPath, modelVersion))
        plt.close()
    
def printRGraph(actual, predicted, error, r2Val, modelVersion):
    """Save the predicted-vs-actual scatter plot and the error histogram."""
    # Figure 3: predictions against true values with the identity line.
    plt.figure(3)
    scatterAxes = plt.axes(aspect='equal')
    plt.scatter(actual, predicted)
    plt.xlabel('True Values [CO2]')
    plt.ylabel('Predictions [CO2]')
    axisRange = [0, 4]
    plt.xlim(axisRange)
    plt.ylim(axisRange)
    plt.plot(axisRange, axisRange)
    scatterAxes.set_title('R2: ' + str(r2Val))
    plt.savefig('{}/Models/{}/r2.png'.format(currentPath, modelVersion))
    plt.close()

    # Figure 4: distribution of the prediction errors.
    plt.figure(4)
    plt.hist(error, bins=25)
    plt.xlabel("Prediction Error [CO2]")
    plt.ylabel("Count")
    plt.savefig('{}/Models/{}/error_histogram.png'.format(currentPath, modelVersion))
    plt.close()
    
def createAllModels(iterations, txtLog, inputData):
    """Train ``iterations`` models and write their R2/epoch statistics to CSV.

    Each model's weights are saved under its own version directory. Writes
    ``ModelStats.csv`` (one row per model) and ``ModelChar.csv`` (the
    ``describe()`` summary) and returns the per-model statistics frame.
    """
    statRows = []

    for modelVersion in range(iterations):
        trainedModel, R2, _history, _inputs, epochsRun = createANN(False, True, False, modelVersion, txtLog, inputData)
        trainedModel.save_weights('{}/Models/{}/checkpoint_test'.format(currentPath, modelVersion))
        statRows.append([R2, epochsRun])

    statColumns = ['R2', 'Epochs']
    modelStats = pd.DataFrame(statRows, columns=statColumns)
    modelStats.to_csv('{}/Models/ModelStats.csv'.format(currentPath))

    modelCharacteristics = pd.DataFrame(statRows, columns=statColumns).describe()
    modelCharacteristics.to_csv('{}/Models/ModelChar.csv'.format(currentPath))

    return modelStats

#Produces csv for all of the countries data for post analysis
def produceAllCountryCSV(inputData):
    """Write ``{currentPath}/allData.csv`` with the model stats tagged per country.

    For every country in ``inputData`` the contents of
    ``{currentPath}/Models/ModelStats.csv`` are repeated with a ``Country``
    column holding that country's name.
    NOTE(review): the same stats file is used for every country - confirm
    this is intended.
    """
    countries = list(inputData)
    # The empty template fixes the column order even when there are no countries.
    frames = [pd.DataFrame(columns=["R2", "Epochs", "Country"])]

    if countries:
        # The file does not depend on the country, so read it once.
        modelStats = pd.read_csv('{}/Models/ModelStats.csv'.format(currentPath))
        # `axis` must be passed by keyword: pandas 2.0 made it keyword-only.
        modelStats = modelStats.drop(['Unnamed: 0'], axis=1)
        frames.extend(modelStats.assign(Country=country) for country in countries)

    allCountryData = pd.concat(frames)
    allCountryData.to_csv('{}/allData.csv'.format(currentPath))

#---------------------------------------------------------------------------------------
#Main process to start training the cohort of neural networks
#---------------------------------------------------------------------------------------

def runTraining(inputData):
    """Train the cohort of 100 models and record timing in ``log.txt``.

    The log is written to ``{currentPath}/log.txt``. On Windows the finished
    log is opened in Notepad, as before; on other platforms that step is
    skipped because ``notepad.exe`` does not exist there.
    """
    start = timer()

    # The context manager closes the log even if training raises.
    with open("{}/log.txt".format(currentPath), "w+") as txtLog:
        startStamp = str(datetime.now().strftime('%H:%M:%S'))
        txtLog.write("Time started: " + startStamp + "\n \n")

        createAllModels(100, txtLog, inputData)

        end = timer()
        secondsDuration = (end - start)
        # NOTE: the %H:%M:%S rendering wraps around after 24 hours.
        duration = time.strftime('%H:%M:%S', time.gmtime(secondsDuration))
        endStamp = str(datetime.now().strftime('%H:%M:%S'))
        txtLog.write("Time completed: " + endStamp + "\n")
        txtLog.write("Time taken: " + duration + "\n")

    print("\nCompleted")

    if os.name == "nt":
        osCommandString = "notepad.exe {}/log.txt".format(currentPath)
        os.system(osCommandString)

    #produceAllCountryCSV(inputData)


#---------------------------------------------------------------------------------------
#Data checks
#---------------------------------------------------------------------------------------


def zeroCheck(inputData):
    """Count zero-valued cells per feature for each country.

    Returns a frame with one row per country: a ``Country`` column, one count
    column per feature returned by ``getRawData``, and ``Total`` (the row's
    zero count as a percentage of 270, rounded to 2 decimals). Rows are
    sorted by ``Total`` ascending.
    NOTE(review): 270 is a hard-coded cell count - confirm it still matches
    the rows x columns returned by getRawData.
    """
    rows = []
    for country in inputData:
        df = getRawData(country)
        zero_count = (df == 0).astype(int).sum(axis=0)
        row = zero_count.to_frame().T
        row.insert(0, "Country", country)
        rows.append(row)

    # DataFrame.append was removed in pandas 2.0; concatenate the rows once.
    if rows:
        newDF = pd.concat(rows)
    else:
        newDF = pd.DataFrame(columns=["Country"])

    # numeric_only skips the string Country column when summing across a row.
    totals = ((newDF.sum(axis=1, numeric_only=True)/270)*100).round(2)
    # Assign by position: every row carries the same index label.
    newDF['Total'] = totals.to_numpy()
    newDF = newDF.sort_values(by=['Total'])

    return newDF

#---------------------------------------------------------------------------------------
#Predictions using models
#---------------------------------------------------------------------------------------

def prediction(predictionInput, modelVersion):
    """Predict with one saved model version and return unscaled outputs.

    Loads the version's input scaler, rebuilds the network, restores its
    saved weights, runs the model on the scaled input and maps the flattened
    output back through the version's output scaler.
    """
    inputScaler = load('{}/Models/{}/Input_Scaler.bin'.format(currentPath, modelVersion))
    scaledInput = inputScaler.transform(predictionInput)

    network = build_model()
    network.load_weights('{}/Models/{}/checkpoint_test'.format(currentPath, modelVersion)).expect_partial()

    # Call the model directly on a float32 array instead of using predict().
    rawOutput = network(scaledInput.astype('float32')).numpy().flatten()

    # Reset the Keras session state after each model has been used.
    K.clear_session()

    outputScaler = load('{}/Models/{}/Output_Scaler.bin'.format(currentPath, modelVersion))
    return outputScaler.inverse_transform(rawOutput)

def predictAll(predictionInput, modelCount=100):
    """Run every saved model version on one input row and collect the outputs.

    Parameters:
        predictionInput: a single feature row (2-D, one row) accepted by
            ``prediction``.
        modelCount: number of model versions (0 .. modelCount-1) to evaluate;
            defaults to the 100 models trained by ``runTraining``.

    Returns a float DataFrame with a single column labelled ``0`` and
    ``modelCount + 1`` rows. Row ``x`` holds model ``x``'s prediction; the
    final row is left as NaN so callers can store a reference value there.

    Raises ValueError when a model returns anything other than one value.
    """
    results = pd.DataFrame(index=range(modelCount + 1), columns=[0], dtype=float)

    for x in range(modelCount):
        predicted = np.asarray(prediction(predictionInput, x)).reshape(-1)
        if predicted.size != 1:
            raise ValueError("predictAll expects exactly one prediction per model, got {}".format(predicted.size))
        # Store the scalar, not the length-1 array.
        results.loc[x, 0] = predicted[0]

    return results

def getYearData(country, year):
    """Return one year's model inputs and CO2 value for a country.

    ``year`` is the row label in the frame from ``getRawData``. Returns
    ``(features, co2)`` where ``features`` is a 1 x N array of the columns
    ``ENR`` .. ``HDI`` without ``CO2``.
    """
    countryFrame = getRawData(country)
    yearRow = countryFrame.loc[year, 'ENR':'HDI']
    features = yearRow.drop("CO2")
    target = countryFrame.loc[year, "CO2"]
    return features.to_numpy().reshape(1, -1), target

def compareActualHistory(country):
    """Predict CO2 for each year 1990-2014 with every model and save the table.

    The result has one column per year; rows 0..99 hold the model predictions
    and row 100 holds the actual CO2 value. It is written to
    ``{currentPath}/Projections/{country}_HistoryCheck.csv`` and returned.
    """
    yearsTest = range(1990, 2015)

    # Placeholder column that fixes the 0..100 index; removed after joining.
    placeholder = pd.DataFrame(index=range(101), columns=["Results"]).fillna(0)
    perYear = [placeholder]

    for n in yearsTest:
        inputData, CO2 = getYearData(country, str(n))
        result = predictAll(inputData)
        # The last row is reserved for the observed value.
        result.loc[100,:] = CO2
        perYear.append(result)

        print("Completed " + str(n) + " ")

    resultsDF = pd.concat(perYear, axis=1)
    resultsDF.drop("Results", axis=1, inplace=True)
    resultsDF.columns = yearsTest
    resultsDF.to_csv('{}/Projections/{}_HistoryCheck.csv'.format(currentPath, country))
    print("\n " + str(datetime.now().strftime('%H:%M:%S')) + " Completed " + country + "\n")
    return resultsDF

#---------------------------------------------------------------------------------------
#Produces histogram to show ranges by country for different structures
#---------------------------------------------------------------------------------------

def directoryLooperbyCountry(countries=None):
    """Collect per-country model results for every structure into one CSV.

    For each sub-directory of ``Structure/`` a block is built with a label
    column ``"0"`` (the structure name) and one column per country taken from
    ``Structure/{structure}/{country}/{structure}_ModelStats.csv`` (the file's
    columns at positions 0 and 2 are dropped). All blocks are stacked and
    written to ``Structure/Structure Comparison.csv``.

    Parameters:
        countries: country names to include. Defaults to the module-level
            ``top6Countries``.
            NOTE(review): ``top6Countries`` is not defined in the visible
            notebook - define it or pass ``countries`` explicitly.
    """
    if countries is None:
        countries = top6Countries

    structureBlocks = [pd.DataFrame(data=np.zeros((1,1))[1:,1:])]

    for thisStructure in next(os.walk('Structure/'))[1]:

        # 100-row frame holding only the structure label; country columns are joined to it.
        thisResults = pd.DataFrame(data=np.zeros((101,1))[1:,1:])
        thisResults["0"] = " {}".format(thisStructure)
        blockColumns = [thisResults]

        for country in countries:
            thisModelData = pd.read_csv('Structure/{}/{}/{}_ModelStats.csv'.format(thisStructure, country, thisStructure))
            thisModelData = thisModelData.drop(thisModelData.columns[[0,2]], axis=1)
            thisModelData.columns = [country]
            blockColumns.append(thisModelData)

        structureBlocks.append(pd.concat(blockColumns, axis=1))

    mainResults = pd.concat(structureBlocks)
    # The former `.format(country, country)` on this placeholder-free path was a
    # no-op that raised when no country had been iterated.
    mainResults.to_csv('Structure/Structure Comparison.csv')

#---------------------------------------------------------------------------------------
#Produces histogram to show ranges for different structures only
#---------------------------------------------------------------------------------------

def directoryLooper(countries=None):
    """Pool all countries' model results per structure into one CSV.

    For each sub-directory of ``Structure/`` the remaining column of every
    ``Structure/{structure}/{country}/{structure}_ModelStats.csv`` (after
    dropping the columns at positions 0 and 2) is stacked into a single
    ``Results`` column, and a label column ``"0"`` holds the structure name.
    All structures are stacked and written to
    ``Structure/Structure Comparison.csv``.

    Parameters:
        countries: country names to include. Defaults to the module-level
            ``top6Countries``.
            NOTE(review): ``top6Countries`` is not defined in the visible
            notebook - define it or pass ``countries`` explicitly.
    """
    if countries is None:
        countries = top6Countries

    structureBlocks = [pd.DataFrame(data=np.zeros((1,1))[1:,1:])]

    for thisStructure in next(os.walk('Structure/'))[1]:

        countryRows = [pd.DataFrame(data=np.zeros((1,1))[1:,1:])]

        for country in countries:
            thisModelData = pd.read_csv('Structure/{}/{}/{}_ModelStats.csv'.format(thisStructure, country, thisStructure))
            thisModelData = thisModelData.drop(thisModelData.columns[[0,2]], axis=1)
            thisModelData.columns = ["Results"]
            countryRows.append(thisModelData)

        thisResults = pd.concat(countryRows)
        thisResults["0"] = " {}".format(thisStructure)

        #print(thisStructure + " mean is " + str(thisResults["Results"].mean(axis=0)))
        structureBlocks.append(thisResults)

    mainResults = pd.concat(structureBlocks)
    # The former `.format(country, country)` on this placeholder-free path was a
    # no-op that raised when no country had been iterated.
    mainResults.to_csv('Structure/Structure Comparison.csv')

    
#---------------------------------------------------------------------------------------
#HDI Functions
#---------------------------------------------------------------------------------------
    
def HDICalculate(rowCount=206):
    """Write ``02_Data/HDI_adjusted.csv`` with HDI values and their changes.

    For each of the first ``rowCount`` rows of ``hdi_data`` (looked up by
    index label 0 .. rowCount-1) the values at column positions 1..30 are
    paired with their first difference in ``Value`` / ``Change`` columns.
    Missing values - including the undefined first difference of every row -
    are written as 0.

    Parameters:
        rowCount: number of ``hdi_data`` rows to process (default 206).
    """
    frames = []
    for x in range(rowCount):
        thisrow = hdi_data.loc[x, :]
        thisrow = thisrow.iloc[1:31].astype(float)
        results = pd.concat([thisrow, thisrow.diff()], axis=1)
        results.columns = ["Value", "Change"]
        frames.append(results)

    if frames:
        mainResults = pd.concat(frames)
    else:
        mainResults = pd.DataFrame(columns=["Value", "Change"])

    # fillna returns a new frame; the result was previously discarded, so the
    # NaNs were still written to the CSV.
    mainResults = mainResults.fillna(0)
    mainResults.to_csv('02_Data/HDI_adjusted.csv')
    
def HDIProjector(h, x, k):
    """Project an HDI value forward from 2010 to year ``x``.

    Applies ``h = h*(1-k)+k`` once per year between 2010 and ``x`` (which may
    be a string or a number). Years at or before 2010 return ``h`` unchanged.
    """
    stepsLeft = int(x) - 2010
    while stepsLeft > 0:
        h = h*(1-k)+k
        stepsLeft -= 1
    return h
    

# Scenario identifiers SSP1..SSP5, used as values of the SSP column and as
# column names of the predictions table.
SSPS = ["SSP{}".format(number) for number in range(1, 6)]

# Projection years as strings (they are used as column labels): every decade
# from 2020 to 2050, then from 2060 to 2100.
projectionYears = [str(year) for year in range(2020, 2051, 10)]
nextYears = [str(year) for year in range(2060, 2101, 10)]
allYears = projectionYears + nextYears

def countryConverter(country):
    """Return the ``Code`` of the first ``countrycode`` row for ``country``."""
    matches = countrycode[countrycode["Country"] == country]
    return matches["Code"].iloc[0]

def countryOECD(country):
    """Return the ``OECD`` value of the first ``countrycode`` row for ``country``."""
    matches = countrycode[countrycode["Country"] == country]
    return matches["OECD"].iloc[0]

def countryIncome(country):
    """Return the ``Income`` value of the first ``countrycode`` row for ``country``."""
    matches = countrycode[countrycode["Country"] == country]
    return matches["Income"].iloc[0]

def interpolater(year1, year2, y1, y2, year):
    """Linearly interpolate between ``(year1, y1)`` and ``(year2, y2)`` at ``year``.

    Uses ``np.interp``, so a ``year`` outside ``[year1, year2]`` returns the
    nearest end value rather than extrapolating.
    """
    return np.interp(year, [year1, year2], [y1, y2])

def getGDPInfo(country, value):
    """Convert an SSP GDP figure using the country's WDI GDP series.

    ``value`` is multiplied by 1e9, then scaled in turn by the 2005 PPP
    conversion factor, the 2005 constant/current LCU ratio and the 2010
    current-US$/constant-LCU ratio taken from ``WDI_Data``.
    NOTE(review): presumably converts billions of 2005 PPP dollars into 2010
    current US$ - confirm against the SSP data definition.
    """
    indicatorNames = [
        "GDP (constant LCU)",
        "GDP (current US$)",
        "GDP (current LCU)",
        "PPP conversion factor, GDP (LCU per international $)",
        "GDP deflator: linked series (base year varies by country)",
    ]
    isCountry = WDI_Data['Country_Name'] == country
    gdpRows = WDI_Data.loc[isCountry & WDI_Data['Indicator_Name'].isin(indicatorNames)]

    def lookup(indicator, year):
        """First value of ``indicator`` in the given year column."""
        return gdpRows.loc[(gdpRows['Indicator_Name'] == indicator)][year].iloc[0]

    scaled = (value*1000000000) * lookup("PPP conversion factor, GDP (LCU per international $)", "2005")
    scaled = ((scaled)/lookup("GDP (current LCU)", "2005"))*(lookup("GDP (constant LCU)", "2005"))
    scaled = (scaled/lookup("GDP (constant LCU)", "2010")) * lookup("GDP (current US$)", "2010")

    return scaled

def industryProjector(country, year, income, thisSSP):
    """Project the industry share of GDP for ``country`` in ``year``.

    The country's "Industry (including construction), value added (% of GDP)"
    series is reduced to 25 yearly values that are treated as 1990..2014 and
    interpolated at ``year`` (``np.interp`` returns the last value for later
    years). That base value is scaled by a rate read from ``predictions``
    (``INDICATOR == "IND"``, ``TYPE == income``, column ``thisSSP``) which is
    phased in linearly between 2010 and 2050.

    Parameters:
        country: WDI country name.
        year: target year, as a string or number.
        income: value matched against the ``TYPE`` column of ``predictions``.
        thisSSP: name of the scenario column in ``predictions``.
    """
    industryData = WDI_Data.loc[(WDI_Data['Country_Name'] == country) & (WDI_Data['Indicator_Name'] == "Industry (including construction), value added (% of GDP)")]
    # Drop the first 34 columns, then the trailing unnamed column, then
    # everything after the next 25 columns.
    industryData = industryData.drop(industryData.columns[0:34], axis=1)
    # `axis` must be passed by keyword: pandas 2.0 made it keyword-only.
    industryData = industryData.drop(['Unnamed:_64'], axis=1)
    industryData = industryData.drop(industryData.columns[25:], axis=1)
    industryData = industryData.to_numpy().reshape(-1)

    x = arange(1990, 2015)
    # Callers pass the year as a string; convert it explicitly.
    indPrediction = interp(float(year), x, industryData)

    rate = predictions[(predictions['INDICATOR'] == "IND") & (predictions['TYPE'] == income)][thisSSP].iloc[0]
    # Fraction of the 2010-2050 span elapsed at `year`, applied to the rate (in percent).
    newRate = ( ( (rate/40)*( 40-(2050-int(year)) ) )/100)+1

    return indPrediction * newRate

def createInputData(country, year, thisSSP):
    """Build the projected model input row for a country, year and scenario.

    Combines the country's 2010 observations (``getYearData``) with SSP
    projections (``SSP_Data``) and scenario rates (``predictions``).

    Returns a one-row nested list of scalars in the order
    ``[ENR, FDI, GDP, IND, RND, density, TRD, URB, PV, HYD, HDI]``, which is
    the column order of the training inputs built by ``getRawData``.

    Parameters:
        country: WDI country name.
        year: projection year as a string; it is used as a column label of
            ``SSP_Data``.
        thisSSP: scenario name, e.g. ``"SSP1"``.
    """
    code = countryConverter(country)
    income = countryIncome(country)
    OECD = countryOECD(country)

    latest, CO2 = getYearData(country, "2010")
    yearsAhead = int(year) - 2010

    thisCountry = SSP_Data.loc[SSP_Data['REGION'] == code]

    def sspValue(variable, model):
        """First SSP value for this scenario, variable and source model."""
        selected = thisCountry[(thisCountry['VARIABLE'] == variable) & (thisCountry['MODEL'] == model) & (thisCountry['SSP'] == thisSSP)]
        return selected[year].iloc[0]

    def scenarioRate(indicator, kind=None):
        """Scenario value for an indicator, optionally filtered by TYPE."""
        mask = predictions['INDICATOR'] == indicator
        if kind is not None:
            mask = mask & (predictions['TYPE'] == kind)
        return predictions[mask][thisSSP].iloc[0]

    population = sspValue("Population", "OECD Env-Growth")*1000000
    urbanisation = sspValue("Population|Urban|Share", "NCAR")
    trade = interpolater(2010, 2050, latest[0][6], scenarioRate("TRD"), int(year))
    fdi = interpolater(2010, 2050, latest[0][1], scenarioRate("FDI", OECD), int(year))
    # RND grows by compound annual growth from its 2010 value.
    rnd = latest[0][4]*(((scenarioRate("RND", OECD)/100)+1)**yearsAhead)
    # ENR grows linearly (simple, non-compounding) from its 2010 value.
    #enr = latest[0][0]*(((predictions[(predictions['INDICATOR'] == "ENR")& (predictions['TYPE'] == OECD)][thisSSP].iloc[0]/100)+1)**(int(year)-2010))
    enr = latest[0][0]+((latest[0][0]*((scenarioRate("ENR", OECD)/100)))*yearsAhead)
    gdp = (getGDPInfo(country, sspValue("GDP|PPP", "OECD Env-Growth")))/population
    ind = industryProjector(country, year, income, thisSSP)

    rate = scenarioRate("HDI", income)
    hdio = getHDI(hdi_data, country)
    # Row 20 of the HDI series is used as the base value for the projection.
    hdi = HDIProjector(hdio.iloc[20, 0], year, rate)

    pv = getPower(pv_data, country)
    raw_hyd = getPower(hyd_data, country)

    # getArea returns a one-element Series. Take the scalar so every feature
    # below is a plain number: previously the density was left as a Series
    # inside the list, producing a ragged array that NumPy 1.24+ rejects.
    area = getArea(country).iloc[0]
    hyd = raw_hyd/area
    den = population/area

    thisInputData = [[enr, fdi, gdp, ind, rnd, den, trade, urbanisation, pv, hyd, hdi]]

    return thisInputData

def SSP_Projection(country, yearRange):
    """Project CO2 for one country under every SSP and save the results.

    For each scenario in ``SSPS`` and each year in ``yearRange`` all models
    are evaluated via ``predictAll``. Per scenario a CSV with the individual
    predictions plus their ``describe()`` statistics is written to
    ``Projections/Grouped/{country}/{SSP}.csv``; the lower bound, mean and
    upper bound per year are collected into ``Summary.csv``. The interval is
    ``mean +/- 1.96 * std / 10``. The elapsed time per scenario is printed.
    """
    if not os.path.exists('Projections/Grouped/{}/'.format(country)):
        os.makedirs('Projections/Grouped/{}/'.format(country))

    summaryParts = [pd.DataFrame(data=np.zeros((1,1))[1:,1:])]

    for SSP in SSPS:
        start = timer()

        # One column of model predictions per projection year.
        yearColumns = [pd.DataFrame(data=np.zeros((1,1))[1:,1:])]
        for year in yearRange:
            yearColumns.append(predictAll(createInputData(country, year, SSP)))

        SSPResults = pd.concat(yearColumns, axis=1)
        SSPResults.columns = yearRange
        characteristics = SSPResults.describe()

        # Rows of describe(): position 1 is the mean, position 2 the std.
        meanRow = characteristics.iloc[1]
        ConfInt = (characteristics.iloc[2]/10)*1.96

        plotting = pd.concat([meanRow - ConfInt, meanRow, meanRow + ConfInt], axis=1)
        plotting.columns = [SSP+" lower", SSP+" mean", SSP+" upper"]
        summaryParts.append(plotting.T)

        SSPResults = pd.concat([SSPResults, characteristics])
        SSPResults.to_csv('Projections/Grouped/{}/{}.csv'.format(country, SSP))

        secondsDuration = (timer() - start)
        duration = time.strftime('%H:%M:%S', time.gmtime(secondsDuration))
        print(country + " " + SSP +  ": " + duration)

    countrySummary = pd.concat(summaryParts)
    countrySummary.to_csv('Projections/Grouped/{}/Summary.csv'.format(country))

In [5]:
# Run the SSP projection for every IMF "emerging" country, printing a
# time-stamped, 1-based progress line before each one.
for x, country in enumerate(IMFEmerging):
    startStamp = datetime.now().strftime('%H:%M:%S')
    progressLine = startStamp + ": " + str(x+1) + ". " + country
    print(progressLine)
    SSP_Projection(country, allYears)
    print("\n")

17:21:04: 1. China
China SSP1: 00:00:55
China SSP2: 00:00:53
China SSP3: 00:00:53
China SSP4: 00:00:53
China SSP5: 00:00:53


17:25:34: 2. India
India SSP1: 00:00:54
India SSP2: 00:00:53
India SSP3: 00:00:52
India SSP4: 00:00:52
India SSP5: 00:00:52


17:30:00: 3. Russian Federation
Russian Federation SSP1: 00:00:53
Russian Federation SSP2: 00:00:53
Russian Federation SSP3: 00:00:53
Russian Federation SSP4: 00:00:54
Russian Federation SSP5: 00:00:53


17:34:28: 4. Brazil
Brazil SSP1: 00:00:54
Brazil SSP2: 00:00:54
Brazil SSP3: 00:00:56
Brazil SSP4: 00:00:53
Brazil SSP5: 00:00:53


17:39:01: 5. Saudi Arabia
Saudi Arabia SSP1: 00:00:53
Saudi Arabia SSP2: 00:00:53
Saudi Arabia SSP3: 00:00:53
Saudi Arabia SSP4: 00:00:53
Saudi Arabia SSP5: 00:00:53


17:43:28: 6. Pakistan
Pakistan SSP1: 00:00:53
Pakistan SSP2: 00:00:53
Pakistan SSP3: 00:00:53
Pakistan SSP4: 00:00:53
Pakistan SSP5: 00:00:53


17:47:56: 7. Mexico
Mexico SSP1: 00:00:53
Mexico SSP2: 00:00:54
Mexico SSP3: 00:00:52
Mexico SSP4: 0

In [None]:
# Hand-entered single-row inputs for ad-hoc prediction checks.
# NOTE(review): each row has 8 values, while build_model() in this notebook
# expects 11 inputs - this cell looks like it predates the PV/HYD/HDI features.
UK2019 = [[2630.53, 0.0790593814371999, 42328.9002575769, 17.4206873962023, 105674, 66836327, 64.2877735623301, 83.652]]
UK2018 = [[2757.69, 2.83706174602294, 43043.22782, 17.5188312851163, 94953, 66460344, 62.6190596109507, 83.398]]
UK2015 = [[2764.516671, 1.547962, 44974.831877, 18.141621, 57891.0, 65116219.0, 56.683096, 82.626]]
USA2017 = [[7547.57012, 1.820076, 59957.725851, 18.20794, 448211.0, 324985539.0, 27.14232, 82.058]]

# NOTE(review): predictAll as defined in this notebook takes the input row
# only; this two-argument call does not match that signature and will raise
# a TypeError if the cell is run as-is.
predictionSet = predictAll("United Kingdom", UK2018)
predictionSet.to_csv('Projections/History_Check/Adjusted_Input/UK2018.csv')

In [None]:
# The IMF "emerging" countries split into three batches, in the same order as
# they appear in IMFEmerging.
batch1 = [
    "China",
    "India",
    "Russian Federation",
    "Brazil",
    "Saudi Arabia",
    "Pakistan",
    "Mexico",
]

batch2 = [
    "Iran, Islamic Rep.",
    "Turkey",
    "Indonesia",
    "South Africa",
    "Vietnam",
    "Egypt, Arab Rep.",
    "Thailand",
]

batch3 = [
    "Argentina",
    "Nigeria",
    "Poland",
    "Malaysia",
    "Venezuela, RB",
    "Congo, Dem. Rep.",
]