In [109]:
#Importing the libraries used throughout the notebook
import pandas as pd
import csv
import pyspark
from pyspark import SparkContext, SparkConf
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot,iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error
from scipy.stats import pearsonr, zscore

In [3]:
# Configure and start the Spark context used by every RDD in this notebook.
conf = (
    SparkConf()
    .setAppName("CSE545 Project")
    .set("spark.driver.memory", "12g")
    .set("spark.executor.memory", "6g")
    .set("spark.driver.maxResultSize", "6g")
)
sc = SparkContext(conf=conf)

In [4]:
#Loading GHI Dataset
# Every line of the TSV file is split on tab characters, so each RDD element is a list of string fields.
GHI_rdd1 = sc.textFile("002_AppendixD.tsv").map(lambda line: line.split("\t"))

In [5]:
# Predicate used to drop records with more than one missing value.
def count_x(x):
    """Tell whether a record has at most one missing field.

    A field counts as missing when it is exactly the string " -"
    (a space followed by a dash).

    Args:
        x: sequence of field values for one record.

    Returns:
        True when zero or one field is missing, False otherwise.
    """
    missing = sum(1 for field in x if field == " -")
    return missing <= 1

In [6]:
GHI_rdd1 = (
    GHI_rdd1
    # Discard lines that did not split into at least two fields.
    .filter(lambda fields: len(fields) > 1)
    # Remove double-quote characters from every field.
    .map(lambda fields: [field.replace("\"", "") for field in fields])
    # Keep only records accepted by count_x (at most one " -" field).
    .filter(lambda fields: count_x(fields))
)

In [7]:
# Cast the four value columns of a data row from strings to floats.
def conv_x(x):
    """Convert positions 1-4 of a record to float, in place.

    The header record (first field equal to "Country") is returned
    untouched.

    Args:
        x: mutable record; positions 1..4 must be float-convertible
           unless this is the header record.

    Returns:
        The same list object that was passed in.
    """
    is_header = x[0] == "Country"
    if not is_header:
        for position in (1, 2, 3, 4):
            x[position] = float(x[position])

    return x

In [8]:
# Replace the "<5" placeholder with concrete descending values.
def rep_5(x):
    """Substitute each "<5" field with a decreasing numeric string.

    The first "<5" encountered becomes "4", the second "3", and so on
    (4 minus the number of placeholders already replaced). The input
    record is updated in place as well.

    Args:
        x: mutable record of field values.

    Returns:
        A new list holding the (possibly replaced) fields in order.
    """
    replaced = 0
    result = []
    for position, value in enumerate(x):
        if value == "<5":
            value = str(4 - replaced)
            x[position] = value  # the original record is updated too
            replaced += 1
        result.append(value)

    return result

In [9]:
# Estimate a missing first value from the differences between the later ones.
def fill_x(x):
    """Fill position 1 of a record when it is the " -" placeholder.

    The estimate is position 2 plus the mean of the two successive
    differences (x[2]-x[3] and x[3]-x[4]), rounded to one decimal and
    stored back as a string. Records whose position 1 is present are
    returned unchanged.

    Args:
        x: mutable record; positions 2..4 must be float-convertible
           when position 1 is missing.

    Returns:
        The same list object that was passed in.
    """
    if x[1] != " -":
        return x

    second, third, fourth = float(x[2]), float(x[3]), float(x[4])
    mean_step = ((second - third) + (third - fourth)) / 2
    x[1] = str(round(mean_step + second, 1))

    return x

In [27]:
# Clean every GHI record in three passes: replace "<5" markers (rep_5), fill a missing
# first value (fill_x), then cast the four value columns to float (conv_x).
GHI_rdd2 = GHI_rdd1.map(lambda x: rep_5(x)).map(lambda x: fill_x(x)).map(lambda x: conv_x(x))
#print(GHI_rdd2.collect())
#Distinct country names present in the cleaned GHI dataset
# The header record (first field "Country") is excluded; collect() brings the names to the driver as a list.
GHI_rdd2_unique_val = GHI_rdd2.map(lambda x: x[0]).filter(lambda x: x!="Country").distinct().collect()

In [28]:
# Load the second dataset (component values) with the same basic cleaning as the GHI file.
GHI_rdd3 = (
    sc.textFile("001_AppendixC.tsv")
    # Split each line into tab-separated fields.
    .map(lambda text: text.split("\t"))
    # Discard lines that did not split into at least two fields.
    .filter(lambda fields: len(fields) > 1)
    # Remove double-quote characters from every field.
    .map(lambda fields: [field.replace("\"", "") for field in fields])
)

In [29]:
#Filtering values which have corresponding record in GHI dataset
# Membership is tested against the driver-side list of country names collected from GHI_rdd2.
# NOTE(review): that list excludes "Country", so a header record whose first field is
# "Country" would be dropped here as well - confirm this is intended.
GHI_rdd3 = GHI_rdd3.filter(lambda x: x[0] in GHI_rdd2_unique_val)

In [30]:
# Regroup each record as [name, g1, g2, g3, g4], where group k gathers columns
# k, k+4, k+8 and k+12 of the flat input record.
GHI_rdd3 = GHI_rdd3.map(
    lambda row: [row[0]] + [
        [row[start], row[start + 4], row[start + 8], row[start + 12]]
        for start in range(1, 5)
    ]
)

In [31]:
# Counting the number of missing values in each group
def cnt(x):
    """Count the "-" placeholders inside every group.

    Args:
        x: sequence of groups, each group a sequence of field values.

    Returns:
        A list with one integer per group: how many of its items are
        exactly the string "-".
    """
    return [sum(1 for item in group if item == "-") for group in x]

In [32]:
#Removing the records with at least 2 missing values
def mor_2(x):
    """Return the record name when any group misses two or more values.

    The previous implementation compared each count with ``== 2``, so a
    group with three or four missing values was not flagged even though
    the intent (see the comment above) is "at least 2".

    Args:
        x: pair ``(name, counts)`` where ``counts`` is the list of
           per-group missing-value counts produced by ``cnt``.

    Returns:
        ``x[0]`` when at least one count is >= 2, otherwise ``None``.
    """
    name, missing_counts = x[0], x[1]
    if any(count >= 2 for count in missing_counts):
        return name
    return None

In [33]:
# Collect the names of records that mor_2 flags for removal.
GHI_rdd3_toRemove = (
    GHI_rdd3
    # Pair each name with the per-group missing-value counts of its four groups.
    .map(lambda row: (row[0], cnt(row[1:5])))
    # mor_2 yields the name for flagged records and None for the rest.
    .map(lambda pair: mor_2(pair))
    .filter(lambda name: name is not None)
    .collect()
)

In [34]:
#Updating GHI and values datasets for the final filtered results
# Both RDDs drop the records whose name was collected into GHI_rdd3_toRemove, so they stay aligned.
# GHI_rdd2 is rebound to its own filtered version, so later cells see the filtered RDD.
GHI_rdd4 = GHI_rdd3.filter(lambda x: x[0] not in GHI_rdd3_toRemove)
GHI_rdd2 = GHI_rdd2.filter(lambda x: x[0] not in GHI_rdd3_toRemove)

In [35]:
#Filling in missing values according to the GHI value
def cal_x0(x, ghi_rdd):
    """Convert a grouped record to floats, deriving a missing first value from GHI.

    For every group ``x[i]`` (i >= 1) the four items are converted to floats
    rounded to one decimal. When the first item is the placeholder "-", it is
    computed from the GHI value at the same position ``i`` of the matching GHI
    row as ``3 * (ghi - x[i][3]/3 - x[i][2]/6 - x[i][1]/6)``.

    The header record (first field "Country") is returned untouched.

    Args:
        x: mutable record ``[name, group1, group2, ...]``; each group is a
           mutable list of four values. It is modified in place.
        ghi_rdd: object exposing ``.value`` (e.g. a Spark broadcast) holding
           the GHI rows; each row starts with the name followed by the GHI
           values as numbers.

    Returns:
        The same ``x`` object, updated in place.

    Raises:
        KeyError: when no GHI row has the same name as ``x[0]``. Previously
            this surfaced as an ``UnboundLocalError``.
        ValueError: when an item other than a "-" first item is not
            float-convertible.
    """
    if x[0] == "Country":
        return x

    # Scan every row without breaking so that, as before, the last match wins.
    ghis = None
    for row in ghi_rdd.value:
        if row[0] == x[0]:
            ghis = row
    if ghis is None:
        raise KeyError("no GHI row found for %r" % (x[0],))

    for i in range(1, len(x)):
        ghi = ghis[i]
        group = x[i]
        first_is_missing = group[0] == "-"
        # Items 1..3 are converted the same way whether or not item 0 is missing.
        for k in (1, 2, 3):
            group[k] = round(float(group[k]), 1)
        if first_is_missing:
            # Solve ghi = a/3 + b/6 + c/6 + d/3 for the missing first item a.
            group[0] = round(3*(ghi - (group[3]/3) - (group[2]/6) - (group[1]/6)),1)
        else:
            group[0] = round(float(group[0]),1)
    return x

In [36]:
def conv_listx(x):
    """Flatten a grouped record back into a single flat list.

    Args:
        x: record ``[name, g1, g2, g3, g4]`` where each group has at least
           four items.

    Returns:
        ``[name, g1[0], ..., g1[3], g2[0], ..., g4[3]]`` - 17 items.
    """
    return [x[0]] + [x[group][item] for group in range(1, 5) for item in range(4)]

In [37]:
#Call to fill in the missing values
# The cleaned GHI rows are collected to the driver and broadcast so cal_x0 can look them up inside the map.
GHI_rdd2_broadcast=sc.broadcast(GHI_rdd2.collect())
# cal_x0 converts each group to floats (deriving a missing first value from GHI); conv_listx then flattens the record.
GHI_rdd4 = GHI_rdd4.map(lambda x: cal_x0(x,GHI_rdd2_broadcast)).map(lambda x: conv_listx(x))

In [38]:
# #Transforming rdd to pandas dataframe for applying ML and Visulaization techniques
# headers = GHI_rdd2.collect()[0]
# GHI_rdd2=GHI_rdd2.filter(lambda x: x[0]!='Country')

# df0 = pd.DataFrame(GHI_rdd2.collect(), columns=headers)

# headers = GHI_rdd4.collect()[0]
# GHI_rdd4=GHI_rdd4.filter(lambda x: x[0]!='Country')

# df1 = pd.DataFrame(GHI_rdd4.collect(), columns=headers)

In [39]:
def calc_beta(betas, X_test, y_test):
    """Print the mean absolute error of the linear predictions ``X_test @ betas``.

    Args:
        betas: weight matrix; only the first column of the product is used.
        X_test: held-out feature matrix.
        y_test: held-out target values.

    Returns:
        None. The error is only printed.
    """
    product = np.matmul(X_test, betas)
    first_column = product[:, 0]
    error = mean_absolute_error(y_test, first_column)
    print("Mean Absolute Error:", error)

In [40]:
def RidgeRegression(X, penalty_value = 1.0, learning_rate = 0.00000000001, n_epochs = 100):
    """Fit ridge-regression weights with gradient descent (TensorFlow 1.x graph API).

    The first 90% of the rows are used for training and the remaining rows
    for a mean-absolute-error report printed through ``calc_beta``. Rows are
    taken in their existing order (no shuffling).

    Args:
        X: 2-D NumPy array; the first three columns are the features and the
           remaining column(s) the target.
        penalty_value: L2 penalty weight applied to the sum of squared
           weights. Previously this argument was ignored and the penalty was
           always 1.0; the default keeps that behaviour.
        learning_rate: step size of the gradient-descent optimizer.
        n_epochs: number of gradient-descent steps.

    Returns:
        NumPy array of shape (3, 1) with the fitted weights.

    Notes:
        The weights start from an unseeded uniform random draw in [-1, 1), so
        results differ between runs.
        NOTE(review): with the tiny default learning rate the weights may move
        very little from that random start in 100 epochs - confirm the default.
    """

    #Dividing into training and test data
    offset = int(int(X.shape[0]) * 0.9)
    X_test, Y_test = X[offset:,:3], X[offset:,3:]
    X_tf, Y_tf = X[:offset,:3], X[:offset,3:]

    # Conversion to tensors
    X_tf = tf.constant(X_tf, dtype=tf.float32, name="GHI_X")
    Y_tf = tf.constant(Y_tf.reshape(-1,1), dtype=tf.float32, name="GHI_Y")
    # Honour the caller-supplied penalty instead of a hard-coded 1.0.
    penalty = tf.constant(float(penalty_value), dtype=tf.float32, name="penalty")
    beta = tf.Variable(tf.random_uniform([int(X_tf.shape[1]), 1], -1., 1.), name = "beta")
    y_pred = tf.matmul(X_tf, beta, name="predictions")
    # Ridge objective: residual sum of squares plus the penalised squared norm of the weights.
    penalizedCost = tf.reduce_sum(tf.square(Y_tf - y_pred)) + penalty * tf.reduce_sum(tf.square(beta))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    training_op = optimizer.minimize(penalizedCost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            if epoch %10 == 0: #print debugging output
                print("Epoch", epoch, "; penalizedCost =", penalizedCost.eval())
            sess.run(training_op)
        #done training, get final beta: 
        best_beta = beta.eval()
    print(best_beta)
    calc_beta(best_beta, X_test, Y_test)
    return best_beta

In [45]:
#Transforming rdd to a NumPy array for applying Machine Learning Models
# Drop the name column, then drop the header record (its first remaining field is "GHI1992").
X_ghi=GHI_rdd2.map(lambda x: x[1:]).filter(lambda x: x[0]!="GHI1992")
# X_ghi is rebound from an RDD to a 2-D array (one row per record, four GHI columns).
X_ghi=np.array(X_ghi.collect())
print(X_ghi)

# Fit weights that map the first three columns to the fourth.
best_beta_val = RidgeRegression(X_ghi, 1)

[[ 50.2  52.7  37.9  33.3]
 [ 20.8  21.6  16.5  11.1]
 [ 17.5  15.6  11.3   9.5]
 [ 65.8  57.5  39.7  32.5]
 [  7.    6.6   5.8   5.4]
 [ 23.8  18.4  11.4   7.7]
 [ 36.5  27.5  15.3   9.6]
 [ 53.6  37.6  32.2  26.5]
 [  5.    4.    3.    2. ]
 [ 44.5  37.5  31.7  24.4]
 [ 36.7  30.3  23.9  17.2]
 [ 33.8  33.   30.7  24.4]
 [ 15.9  11.7   5.4   5.4]
 [  7.9   8.2   7.6   5.4]
 [ 47.   47.9  36.4  27.6]
 [ 45.8  43.6  27.1  22.2]
 [ 40.   39.6  29.5  22.1]
 [ 52.2  50.9  47.   50.9]
 [ 62.5  51.9  50.9  43.5]
 [  5.9   4.    3.    2. ]
 [ 25.9  15.8  11.2   7.5]
 [ 14.6  11.3   9.4   8. ]
 [ 39.1  36.   31.6  25.6]
 [  7.5   6.2   5.    5.3]
 [ 32.9  32.6  35.1  26.5]
 [  7.8   6.2   4.    3. ]
 [ 10.5   5.3   4.    3. ]
 [ 60.3  46.7  35.1  31.4]
 [ 23.8  18.4  15.4  11.6]
 [ 22.3  20.5  16.4  14.4]
 [ 20.1  16.4  16.6  14.7]
 [ 19.5  16.2  12.7  11.1]
 [  7.8   6.2   4.    3. ]
 [ 67.8  56.   40.2  32.3]
 [ 11.5   9.8   9.1   8.1]
 [ 24.2  20.7  17.4  13.8]
 [ 35.2  27.5  23.8  23.2]
 

In [46]:
# Extrapolate one period ahead: apply the fitted weights to the three most recent GHI columns.
# The InteractiveSession is opened so that .eval() below has a session to run in.
sess = tf.InteractiveSession()
# presumably cast to float64 so the weights match the dtype of X_ghi in matmul - TODO confirm
best_beta_val = np.array(best_beta_val, dtype=np.float64)
# Columns 1: are GHI2000, GHI2008 and GHI2015 (see the rename below).
X_ghi_2024 = tf.matmul(X_ghi[:,1:], best_beta_val, name="predictions")
s = X_ghi_2024.eval()
print(s)

#close the session to release resources
sess.close()

# Label the four observed columns and append the prediction as GHI2024.
X_ghi_df = pd.DataFrame(X_ghi)
X_ghi_df = X_ghi_df.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015"})
# NOTE(review): negative predictions are stored as-is here; the UN/stu/wast cells clamp them to 0.
X_ghi_df['GHI2024'] = s

[[ 15.56972377]
 [  3.00524869]
 [  4.18974853]
 [ 15.00397584]
 [  2.06458993]
 [  3.54366131]
 [  5.22106035]
 [  8.56349964]
 [  0.5592742 ]
 [  7.03157489]
 [  5.00707815]
 [  6.1665905 ]
 [  4.57285545]
 [  0.93276687]
 [  9.68070557]
 [ 12.05524707]
 [  7.91432538]
 [ 22.42514894]
 [ 11.94433877]
 [  0.5592742 ]
 [  2.4780313 ]
 [  2.88376272]
 [  7.64719162]
 [  2.60161195]
 [  3.60537758]
 [  1.41988797]
 [  1.04759328]
 [ 14.07853515]
 [  3.25119677]
 [  5.77513292]
 [  4.16394072]
 [  4.55813894]
 [  1.41988797]
 [ 13.69583035]
 [  2.6569232 ]
 [  4.29314654]
 [  9.63077176]
 [  5.10223253]
 [  5.59798026]
 [  8.8161653 ]
 [ 12.00394487]
 [ 15.51872656]
 [  3.4387871 ]
 [  7.60196183]
 [  5.12390065]
 [ 10.06417413]
 [  3.18279054]
 [  4.15390515]
 [  7.03966706]
 [  3.47782679]
 [  4.12875631]
 [ -0.69447896]
 [  5.94560089]
 [  0.5592742 ]
 [  3.60372083]
 [ 12.65819995]
 [  1.62671835]
 [  3.22283473]
 [  8.25713272]
 [ 14.60585957]
 [  1.29578974]
 [  1.82701843]
 [ 17.63

In [47]:
# Second extrapolation step: slide the three-column window forward by one period.
# X_ghi_2024 is rebound here from the earlier TF tensor to a NumPy copy of X_ghi_df.
X_ghi_2024 = np.array(X_ghi_df)
sess = tf.InteractiveSession()
best_beta_val = np.array(best_beta_val, dtype=np.float64)
# Columns 2: are GHI2008, GHI2015 and the previously predicted GHI2024.
X_ghi_2032 = tf.matmul(X_ghi_2024[:,2:], best_beta_val, name="predictions")
s = X_ghi_2032.eval()
print(s)

#close the session to release resources
sess.close()

# Rebuild the frame with labelled columns and append the new prediction as GHI2032.
X_ghi_2024 = pd.DataFrame(X_ghi_2024)
X_ghi_2024 = X_ghi_2024.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015",4: "GHI2024"})
# NOTE(review): negative predictions are stored as-is here; the UN/stu/wast cells clamp them to 0.
X_ghi_2024['GHI2032'] = s
X_ghi_2024

[[ -2.76004879e+00]
 [ -1.38950763e+00]
 [ -8.24381573e-01]
 [ -1.75405927e+00]
 [ -1.02654030e+00]
 [  3.98788446e-01]
 [  1.70731556e+00]
 [ -4.97702817e+00]
 [ -2.22335654e-01]
 [ -4.54203957e+00]
 [ -2.51117029e+00]
 [ -5.77488306e+00]
 [  1.18344609e+00]
 [ -1.35384299e+00]
 [ -3.27774200e+00]
 [  5.05061237e-01]
 [ -2.32416035e+00]
 [ -1.00415186e+01]
 [ -1.09800649e+01]
 [ -2.22335654e-01]
 [ -4.93849880e-01]
 [ -1.35243619e+00]
 [ -5.19617321e+00]
 [ -7.49233771e-01]
 [ -8.47299719e+00]
 [  9.87959875e-03]
 [ -3.42701766e-01]
 [ -3.43719865e+00]
 [ -2.10985408e+00]
 [ -2.09607108e+00]
 [ -3.83816383e+00]
 [ -1.49075515e+00]
 [  9.87959875e-03]
 [ -2.58680915e+00]
 [ -1.79101150e+00]
 [ -2.48803082e+00]
 [ -4.15260560e+00]
 [  5.92483610e-01]
 [ -1.78238943e+00]
 [ -3.09471254e+00]
 [ -3.31499155e+00]
 [ -2.80662020e+00]
 [ -3.36296659e+00]
 [ -9.25854545e+00]
 [ -2.36497468e+00]
 [ -7.03216542e+00]
 [ -7.20190037e+00]
 [ -4.05158051e-02]
 [ -5.52160691e+00]
 [ -1.53441770e+00]


Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032
0,50.2,52.7,37.9,33.3,15.569724,-2.760049
1,20.8,21.6,16.5,11.1,3.005249,-1.389508
2,17.5,15.6,11.3,9.5,4.189749,-0.824382
3,65.8,57.5,39.7,32.5,15.003976,-1.754059
4,7.0,6.6,5.8,5.4,2.064590,-1.026540
5,23.8,18.4,11.4,7.7,3.543661,0.398788
6,36.5,27.5,15.3,9.6,5.221060,1.707316
7,53.6,37.6,32.2,26.5,8.563500,-4.977028
8,5.0,4.0,3.0,2.0,0.559274,-0.222336
9,44.5,37.5,31.7,24.4,7.031575,-4.542040


In [48]:
# Third extrapolation step: slide the three-column window forward once more.
# X_ghi_2032 is rebound here from the earlier TF tensor to a NumPy copy of the X_ghi_2024 frame.
X_ghi_2032 = np.array(X_ghi_2024)
sess = tf.InteractiveSession()
best_beta_val = np.array(best_beta_val, dtype=np.float64)
# Columns 3: are GHI2015 and the two previously predicted columns GHI2024 and GHI2032.
X_ghi_2040 = tf.matmul(X_ghi_2032[:,3:], best_beta_val, name="predictions")
s = X_ghi_2040.eval()
print(s)

#close the session to release resources
sess.close()

# Rebuild the frame with labelled columns and append the new prediction as GHI2040.
X_ghi_2032 = pd.DataFrame(X_ghi_2032)
X_ghi_2032 = X_ghi_2032.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015",4: "GHI2024",5: "GHI2032"})
# NOTE(review): negative predictions are stored as-is here; the UN/stu/wast cells clamp them to 0.
X_ghi_2032['GHI2040'] = s
X_ghi_2032

[[ -4.35405759]
 [  0.28100508]
 [ -1.02599112]
 [ -3.16850308]
 [ -0.79575724]
 [  0.03164046]
 [  0.38532562]
 [ -2.28491359]
 [  0.05944813]
 [ -1.21509938]
 [ -0.25273505]
 [ -1.52071528]
 [ -1.20225064]
 [  0.02212032]
 [ -1.33386271]
 [ -2.35133489]
 [ -0.94572973]
 [-10.80089393]
 [ -4.30682053]
 [  0.05944813]
 [  0.16542458]
 [ -0.84517538]
 [ -1.9516585 ]
 [ -1.10963696]
 [ -0.65505372]
 [ -0.16456405]
 [ -0.12748835]
 [ -4.2953541 ]
 [ -0.43945259]
 [ -1.78322401]
 [ -1.70343784]
 [ -1.36231943]
 [ -0.16456405]
 [ -2.73633762]
 [ -0.99311857]
 [ -0.9258424 ]
 [ -3.9327504 ]
 [ -1.33801985]
 [ -0.56503201]
 [ -3.15327919]
 [ -3.27056163]
 [ -5.46422891]
 [ -0.94445583]
 [ -2.19637106]
 [ -1.43030918]
 [ -3.69969751]
 [ -0.89163265]
 [ -1.03386964]
 [ -2.77135269]
 [ -1.60949915]
 [ -1.4161737 ]
 [  1.26494581]
 [ -0.12742504]
 [  0.05944813]
 [ -0.03892787]
 [ -2.75264725]
 [ -0.18516167]
 [ -1.40205736]
 [ -2.47092832]
 [ -4.92643887]
 [ -0.15220549]
 [ -0.48403763]
 [ -7.64

Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032,GHI2040
0,50.2,52.7,37.9,33.3,15.569724,-2.760049,-4.354058
1,20.8,21.6,16.5,11.1,3.005249,-1.389508,0.281005
2,17.5,15.6,11.3,9.5,4.189749,-0.824382,-1.025991
3,65.8,57.5,39.7,32.5,15.003976,-1.754059,-3.168503
4,7.0,6.6,5.8,5.4,2.064590,-1.026540,-0.795757
5,23.8,18.4,11.4,7.7,3.543661,0.398788,0.031640
6,36.5,27.5,15.3,9.6,5.221060,1.707316,0.385326
7,53.6,37.6,32.2,26.5,8.563500,-4.977028,-2.284914
8,5.0,4.0,3.0,2.0,0.559274,-0.222336,0.059448
9,44.5,37.5,31.7,24.4,7.031575,-4.542040,-1.215099


In [49]:
# Round every column to two decimals for display.
# Despite its name, X_ghi_2032 is the frame that also carries the GHI2040 column.
X_ghi_2032 = X_ghi_2032.round(2)
X_ghi_2032

Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032,GHI2040
0,50.2,52.7,37.9,33.3,15.57,-2.76,-4.35
1,20.8,21.6,16.5,11.1,3.01,-1.39,0.28
2,17.5,15.6,11.3,9.5,4.19,-0.82,-1.03
3,65.8,57.5,39.7,32.5,15.00,-1.75,-3.17
4,7.0,6.6,5.8,5.4,2.06,-1.03,-0.80
5,23.8,18.4,11.4,7.7,3.54,0.40,0.03
6,36.5,27.5,15.3,9.6,5.22,1.71,0.39
7,53.6,37.6,32.2,26.5,8.56,-4.98,-2.28
8,5.0,4.0,3.0,2.0,0.56,-0.22,0.06
9,44.5,37.5,31.7,24.4,7.03,-4.54,-1.22


In [58]:
# Pick columns 1, 5, 9 and 13 of the flattened records (labelled UN9193, UN9901, UN0709, UN1416 further down)
# and drop any header record, recognised by its first selected field being "UN9193".
X_param1=GHI_rdd4.map(lambda x: [x[1],x[5],x[9],x[13]]).filter(lambda x: x[0]!="UN9193")
# X_param1 is rebound from an RDD to a 2-D NumPy array.
X_param1=np.array(X_param1.collect())
print(X_param1)

# Fit weights that map the first three columns to the fourth.
best_beta_val_UN1 = RidgeRegression(X_param1, 1)

[[  37.4   46.1   24.6   23. ]
 [   7.7    7.2    9.7    4.9]
 [   9.1   10.7    7.5    4.6]
 [  65.4   50.    23.7   14. ]
 [   4.7    3.5    4.1    3.6]
 [  52.7   23.8    5.2    4.4]
 [  81.7   23.2    2.6    1.2]
 [  36.1   20.8   16.4   15.1]
 [   8.2    2.3    1.6    1.1]
 [  28.    22.6   12.8   10.3]
 [  35.9   33.4   28.1   20.2]
 [  26.8   35.6   31.9   26. ]
 [  14.2   12.     2.6    2.3]
 [   1.9    4.8    5.8    3.4]
 [  22.7   25.4   22.6   20.2]
 [  30.9   29.2   18.7   15.3]
 [  36.5   30.6   14.5    7.9]
 [  47.7   42.6   34.7   58.6]
 [  55.7   40.1   40.7   32.5]
 [   7.4    4.7    4.1    3.7]
 [  23.3   15.9   13.5    9.6]
 [  12.6    9.7    9.5    7.1]
 [  43.7   32.5   33.    28.2]
 [   5.5    5.1    5.1    5.6]
 [  13.3   17.6   16.5   15.4]
 [  20.9   10.4    2.2    1.4]
 [  13.2    3.7    2.3    0.8]
 [  75.7   48.1   24.1   12.8]
 [  29.2   28.1   20.2   13.5]
 [  16.2   18.5   14.6   12.1]
 [   5.8    5.3    4.6    4.5]
 [  16.    11.2   11.6   12.3]
 [  13.5

In [59]:
# Extrapolate one period ahead for the UN series from its three most recent columns.
# The InteractiveSession is opened so that .eval() below has a session to run in.
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
# Columns 1: are UN9901, UN0709 and UN1416 (see the rename below).
X_param1_2024 = tf.matmul(X_param1[:,1:], best_beta_val_UN1, name="predictions")
s = X_param1_2024.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param1_df = pd.DataFrame(X_param1)
X_param1_df = X_param1_df.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param1_df['UN2325'] = s

[[ 30.61926592]
 [  5.4881523 ]
 [  6.378828  ]
 [ 24.32518115]
 [  3.71772164]
 [ 10.09429983]
 [  7.43767194]
 [ 17.38896054]
 [  1.46416328]
 [ 14.15396395]
 [ 24.45238343]
 [ 29.6434858 ]
 [  5.15857157]
 [  3.81242227]
 [ 22.52831842]
 [ 19.7996578 ]
 [ 14.3333372 ]
 [ 58.49157942]
 [ 35.74731366]
 [  4.1425752 ]
 [ 11.61914598]
 [  8.03783526]
 [ 30.51350238]
 [  5.76755412]
 [ 16.71947963]
 [  3.98217016]
 [  1.56905529]
 [ 22.76281444]
 [ 17.89658653]
 [ 14.36209325]
 [  4.94464813]
 [ 12.64177707]
 [  3.48789836]
 [ 36.37084247]
 [  4.90449455]
 [  7.82418819]
 [ 12.04155723]
 [  9.26268587]
 [ 10.41942947]
 [ 17.80741867]
 [ 20.80421249]
 [ 29.45286162]
 [  8.68358603]
 [ 51.06049974]
 [ 16.84995161]
 [ 15.87207885]
 [ 10.52456602]
 [  5.54362562]
 [ 29.20477738]
 [  8.51605887]
 [  5.55185413]
 [  3.09411514]
 [ 23.35981493]
 [  2.33885966]
 [  9.31854414]
 [ 23.36917454]
 [  2.43695183]
 [  4.76744701]
 [ 15.13329716]
 [ 44.02421353]
 [  2.12623068]
 [  5.22023281]
 [ 42.73

In [60]:
# Second extrapolation step for the UN series: slide the three-column window forward by one.
X_param1_df_2024 = np.array(X_param1_df)
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
# Columns 2: are UN0709, UN1416 and the previously predicted (clamped) UN2325.
X_param1_2032 = tf.matmul(X_param1_df_2024[:,2:], best_beta_val_UN1, name="predictions")
s = X_param1_2032.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param1_df_2024 = pd.DataFrame(X_param1_df_2024)
X_param1_df_2024 = X_param1_df_2024.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416",4:"UN2325"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param1_df_2024['UN3133'] = s

[[ 30.91499067]
 [  6.99844156]
 [  7.12958355]
 [ 26.02062611]
 [  4.01862122]
 [  9.5683118 ]
 [  6.83118976]
 [ 18.11743164]
 [  1.59901208]
 [ 14.71914345]
 [ 26.9791623 ]
 [ 31.9919691 ]
 [  4.87009294]
 [  4.59447426]
 [ 23.81641708]
 [ 20.75900352]
 [ 15.50738958]
 [ 54.60611024]
 [ 39.14113219]
 [  4.36448059]
 [ 12.86176794]
 [  8.91367521]
 [ 32.88471111]
 [  5.87419044]
 [ 17.57135172]
 [  3.83871621]
 [  1.90484106]
 [ 24.91651477]
 [ 19.72456037]
 [ 15.28830343]
 [  5.12039112]
 [ 12.99394289]
 [  3.52870215]
 [ 38.44414094]
 [  4.99514865]
 [  8.94242151]
 [ 12.50118764]
 [  9.19504094]
 [  9.97305921]
 [ 18.28966821]
 [ 21.42138938]
 [ 29.37725154]
 [  9.87159542]
 [ 54.27815385]
 [ 17.46188029]
 [ 16.86979575]
 [ 13.06120289]
 [  6.06537342]
 [ 30.85647429]
 [  8.85582319]
 [  5.38687732]
 [  3.51939701]
 [ 25.43186147]
 [  2.27532228]
 [ 10.10645873]
 [ 24.99899783]
 [  2.47828103]
 [  4.71582251]
 [ 15.11738762]
 [ 44.37055577]
 [  2.13776889]
 [  5.24523376]
 [ 41.90

In [61]:
# Third extrapolation step for the UN series: slide the three-column window forward once more.
X_param1_df_2032 = np.array(X_param1_df_2024)
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
# Columns 3: are UN1416 and the two previously predicted columns UN2325 and UN3133.
X_param1_2040 = tf.matmul(X_param1_df_2032[:,3:], best_beta_val_UN1, name="predictions")
s = X_param1_2040.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param1_df_2032 = pd.DataFrame(X_param1_df_2032)
X_param1_df_2032 = X_param1_df_2032.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416",4:"UN2325",5:"UN3133"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param1_df_2032['UN3941'] = s

[[ 30.21192968]
 [  6.84517947]
 [  6.81089099]
 [ 23.99545348]
 [  4.11807391]
 [  8.5361743 ]
 [  5.52622169]
 [ 18.2031925 ]
 [  1.54483604]
 [ 14.22642539]
 [ 26.54874261]
 [ 32.02291919]
 [  4.36064603]
 [  4.53250875]
 [ 24.05028959]
 [ 20.29498649]
 [ 14.18465743]
 [ 58.20100156]
 [ 39.40920146]
 [  4.40592511]
 [ 12.65050086]
 [  8.895531  ]
 [ 33.33388955]
 [  6.09426243]
 [ 17.87908709]
 [  3.32492865]
 [  1.70619331]
 [ 22.8388806 ]
 [ 19.04724326]
 [ 15.19785564]
 [  5.20890162]
 [ 13.46335561]
 [  3.41321509]
 [ 37.73663485]
 [  5.13612949]
 [  8.90468291]
 [ 12.69460041]
 [  9.02121126]
 [  9.76261288]
 [ 18.46145059]
 [ 21.40623288]
 [ 30.52226701]
 [ 10.04709504]
 [ 55.04700143]
 [ 17.60891012]
 [ 17.09562116]
 [ 12.40223089]
 [  6.23878403]
 [ 31.62251808]
 [  9.1870697 ]
 [  5.30517052]
 [  3.2884779 ]
 [ 25.02277513]
 [  2.39108363]
 [  9.60245769]
 [ 24.09332957]
 [  2.26811561]
 [  5.14129757]
 [ 15.6901309 ]
 [ 46.14581417]
 [  2.11979194]
 [  5.1236583 ]
 [ 44.04

In [62]:
# Round every column to two decimals for display.
# Despite its name, X_param1_df_2032 is the frame that also carries the UN3941 column.
X_param1_df_2032 = X_param1_df_2032.round(2)
X_param1_df_2032

Unnamed: 0,UN9193,UN9901,UN0709,UN1416,UN2325,UN3133,UN3941
0,37.4,46.1,24.6,23.0,30.62,30.91,30.21
1,7.7,7.2,9.7,4.9,5.49,7.00,6.85
2,9.1,10.7,7.5,4.6,6.38,7.13,6.81
3,65.4,50.0,23.7,14.0,24.33,26.02,24.00
4,4.7,3.5,4.1,3.6,3.72,4.02,4.12
5,52.7,23.8,5.2,4.4,10.09,9.57,8.54
6,81.7,23.2,2.6,1.2,7.44,6.83,5.53
7,36.1,20.8,16.4,15.1,17.39,18.12,18.20
8,8.2,2.3,1.6,1.1,1.46,1.60,1.54
9,28.0,22.6,12.8,10.3,14.15,14.72,14.23


In [69]:
# Pick columns 2, 6, 10 and 14 of the flattened records (labelled stu9094, stu9802, stu0610, stu1216 further down)
# and drop any header record, recognised by its first selected field being "stu9094".
X_param2=GHI_rdd4.map(lambda x: [x[2],x[6],x[10],x[14]]).filter(lambda x: x[0]!="stu9094")
# X_param2 is rebound from an RDD to a 2-D NumPy array.
X_param2=np.array(X_param2.collect())
print(X_param2)

# Fit weights that map the first three columns to the fourth.
best_beta_val_STU = RidgeRegression(X_param2, 1)

[[ 51.3  54.7  51.3  40.9]
 [ 37.2  39.2  23.1  17.9]
 [ 22.9  23.6  13.2  11.7]
 [ 52.9  47.1  29.2  37.6]
 [  7.1   9.9   8.2   7.5]
 [ 23.5  17.7  20.8   9.4]
 [ 30.   24.1  26.8  18. ]
 [ 71.5  54.   43.2  36.1]
 [  7.5   6.    4.2   2.8]
 [ 43.7  39.1  44.7  34. ]
 [ 37.1  33.1  27.2  18.1]
 [ 41.   29.1  31.4  23.5]
 [ 13.3  10.    7.1   7.9]
 [ 12.8  10.2   8.1   6.2]
 [ 40.7  45.5  35.1  27.3]
 [ 58.5  49.2  39.5  32.4]
 [ 36.3  38.2  36.4  31.7]
 [ 42.6  44.6  45.1  42.6]
 [ 47.2  39.3  38.7  39.9]
 [  4.2   3.    2.    1.8]
 [ 38.   17.8   9.8   6.3]
 [ 22.   18.1  12.7  12.2]
 [ 32.2  28.9  28.8  21.2]
 [ 10.7   7.9   5.6   5.2]
 [ 34.2  31.5  39.   29.6]
 [  1.3   1.4   1.3   1. ]
 [  8.6   7.    6.1   4.4]
 [ 34.3  26.5  33.   33.5]
 [ 21.2   8.   10.1   7.1]
 [ 38.1  32.5  26.3  25.2]
 [ 33.1  24.6  30.7  22.3]
 [ 29.5  32.3  20.6  13.6]
 [ 11.4   5.4   4.4   3.8]
 [ 66.9  57.4  46.4  38.4]
 [  4.3   5.8   6.4   4.9]
 [ 30.3  26.3  19.2  17.5]
 [ 33.9  24.1  25.5  25. ]
 

In [70]:
# Extrapolate one period ahead for the stu series from its three most recent columns.
# The InteractiveSession is opened so that .eval() below has a session to run in.
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
# Columns 1: are stu9802, stu0610 and stu1216 (see the rename below).
X_param2_2024 = tf.matmul(X_param2[:,1:], best_beta_val_STU, name="predictions")
s = X_param2_2024.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param2_df = pd.DataFrame(X_param2)
X_param2_df = X_param2_df.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param2_df['stu2226'] = s

[[ 12.56869098]
 [ -8.21544758]
 [ -4.38424437]
 [  6.23750887]
 [  1.87066269]
 [  2.31435146]
 [  7.44515991]
 [  4.88691971]
 [ -0.8948024 ]
 [ 17.24260598]
 [ -0.54006388]
 [ 10.24369574]
 [  1.6578935 ]
 [  0.30154122]
 [  0.52878689]
 [  4.0454156 ]
 [ 12.05219038]
 [ 20.6297175 ]
 [ 19.96797069]
 [ -0.10937948]
 [ -5.80140109]
 [  0.91687609]
 [  7.02653499]
 [  0.30645938]
 [ 17.38259313]
 [  0.27080864]
 [  0.59458264]
 [ 22.96357965]
 [  4.10355726]
 [  6.4123974 ]
 [ 12.90306424]
 [ -7.15362599]
 [  0.70776968]
 [  5.44086542]
 [  2.31413939]
 [  1.45688593]
 [ 13.56707197]
 [  0.28734242]
 [  2.42729347]
 [ 20.74502671]
 [  7.538761  ]
 [  6.05253271]
 [  7.5192005 ]
 [  8.66244074]
 [  3.82854809]
 [  9.06789633]
 [ 14.40447458]
 [ -8.19721531]
 [  8.13262602]
 [  1.79657514]
 [  0.27604662]
 [  2.94564849]
 [  3.45934391]
 [  3.37044891]
 [ -1.03038011]
 [ 19.62295561]
 [  0.66179572]
 [  3.82597003]
 [  2.47107406]
 [  7.29298911]
 [  0.15217756]
 [  3.22734594]
 [ 16.81

In [71]:
# Second extrapolation step for the stu series: slide the three-column window forward by one.
X_param2_df_2024 = np.array(X_param2_df)
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
# Columns 2: are stu0610, stu1216 and the previously predicted (clamped) stu2226.
X_param2_2032 = tf.matmul(X_param2_df_2024[:,2:], best_beta_val_STU, name="predictions")
s = X_param2_2032.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param2_df_2024 = pd.DataFrame(X_param2_df_2024)
X_param2_df_2024 = X_param2_df_2024.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216",4:"stu2226"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param2_df_2024['stu3034'] = s

[[-16.27446785]
 [-12.99757093]
 [ -6.75185663]
 [ -3.57102392]
 [ -2.29248856]
 [-12.56531681]
 [ -9.20359887]
 [-18.41208722]
 [ -2.57181287]
 [ -8.89445057]
 [-16.67084867]
 [ -8.2175251 ]
 [ -1.30297858]
 [ -4.30333933]
 [-19.19553716]
 [-17.52016397]
 [ -7.30981924]
 [ -2.06378871]
 [  1.93906066]
 [ -1.01049149]
 [ -6.10798734]
 [ -5.18320938]
 [ -9.97334026]
 [ -2.46180462]
 [ -5.54515021]
 [ -0.47492208]
 [ -3.01158836]
 [  7.11125064]
 [ -2.0777415 ]
 [ -6.43117203]
 [ -5.57301355]
 [-12.67532444]
 [ -1.61720158]
 [-19.76339644]
 [ -1.40718056]
 [ -8.20143633]
 [  1.07869949]
 [ -5.37382252]
 [-15.30550788]
 [ -2.82626994]
 [-14.07567395]
 [ -6.96069782]
 [ -4.63164821]
 [ -8.90841403]
 [-13.36445055]
 [-17.65965744]
 [ -6.29274445]
 [ -3.95013499]
 [ -7.16716588]
 [ -0.43479619]
 [ -3.77713494]
 [ -9.57152347]
 [-17.07128417]
 [  0.80046188]
 [-14.83321577]
 [ -4.77509532]
 [ -1.93682002]
 [ -3.0339724 ]
 [-20.95981426]
 [-14.44901849]
 [ -2.5179745 ]
 [ -1.40349364]
 [ -7.51

In [72]:
# Third extrapolation step for the stu series: slide the three-column window forward once more.
X_param2_df_2032 = np.array(X_param2_df_2024)
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
# Columns 3: are stu1216 and the two previously predicted columns stu2226 and stu3034.
X_param2_2040 = tf.matmul(X_param2_df_2032[:,3:], best_beta_val_STU, name="predictions")
s = X_param2_2040.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param2_df_2032 = pd.DataFrame(X_param2_df_2032)
X_param2_df_2032 = X_param2_df_2032.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216",4:"stu2226",5:"stu3034"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param2_df_2032['stu3842'] = s

[[-31.79034064]
 [-16.43774343]
 [-10.74422336]
 [-31.66567761]
 [ -6.02876343]
 [ -7.56991647]
 [-13.11254427]
 [-30.90807939]
 [ -2.57126713]
 [-23.30886405]
 [-16.62140536]
 [-16.87883173]
 [ -6.49373984]
 [ -5.55512473]
 [-24.8271625 ]
 [-27.89655022]
 [-23.57894459]
 [-29.65177888]
 [-25.61495867]
 [ -1.65295744]
 [ -5.78535104]
 [-10.78256881]
 [-16.24326734]
 [ -4.6345578 ]
 [-19.20405293]
 [ -0.79401937]
 [ -3.76767302]
 [-13.39867379]
 [ -4.6366304 ]
 [-20.19837065]
 [-14.5563165 ]
 [-12.48901176]
 [ -3.16473889]
 [-32.76595279]
 [ -3.43762021]
 [-15.40176727]
 [-15.69567011]
 [ -9.32671113]
 [-16.15019169]
 [-33.18026437]
 [-27.39521626]
 [-22.56747737]
 [ -7.56870447]
 [-16.13526892]
 [-19.08847974]
 [-31.10129091]
 [-26.81539592]
 [ -6.61182976]
 [-16.8375875 ]
 [ -4.40980921]
 [ -7.03612123]
 [ -5.99454286]
 [-22.28835164]
 [ -2.18453772]
 [-11.84619498]
 [-31.21581389]
 [ -3.18583909]
 [ -9.81473511]
 [-29.35375767]
 [-26.13055118]
 [ -4.3380431 ]
 [ -4.94694533]
 [-35.35

In [81]:
# Round every column to two decimals for display.
# Despite its name, X_param2_df_2032 is the frame that also carries the stu3842 column.
X_param2_df_2032 = X_param2_df_2032.round(2)
X_param2_df_2032

Unnamed: 0,stu9094,stu9802,stu0610,stu1216,stu2226,stu3034,stu3842
0,51.3,54.7,51.3,40.9,12.57,0.00,0.0
1,37.2,39.2,23.1,17.9,0.00,0.00,0.0
2,22.9,23.6,13.2,11.7,0.00,0.00,0.0
3,52.9,47.1,29.2,37.6,6.24,0.00,0.0
4,7.1,9.9,8.2,7.5,1.87,0.00,0.0
5,23.5,17.7,20.8,9.4,2.31,0.00,0.0
6,30.0,24.1,26.8,18.0,7.45,0.00,0.0
7,71.5,54.0,43.2,36.1,4.89,0.00,0.0
8,7.5,6.0,4.2,2.8,0.00,0.00,0.0
9,43.7,39.1,44.7,34.0,17.24,0.00,0.0


In [88]:
# Pick columns 3, 7, 11 and 15 of the flattened records (labelled wast9094, wast9802, wast0610, wast1216 further down)
# and drop any header record, recognised by its first selected field being "wast9094".
X_param3=GHI_rdd4.map(lambda x: [x[3],x[7],x[11],x[15]]).filter(lambda x: x[0]!="wast9094")
# X_param3 is rebound from an RDD to a 2-D NumPy array.
X_param3=np.array(X_param3.collect())
print(X_param3)

# Fit weights that map the first three columns to the fourth.
best_beta_val_WAST = RidgeRegression(X_param3, 1)

[[ 11.5  13.4   8.9   9.5]
 [  9.4  12.2   9.4   6.2]
 [  7.1   3.1   4.1   4.1]
 [  7.9   8.7   8.2   4.9]
 [  1.6   1.6   1.2   1.6]
 [  5.    2.5   4.2   4.2]
 [  6.6   9.    6.8   3.1]
 [ 16.1  13.8  17.5  14.3]
 [  2.6   2.    1.9   1.8]
 [ 11.2   9.    8.4   4.5]
 [  3.6   1.6   1.4   1.6]
 [ 13.1   6.    7.2   6.8]
 [  2.9   2.2   1.6   1.9]
 [  3.4   3.2   3.4   2.8]
 [ 15.5  15.7  11.3   7.6]
 [ 13.9  16.9   8.9   9.6]
 [  4.5   6.2   7.3   5.2]
 [  9.7  10.5  12.2   7.2]
 [ 14.6  13.9  15.7  13. ]
 [  0.5   0.5   0.3   0.3]
 [  3.9   2.5   2.6   1.8]
 [  1.7   1.1   0.9   1.1]
 [  7.1   7.1   7.2   8.2]
 [  2.1   1.7   1.    1.4]
 [  8.3   6.9  14.    7.6]
 [  1.6   1.3   1.    1.2]
 [  3.3   2.4   2.4   2. ]
 [ 17.3  19.4  17.   21.5]
 [  2.2   1.5   2.3   2.4]
 [  2.8   3.2   2.7   2.3]
 [  4.3   6.9   7.9   9.5]
 [  1.4   1.5   1.6   2.1]
 [  4.6   2.7   2.5   2.5]
 [  9.2  12.4  10.8   9.9]
 [  9.8   7.2   6.3   5.3]
 [  4.    4.3   3.7   3.4]
 [ 10.8   8.9   8.5  11.1]
 

In [89]:
# Extrapolate one period ahead for the wast series from its three most recent columns.
# The InteractiveSession is opened so that .eval() below has a session to run in.
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
# Columns 1: are wast9802, wast0610 and wast1216 (see the rename below).
X_param3_2024 = tf.matmul(X_param3[:,1:], best_beta_val_WAST, name="predictions")
s = X_param3_2024.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param3_df = pd.DataFrame(X_param3)
X_param3_df = X_param3_df.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param3_df['wast2226'] = s

[[ 10.21511807]
 [  6.53333864]
 [  3.14780498]
 [  4.41644113]
 [  1.57396536]
 [  2.93629279]
 [  3.56419556]
 [ 10.82158051]
 [  1.60988142]
 [  4.09785326]
 [  1.48444858]
 [  5.40806036]
 [  1.918769  ]
 [  2.34326339]
 [  8.42619827]
 [ 11.79786603]
 [  4.02030985]
 [  5.44740906]
 [ 10.50938122]
 [  0.34689126]
 [  1.50992656]
 [  1.04853239]
 [  7.12723083]
 [  1.52761121]
 [  3.46269355]
 [  1.1783864 ]
 [  1.73531427]
 [ 19.86243732]
 [  1.75311911]
 [  2.21021786]
 [  7.88910165]
 [  1.79861529]
 [  2.26492248]
 [  9.29508431]
 [  4.98387237]
 [  3.21399186]
 [  9.90230045]
 [  2.57061853]
 [  4.52615676]
 [  1.71137233]
 [  7.6432699 ]
 [  8.24299984]
 [  7.83292775]
 [  2.42153347]
 [  1.13522377]
 [ 17.0919035 ]
 [  7.77421596]
 [  4.20511997]
 [  4.95163968]
 [  3.01832263]
 [  2.4931356 ]
 [  1.64100466]
 [  3.59538407]
 [  2.7214689 ]
 [  3.49654348]
 [  9.91335762]
 [  2.75235176]
 [  3.47529564]
 [  3.61294854]
 [  4.66565061]
 [  3.01463926]
 [  1.8379688 ]
 [  7.77

In [90]:
# Second extrapolation step for the wast series: slide the three-column window forward by one.
X_param3_df_2024 = np.array(X_param3_df)
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
# Columns 2: are wast0610, wast1216 and the previously predicted (clamped) wast2226.
X_param3_2032 = tf.matmul(X_param3_df_2024[:,2:], best_beta_val_WAST, name="predictions")
s = X_param3_2032.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param3_df_2024 = pd.DataFrame(X_param3_df_2024)
X_param3_df_2024 = X_param3_df_2024.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216",4: "wast2226"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param3_df_2024['wast3034'] = s

[[  8.66477489]
 [  7.06839973]
 [  2.72448009]
 [  5.24843704]
 [  1.20100735]
 [  2.53357374]
 [  4.69589035]
 [ 10.7274529 ]
 [  1.44224857]
 [  5.22840604]
 [  1.2064365 ]
 [  4.85654682]
 [  1.54522443]
 [  2.28942261]
 [  8.94229892]
 [ 10.03294912]
 [  4.37649512]
 [  6.84617889]
 [ 10.26253508]
 [  0.30340994]
 [  1.65171342]
 [  0.82772848]
 [  5.7646475 ]
 [  1.16380184]
 [  5.66344678]
 [  0.94156265]
 [  1.67806063]
 [ 15.36234493]
 [  1.47225082]
 [  2.09574827]
 [  6.16161242]
 [  1.3484454 ]
 [  1.96972521]
 [  8.47516404]
 [  4.76520947]
 [  2.92619133]
 [  7.49870257]
 [  1.9928609 ]
 [  5.64925178]
 [  1.68382647]
 [  6.87373206]
 [  6.72129004]
 [  7.02959711]
 [  4.22937702]
 [  0.98419732]
 [ 14.39298092]
 [  7.21300076]
 [  3.71937545]
 [  4.5230812 ]
 [  2.24781691]
 [  1.83417444]
 [  2.16829669]
 [  4.40624862]
 [  1.98072544]
 [  2.42287933]
 [  9.10016849]
 [  2.14257   ]
 [  3.19377975]
 [  3.63623526]
 [  4.98691663]
 [  2.24870397]
 [  1.62916318]
 [  7.18

In [91]:
# Third extrapolation step for the wast series: slide the three-column window forward once more.
X_param3_df_2032 = np.array(X_param3_df_2024)
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
# Columns 3: are wast1216 and the two previously predicted columns wast2226 and wast3034.
X_param3_2040 = tf.matmul(X_param3_df_2032[:,3:], best_beta_val_WAST, name="predictions")
s = X_param3_2040.eval()
# The printed array is the raw prediction; clamping happens afterwards.
print(s)

#close the session to release resources
sess.close()

X_param3_df_2032 = pd.DataFrame(X_param3_df_2032)
X_param3_df_2032 = X_param3_df_2032.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216",4: "wast2226",5: "wast3034"})
# Clamp negative predictions to zero before storing them.
s[s < 0] = 0
X_param3_df_2032['wast3842'] = s

[[  7.21671958]
 [  6.03139154]
 [  2.77276153]
 [  4.79946451]
 [  1.05040033]
 [  2.73967802]
 [  3.91957882]
 [ 10.8348442 ]
 [  1.33502452]
 [  4.75349437]
 [  1.09531326]
 [  4.81653266]
 [  1.33136962]
 [  2.1897618 ]
 [  7.45441467]
 [  7.77235865]
 [  4.32638882]
 [  6.74576546]
 [ 10.00482341]
 [  0.24360602]
 [  1.56675378]
 [  0.73899257]
 [  5.45511969]
 [  0.95259248]
 [  6.74894024]
 [  0.82516352]
 [  1.57473579]
 [ 13.99820559]
 [  1.55372156]
 [  1.86306223]
 [  6.0232126 ]
 [  1.29482372]
 [  1.81141701]
 [  7.62992777]
 [  4.28479314]
 [  2.62451126]
 [  6.99850139]
 [  1.56723348]
 [  5.02282675]
 [  1.03587897]
 [  6.04355507]
 [  4.87096536]
 [  5.50041848]
 [  4.91064183]
 [  0.96788429]
 [ 14.15953856]
 [  8.72004793]
 [  3.01699283]
 [  4.08307   ]
 [  1.93581593]
 [  1.54559438]
 [  2.52396369]
 [  4.03109068]
 [  1.87291882]
 [  1.79271014]
 [  6.41766829]
 [  1.87556385]
 [  2.91711746]
 [  2.82378231]
 [  4.75315628]
 [  1.85291488]
 [  1.74116275]
 [  6.56

In [92]:
# Round every value in the wasting table to two decimals, then show it.
X_param3_df_2032 = X_param3_df_2032.round(decimals=2)
X_param3_df_2032

Unnamed: 0,wast9094,wast9802,wast0610,wast1216,wast2226,wast3034,wast3842
0,11.5,13.4,8.9,9.5,10.22,8.66,7.22
1,9.4,12.2,9.4,6.2,6.53,7.07,6.03
2,7.1,3.1,4.1,4.1,3.15,2.72,2.77
3,7.9,8.7,8.2,4.9,4.42,5.25,4.80
4,1.6,1.6,1.2,1.6,1.57,1.20,1.05
5,5.0,2.5,4.2,4.2,2.94,2.53,2.74
6,6.6,9.0,6.8,3.1,3.56,4.70,3.92
7,16.1,13.8,17.5,14.3,10.82,10.73,10.83
8,2.6,2.0,1.9,1.8,1.61,1.44,1.34
9,11.2,9.0,8.4,4.5,4.10,5.23,4.75


In [97]:
# Pull fields 4, 8, 12 and 16 out of every GHI_rdd4 row, discard the row whose
# first selected field is the literal "UM1992" (the header), and materialise
# the remaining rows on the driver as a NumPy array.
X_param4 = np.array(
    GHI_rdd4
    .map(lambda row: [row[idx] for idx in (4, 8, 12, 16)])
    .filter(lambda picked: picked[0] != "UM1992")
    .collect()
)
print(X_param4)

# NOTE(review): RidgeRegression is defined outside this section; the meaning
# of the second argument (1) should be confirmed against its definition.
best_beta_val_UM = RidgeRegression(X_param4, 1)

[[ 16.8  13.7  11.    9.1]
 [  3.7   2.6   1.8   1.4]
 [  4.5   4.    2.9   2.6]
 [ 22.6  21.7  19.2  15.7]
 [  2.6   2.    1.5   1.3]
 [  4.5   3.    2.    1.4]
 [  9.5   7.4   4.3   3.2]
 [ 13.2   8.8   5.6   3.8]
 [  1.7   1.4   0.7   0.5]
 [ 17.   14.5  11.6  10. ]
 [ 11.5   8.    5.2   3.8]
 [  5.9   8.3   6.2   4.4]
 [  5.5   3.2   1.8   1.6]
 [  2.3   2.1   1.4   1. ]
 [ 20.2  18.6  13.    8.9]
 [ 11.8  10.8   5.2   2.9]
 [ 14.3  15.   11.3   8.8]
 [ 17.6  17.5  15.8  13. ]
 [ 20.9  19.   16.8  13.9]
 [  1.6   1.1   0.9   0.8]
 [  5.2   3.7   1.9   1.1]
 [  3.3   2.5   2.    1.6]
 [  9.7  12.2   7.3   4.5]
 [  1.6   1.3   1.    1. ]
 [ 15.3  14.6  11.7   9.3]
 [  1.1   0.8   0.6   0.4]
 [  1.2   0.8   0.6   0.6]
 [ 11.5  10.1   8.1   6.5]
 [  5.6   4.1   3.5   3.1]
 [  5.2   3.4   2.7   2.2]
 [  7.8   4.7   3.1   2.4]
 [  5.3   3.2   2.2   1.7]
 [  1.9   1.1   0.6   0.3]
 [ 19.5  14.5   8.7   5.9]
 [  2.8   2.5   2.4   2.2]
 [  9.1   8.5   6.9   5.1]
 [ 15.9  11.9   8.7   6.9]
 

In [98]:
# Predict the next UM column ('UM2024') by multiplying the last three columns
# of X_param4 (positions 1..3) with the coefficient vector best_beta_val_UM.
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# The context manager installs the session as default (so .eval() works) and
# closes it even if building or evaluating the op raises; the previous
# InteractiveSession()/close() pair leaked the session on any error.
with tf.Session() as sess:
    X_param4_2024 = tf.matmul(X_param4[:,1:], best_beta_val_UM, name="predictions")
    s = X_param4_2024.eval()
print(s)

X_param4_df = pd.DataFrame(X_param4)
X_param4_df = X_param4_df.rename(index=str, columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015"})
# Negative predictions are clamped to zero before being stored (the printout
# above still shows the raw values).
s[s < 0] = 0
X_param4_df['UM2024'] = s

[[  2.90150591]
 [  0.84152755]
 [  0.95570118]
 [  3.30890841]
 [  0.46678391]
 [  1.13768041]
 [  3.12248662]
 [  3.60930753]
 [  0.68910671]
 [  2.82417395]
 [  3.0170018 ]
 [  2.63406893]
 [  1.21318558]
 [  0.78247916]
 [  6.76794995]
 [  6.02530982]
 [  4.14992647]
 [  2.36314989]
 [  2.79301877]
 [  0.18106832]
 [  1.96400502]
 [  0.57318461]
 [  5.64020724]
 [  0.19017342]
 [  3.37354514]
 [  0.27009636]
 [  0.13112503]
 [  2.28622431]
 [  0.56057761]
 [  0.76728118]
 [  1.64614006]
 [  1.05516649]
 [  0.59489671]
 [  6.31495352]
 [  0.0677372 ]
 [  2.16294557]
 [  3.40736962]
 [  1.80941446]
 [  2.51681466]
 [  1.50633118]
 [  5.25804352]
 [  5.85343485]
 [  0.36038817]
 [  2.30443452]
 [  1.16156719]
 [  2.948432  ]
 [  1.75254076]
 [  1.4021002 ]
 [  0.76511146]
 [  0.34000328]
 [  0.64526125]
 [  2.1963488 ]
 [  4.23719612]
 [  0.23752573]
 [  1.95840677]
 [  3.48336941]
 [  0.64743097]
 [  0.90274566]
 [  1.11396019]
 [  8.38962533]
 [  0.49676356]
 [  0.67916405]
 [  4.31

In [99]:
# Predict the following UM column ('UM2032') from the last three columns of
# X_param4_df (positions 2..4, i.e. including the previously predicted
# 'UM2024') and the same coefficient vector.
X_param4_df_2024 = np.array(X_param4_df)
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# The context manager installs the session as default (so .eval() works) and
# closes it even if building or evaluating the op raises; the previous
# InteractiveSession()/close() pair leaked the session on any error.
with tf.Session() as sess:
    X_param4_2032 = tf.matmul(X_param4_df_2024[:,2:], best_beta_val_UM, name="predictions")
    s = X_param4_2032.eval()
print(s)

X_param4_df_2024 = pd.DataFrame(X_param4_df_2024)
X_param4_df_2024 = X_param4_df_2024.rename(index=str, columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015",4: "UM2024"})
# Negative predictions are clamped to zero before being stored (the printout
# above still shows the raw values).
s[s < 0] = 0
X_param4_df_2024['UM2032'] = s

[[  5.33127232e+00]
 [  6.37280109e-01]
 [  1.22849937e+00]
 [  1.05659858e+01]
 [  6.64491886e-01]
 [  6.01706112e-01]
 [  7.81562493e-01]
 [  1.41685524e+00]
 [  6.23710601e-03]
 [  5.69643611e+00]
 [  1.48800324e+00]
 [  2.47233549e+00]
 [  3.34764339e-01]
 [  4.26424122e-01]
 [  4.39101260e+00]
 [ -4.03139375e-01]
 [  4.78551376e+00]
 [  8.92718663e+00]
 [  9.28033812e+00]
 [  4.63060677e-01]
 [  8.82413985e-03]
 [  9.49683058e-01]
 [  1.29752080e+00]
 [  4.97572116e-01]
 [  5.55474028e+00]
 [  2.34417691e-01]
 [  2.86716130e-01]
 [  3.86623015e+00]
 [  1.90301619e+00]
 [  1.27774789e+00]
 [  9.63219827e-01]
 [  7.62851175e-01]
 [  3.08613676e-02]
 [  1.71027040e+00]
 [  1.50851599e+00]
 [  3.24049945e+00]
 [  3.50929083e+00]
 [  9.41090605e-02]
 [  3.68729809e+00]
 [  1.46032954e+00]
 [  4.56357028e+00]
 [  4.76772628e+00]
 [  2.46078656e+00]
 [  3.93525303e+00]
 [  1.04804235e+00]
 [  2.50578438e+00]
 [  1.24841100e+00]
 [  4.58810447e-01]
 [  1.99407581e+00]
 [  1.02660576e+00]


In [100]:
# Predict the last UM column ('UM2040') from the last three columns of
# X_param4_df_2024 (positions 3..5, i.e. including the previously predicted
# 'UM2024' and 'UM2032') and the same coefficient vector.
X_param4_df_2032 = np.array(X_param4_df_2024)
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# The context manager installs the session as default (so .eval() works) and
# closes it even if building or evaluating the op raises; the previous
# InteractiveSession()/close() pair leaked the session on any error.
with tf.Session() as sess:
    X_param4_2040 = tf.matmul(X_param4_df_2032[:,3:], best_beta_val_UM, name="predictions")
    s = X_param4_2040.eval()
print(s)

X_param4_df_2032 = pd.DataFrame(X_param4_df_2032)
X_param4_df_2032 = X_param4_df_2032.rename(index=str, columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015",4: "UM2024",5: "UM2032"})
# Negative predictions are clamped to zero before being stored (the printout
# above still shows the raw values).
s[s < 0] = 0
X_param4_df_2032['UM2040'] = s

[[ 3.39787478]
 [ 0.56239192]
 [ 1.14756757]
 [ 5.28725052]
 [ 0.54132208]
 [ 0.52156218]
 [ 1.48917165]
 [ 1.45061382]
 [ 0.26866814]
 [ 3.9271985 ]
 [ 1.5322733 ]
 [ 1.44368876]
 [ 0.8605464 ]
 [ 0.38155641]
 [ 3.02523686]
 [ 1.13443865]
 [ 3.24546669]
 [ 4.33747698]
 [ 4.76288696]
 [ 0.31900187]
 [ 0.4953221 ]
 [ 0.57491974]
 [ 1.67976214]
 [ 0.46321589]
 [ 3.30832825]
 [ 0.1177515 ]
 [ 0.28238039]
 [ 2.33932427]
 [ 1.19185365]
 [ 0.81463099]
 [ 1.00887173]
 [ 0.68316724]
 [ 0.10219985]
 [ 2.43508487]
 [ 0.80911281]
 [ 1.6099346 ]
 [ 2.67961842]
 [ 0.55538228]
 [ 2.15730434]
 [ 1.11992061]
 [ 3.66505146]
 [ 3.30630733]
 [ 1.529431  ]
 [ 2.62775231]
 [ 0.7167649 ]
 [ 1.69128706]
 [ 1.04246939]
 [ 0.73253899]
 [ 1.16841496]
 [ 0.57308035]
 [ 0.67646463]
 [ 0.44358148]
 [ 1.89223041]
 [ 0.31597805]
 [ 0.61166672]
 [ 2.55765867]
 [ 0.32268066]
 [ 0.38944346]
 [ 2.41601121]
 [ 3.25502397]
 [ 0.1592362 ]
 [ 0.07087411]
 [ 2.19156933]
 [ 2.64802658]
 [ 0.27935656]
 [ 4.75343625]
 [ 2.84660

In [101]:
# Round every value in the UM table to two decimals, then show it.
X_param4_df_2032 = X_param4_df_2032.round(decimals=2)
X_param4_df_2032

Unnamed: 0,UM1992,UM2000,UM2008,UM2015,UM2024,UM2032,UM2040
0,16.8,13.7,11.0,9.1,2.90,5.33,3.40
1,3.7,2.6,1.8,1.4,0.84,0.64,0.56
2,4.5,4.0,2.9,2.6,0.96,1.23,1.15
3,22.6,21.7,19.2,15.7,3.31,10.57,5.29
4,2.6,2.0,1.5,1.3,0.47,0.66,0.54
5,4.5,3.0,2.0,1.4,1.14,0.60,0.52
6,9.5,7.4,4.3,3.2,3.12,0.78,1.49
7,13.2,8.8,5.6,3.8,3.61,1.42,1.45
8,1.7,1.4,0.7,0.5,0.69,0.01,0.27
9,17.0,14.5,11.6,10.0,2.82,5.70,3.93


In [102]:
# Extract the three forecast columns of each component table (and of the
# directly forecast GHI table) as plain Python lists.
UM2024, UM2032, UM2040 = (
    X_param4_df_2032[col].tolist() for col in ('UM2024', 'UM2032', 'UM2040')
)
wast2226, wast3034, wast3842 = (
    X_param3_df_2032[col].tolist() for col in ('wast2226', 'wast3034', 'wast3842')
)
stu2226, stu3034, stu3842 = (
    X_param2_df_2032[col].tolist() for col in ('stu2226', 'stu3034', 'stu3842')
)
UN2325, UN3133, UN3941 = (
    X_param1_df_2032[col].tolist() for col in ('UN2325', 'UN3133', 'UN3941')
)
GHI2024_a, GHI2032_a, GHI2040_a = (
    X_ghi_2032[col].tolist() for col in ('GHI2024', 'GHI2032', 'GHI2040')
)

In [103]:
# Combine the four forecast components into an index per row, weighting
# UN and UM by 1/3 each and wast and stu by 1/6 each, rounded to 2 decimals.
GHI2024_b = [
    round((UN2325[i]/3) + (wast2226[i]/6) + (stu2226[i]/6) + (UM2024[i]/3), 2)
    for i in range(0, len(UM2024))
]
GHI2032_b = [
    round((UN3133[i]/3) + (wast3034[i]/6) + (stu3034[i]/6) + (UM2032[i]/3), 2)
    for i in range(0, len(UM2024))
]
GHI2040_b = [
    round((UN3941[i]/3) + (wast3842[i]/6) + (stu3842[i]/6) + (UM2040[i]/3), 2)
    for i in range(0, len(UM2024))
]

In [104]:
# Final forecast per row: the mean of the directly forecast value (*_a) and
# the value rebuilt from the components (*_b), rounded to 2 decimals.
GHI2024 = [
    round((GHI2024_a[i] + GHI2024_b[i])/2, 2) for i in range(0, len(GHI2024_a))
]
GHI2032 = [
    round((GHI2032_a[i] + GHI2032_b[i])/2, 2) for i in range(0, len(GHI2024_a))
]
GHI2040 = [
    round((GHI2040_a[i] + GHI2040_b[i])/2, 2) for i in range(0, len(GHI2024_a))
]

In [105]:
# Gather the three averaged forecast lists into one table, one column each.
df1 = pd.DataFrame(dict(GHI2024=GHI2024, GHI2032=GHI2032, GHI2040=GHI2040))

In [106]:
# Attach a country code to every country name by joining against ccodes.csv.
countries = pd.DataFrame({'COUNTRY':GHI_rdd2_unique_val})
ccodes = pd.read_csv("ccodes.csv")
# A left join keeps one row per entry of `countries`, in the same order, and
# leaves CODE empty (NaN) when a name has no match. The default inner join
# silently dropped such rows, which shortens `result` relative to any
# per-country series that is later paired with it by position.
# NOTE(review): this assumes GHI_rdd2_unique_val lists the countries in the
# same order as the forecast tables plotted against result['CODE'] - confirm.
result = pd.merge(countries, ccodes, on='COUNTRY', how='left')
# The GDP column from ccodes.csv is not needed downstream.
result = result.drop(['GDP (BILLIONS)'],axis=1)

In [107]:
# Display the merged country/code table.
result

Unnamed: 0,COUNTRY,CODE
0,Albania,ALB
1,Algeria,DZA
2,Angola,AGO
3,Argentina,ARG
4,Botswana,BWA
5,Brazil,BRA
6,Cambodia,KHM
7,China,CHN
8,Costa Rica,CRI
9,Ecuador,ECU


In [140]:
# World choropleth of the forecast 2024 index (df1['GHI2024']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and df1 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = df1['GHI2024'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2024',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [139]:
# World choropleth of the forecast 2032 index (df1['GHI2032']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and df1 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = df1['GHI2032'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2032',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [138]:
# World choropleth of the forecast 2040 index (df1['GHI2040']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and df1 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = df1['GHI2040'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2040',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [137]:
# World choropleth of the 1992 index (X_ghi_2032['GHI1992']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and X_ghi_2032 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = X_ghi_2032['GHI1992'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 1992',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [136]:
# World choropleth of the 2000 index (X_ghi_2032['GHI2000']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and X_ghi_2032 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = X_ghi_2032['GHI2000'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2000',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [135]:
# World choropleth of the 2008 index (X_ghi_2032['GHI2008']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and X_ghi_2032 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = X_ghi_2032['GHI2008'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2008',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )

In [134]:
# World choropleth of the 2015 index (X_ghi_2032['GHI2015']), located by
# the country codes in result['CODE'] and labelled with the country names.
# NOTE(review): locations, z and text are paired by position - confirm that
# result and X_ghi_2032 have the same length and row order.
data = [ dict(
        type = 'choropleth',
        locations = result['CODE'],
        z = X_ghi_2032['GHI2015'],
        text = result["COUNTRY"],
        colorscale = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'GHI'),
      ) ]

layout = dict(
    title = 'Global Hunger Index - 2015',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; 'Mercator' is not
            # a listed value and validate=False hid the mismatch.
            type = 'mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
# validate=False skips plotly's Python-side figure validation.
iplot( fig, validate=False, filename='d3-world-map' )