In [2]:
#Importing the libraries used in this notebook
import pandas as pd
import csv
import pyspark
from pyspark import SparkContext, SparkConf
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.plotly as py
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error
from scipy.stats import pearsonr, zscore

In [3]:
# Start (or reuse) the Spark context for this notebook.
# getOrCreate() keeps this cell re-runnable: building a second SparkContext in
# a kernel that already has one raises an error. When a context already
# exists it is returned as-is and the settings below are not re-applied.
conf = (
    SparkConf()
    .setAppName("CSE545 Project")
    .set("spark.driver.memory", "12g")
    .set("spark.executor.memory", "6g")
    .set("spark.driver.maxResultSize", "6g")
)
sc = SparkContext.getOrCreate(conf=conf)

In [4]:
# Read the GHI table and split every line on tabs.
GHI_rdd1 = (
    sc.textFile("002_AppendixD.tsv")
    .map(lambda row: row.split("\t"))
)

In [5]:
#Predicate used to delete records with more than 1 missing value
def count_x(x):
    """Return True when the row ``x`` holds at most one " -" entry.

    " -" (a space followed by a dash) is the marker this function treats as
    a missing value; rows with two or more of them yield False.
    """
    missing = sum(1 for field in x if field == " -")
    return missing <= 1

In [6]:
# Drop lines that split into a single field, strip double quotes from every
# field, then keep only the rows accepted by count_x.
GHI_rdd1 = (
    GHI_rdd1
    .filter(lambda fields: len(fields) > 1)
    .map(lambda fields: [field.replace("\"", "") for field in fields])
    .filter(count_x)
)

In [7]:
#Converting strings to floats wherever necessary
def conv_x(x):
    """Convert fields 1-4 of a data row to float, in place, and return the row.

    A row whose first field is "Country" (the header) is returned untouched.
    """
    is_header = x[0] == "Country"
    if not is_header:
        for col in (1, 2, 3, 4):
            x[col] = float(x[col])
    return x

In [8]:
#Transforming "<5" markers into numeric strings
def rep_5(x):
    """Replace each "<5" field with a descending stand-in value.

    The first "<5" becomes "4", the second "3", and so on (``str(4 - k)`` for
    the k-th occurrence, counting from 0). The input row is updated in place
    and a new list with the same contents is returned.
    """
    seen = 0
    result = []
    for idx, field in enumerate(x):
        if field == "<5":
            field = str(4 - seen)
            x[idx] = field
            seen += 1
        result.append(field)

    return result

In [9]:
#Filling in a missing first value from the differences between later values
def fill_x(x):
    """Fill ``x[1]`` when it is the missing marker " -".

    The estimate is ``x[2]`` plus the mean of the two successive differences
    ``x[2] - x[3]`` and ``x[3] - x[4]``, rounded to one decimal and stored as
    a string. The row is modified in place and returned; rows whose ``x[1]``
    is not " -" are returned unchanged.
    """
    if x[1] != " -":
        return x

    newest, middle, oldest = float(x[2]), float(x[3]), float(x[4])
    mean_step = ((newest - middle) + (middle - oldest)) / 2
    x[1] = str(round(mean_step + newest, 1))
    return x

In [10]:
# Clean every GHI row: replace "<5" markers, fill a missing first value,
# then convert the numeric fields to float.
GHI_rdd2 = GHI_rdd1.map(rep_5).map(fill_x).map(conv_x)
# Distinct first-field values (the countries present in the current dataset)
GHI_rdd2_unique_val = (
    GHI_rdd2
    .map(lambda row: row[0])
    .distinct()
    .collect()
)

In [11]:
# Read the second dataset, split on tabs, drop single-field lines and strip
# double quotes from every field.
GHI_rdd3 = (
    sc.textFile("001_AppendixC.tsv")
    .map(lambda row: row.split("\t"))
    .filter(lambda fields: len(fields) > 1)
    .map(lambda fields: [field.replace("\"", "") for field in fields])
)

In [12]:
# Keep only the rows whose first field also occurs in the GHI dataset.
GHI_rdd3 = GHI_rdd3.filter(
    lambda fields: fields[0] in GHI_rdd2_unique_val
)

In [13]:
# Regroup fields 1-16 into four 4-element lists: list i (1..4) holds
# fields i, i+4, i+8 and i+12 of the original row.
GHI_rdd3 = GHI_rdd3.map(
    lambda row: [row[0]] + [
        [row[i], row[i + 4], row[i + 8], row[i + 12]]
        for i in range(1, 5)
    ]
)

In [14]:
#Checking the number of missing values
def cnt(x):
    """Return, for each sub-sequence of ``x``, how many of its entries equal "-"."""
    return [
        sum(1 for value in group if value == "-")
        for group in x
    ]

In [15]:
#Flagging the records with at least 2 missing values in any group
def mor_2(x):
    """Return the key of a record that has a group with two or more gaps.

    Parameters
    ----------
    x : sequence
        ``x[0]`` is the record key and ``x[1]`` a sequence of per-group
        missing-value counts (as produced by ``cnt``).

    Returns
    -------
    The key ``x[0]`` when any count is 2 or larger, otherwise ``None``.
    """
    key, counts = x[0], x[1]
    # ">= 2" rather than "== 2": a group with 3 or 4 gaps cannot be repaired
    # downstream either, so it must be flagged as well.
    if any(count >= 2 for count in counts):
        return key
    return None

In [16]:
# Count the gaps per group for every row, let mor_2 flag the rows to drop,
# and collect the flagged keys on the driver.
GHI_rdd3_toRemove = (
    GHI_rdd3
    .map(lambda row: (row[0], cnt(row[1:5])))
    .map(mor_2)
    .filter(lambda key: key is not None)
    .collect()
)

In [17]:
# Drop the flagged keys from both the values dataset and the GHI dataset.
GHI_rdd4 = GHI_rdd3.filter(
    lambda row: not (row[0] in GHI_rdd3_toRemove)
)
GHI_rdd2 = GHI_rdd2.filter(
    lambda row: not (row[0] in GHI_rdd3_toRemove)
)

In [18]:
#Filling in missing values according to the GHI value
def cal_x0(x,ghi_rdd):
    """Convert a grouped row to floats, deriving a missing first value from the GHI.

    Parameters
    ----------
    x : list
        ``x[0]`` is the row key; every later element is a 4-item group whose
        entries are numeric strings, except that the first entry may be "-".
        The groups are modified in place.
    ghi_rdd : object with a ``value`` attribute
        ``value`` is a sequence of rows ``[key, v1, v2, ...]`` where ``v_i``
        is the numeric GHI value that belongs to group ``i`` of ``x``.

    Returns
    -------
    The same list ``x``. A row whose key is "Country" (the header) is
    returned untouched. In all other rows every group entry is a float
    rounded to one decimal; a missing first entry is reconstructed as
    ``3 * (ghi - g[3]/3 - g[2]/6 - g[1]/6)``.

    Raises
    ------
    ValueError
        If a group needs reconstruction but ``ghi_rdd.value`` has no row
        with the same key as ``x``.
    """
    if x[0] == "Country":
        return x

    # Scan all rows and keep the last one whose key matches.
    ghis = None
    for row in ghi_rdd.value:
        if row[0] == x[0]:
            ghis = row

    for i in range(1, len(x)):
        group = x[i]
        missing = group[0] == "-"
        if not missing:
            group[0] = round(float(group[0]), 1)
        for k in (1, 2, 3):
            group[k] = round(float(group[k]), 1)
        if missing:
            # The GHI row is only required when a value has to be derived.
            if ghis is None:
                raise ValueError("no GHI row found for %r; cannot fill missing value" % (x[0],))
            ghi = ghis[i]
            group[0] = round(3*(ghi - (group[3]/3) - (group[2]/6) - (group[1]/6)),1)
    return x

In [19]:
def conv_listx(x):
    """Flatten a grouped row into one list.

    The result starts with ``x[0]`` followed by the first four entries of
    each of ``x[1]`` .. ``x[4]``, in order (17 items in total).
    """
    flat = [x[0]]
    for group_idx in range(1, 5):
        flat.extend(x[group_idx][item_idx] for item_idx in range(4))
    return flat

In [20]:
# Broadcast the collected GHI rows, then fill the missing values in every
# row and flatten the result.
GHI_rdd2_broadcast = sc.broadcast(GHI_rdd2.collect())
GHI_rdd4 = (
    GHI_rdd4
    .map(lambda row: cal_x0(row, GHI_rdd2_broadcast))
    .map(conv_listx)
)

In [21]:
# #Transforming rdd to pandas dataframe for applying ML and Visualization techniques
# headers = GHI_rdd2.collect()[0]
# GHI_rdd2=GHI_rdd2.filter(lambda x: x[0]!='Country')

# df0 = pd.DataFrame(GHI_rdd2.collect(), columns=headers)

# headers = GHI_rdd4.collect()[0]
# GHI_rdd4=GHI_rdd4.filter(lambda x: x[0]!='Country')

# df1 = pd.DataFrame(GHI_rdd4.collect(), columns=headers)

In [53]:
def calc_beta(betas, X_test, y_test):
    """Print the mean absolute error of the linear predictions on a test set.

    Predictions are the first column of ``X_test @ betas``; they are compared
    with ``y_test`` using ``mean_absolute_error``. Nothing is returned.
    """
    predictions = np.matmul(X_test, betas)
    first_column = predictions[:, 0]
    mae = mean_absolute_error(y_test, first_column)
    print("Mean Absolute Error:", mae)

In [54]:
def RidgeRegression(X, penalty_value = 1.0, learning_rate = 0.00000000001, n_epochs = 100):
    """Fit ridge-penalised linear coefficients by gradient descent (TensorFlow 1.x).

    Parameters
    ----------
    X : numpy.ndarray
        2-D array. The first three columns are the features and the columns
        from index 3 on are the target (the code reshapes the target to one
        column, so a four-column array is expected).
    penalty_value : float
        Weight of the L2 term ``sum(beta ** 2)`` added to the squared error.
    learning_rate : float
        Step size of the gradient-descent optimizer.
    n_epochs : int
        Number of optimizer steps.

    Returns
    -------
    numpy.ndarray
        The fitted coefficients, one row per feature and a single column.

    Notes
    -----
    The first 90% of the rows (in the order given, no shuffling) are used for
    training; the remaining rows are passed to ``calc_beta``, which prints
    the mean absolute error. ``beta`` is initialised randomly without a seed,
    so repeated calls can return different coefficients.
    """

    #Dividing into training and test data
    offset = int(int(X.shape[0]) * 0.9)
    X_test, Y_test = X[offset:,:3], X[offset:,3:]
    X_train, Y_train = X[:offset,:3], X[:offset,3:]

    # Conversion to tensors
    X_tf = tf.constant(X_train, dtype=tf.float32, name="GHI_X")
    Y_tf = tf.constant(Y_train.reshape(-1,1), dtype=tf.float32, name="GHI_Y")
    # The penalty weight comes from the caller's penalty_value argument.
    penalty = tf.constant(penalty_value, dtype=tf.float32, name="penalty")
    beta = tf.Variable(tf.random_uniform([int(X_tf.shape[1]), 1], -1., 1.), name = "beta")
    y_pred = tf.matmul(X_tf, beta, name="predictions")
    # Ridge objective: squared error plus penalty * squared L2 norm of beta.
    penalizedCost = tf.reduce_sum(tf.square(Y_tf - y_pred)) + penalty * tf.reduce_sum(tf.square(beta))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    training_op = optimizer.minimize(penalizedCost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            if epoch %10 == 0: #print debugging output
                print("Epoch", epoch, "; penalizedCost =", penalizedCost.eval())
            sess.run(training_op)
        #done training, get final beta:
        best_beta = beta.eval()
    print(best_beta)
    calc_beta(best_beta, X_test, Y_test)
    return best_beta

In [55]:
# Build the GHI matrix for the model: drop the first (country) field of each
# row, discard the header row, and collect the rest into a NumPy array.
X_ghi = np.array(
    GHI_rdd2
    .map(lambda row: row[1:])
    .filter(lambda values: values[0] != "GHI1992")
    .collect()
)
print(X_ghi)

best_beta_val = RidgeRegression(X_ghi, 1)

[[ 50.2  52.7  37.9  33.3]
 [ 20.8  21.6  16.5  11.1]
 [ 17.5  15.6  11.3   9.5]
 [ 65.8  57.5  39.7  32.5]
 [  7.    6.6   5.8   5.4]
 [ 23.8  18.4  11.4   7.7]
 [ 36.5  27.5  15.3   9.6]
 [ 53.6  37.6  32.2  26.5]
 [  5.    4.    3.    2. ]
 [ 44.5  37.5  31.7  24.4]
 [ 36.7  30.3  23.9  17.2]
 [ 33.8  33.   30.7  24.4]
 [ 15.9  11.7   5.4   5.4]
 [  7.9   8.2   7.6   5.4]
 [ 47.   47.9  36.4  27.6]
 [ 45.8  43.6  27.1  22.2]
 [ 40.   39.6  29.5  22.1]
 [ 52.2  50.9  47.   50.9]
 [ 62.5  51.9  50.9  43.5]
 [  5.9   4.    3.    2. ]
 [ 25.9  15.8  11.2   7.5]
 [ 14.6  11.3   9.4   8. ]
 [ 39.1  36.   31.6  25.6]
 [  7.5   6.2   5.    5.3]
 [ 32.9  32.6  35.1  26.5]
 [  7.8   6.2   4.    3. ]
 [ 10.5   5.3   4.    3. ]
 [ 60.3  46.7  35.1  31.4]
 [ 23.8  18.4  15.4  11.6]
 [ 22.3  20.5  16.4  14.4]
 [ 20.1  16.4  16.6  14.7]
 [ 19.5  16.2  12.7  11.1]
 [  7.8   6.2   4.    3. ]
 [ 67.8  56.   40.2  32.3]
 [ 11.5   9.8   9.1   8.1]
 [ 24.2  20.7  17.4  13.8]
 [ 35.2  27.5  23.8  23.2]
 

In [56]:
# Forecast step 1: multiply columns 1-3 of X_ghi (all but the oldest column)
# by the fitted coefficients to get the next value, stored as GHI2024.
sess = tf.InteractiveSession()
# Cast the coefficients to float64 before the product (presumably to match
# the dtype of X_ghi — TODO confirm).
best_beta_val = np.array(best_beta_val, dtype=np.float64)
X_ghi_2024 = tf.matmul(X_ghi[:,1:], best_beta_val, name="predictions")
s = X_ghi_2024.eval()
print(s)

#close the session to release resources
sess.close()

# Put the input columns and the new prediction into one DataFrame;
# rename(index=str) also turns the row labels into strings.
X_ghi_df = pd.DataFrame(X_ghi)
X_ghi_df = X_ghi_df.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015"})
X_ghi_df['GHI2024'] = s

[[ 26.4546976 ]
 [ 12.43469454]
 [  8.05606701]
 [ 31.36724661]
 [  2.49178531]
 [ 11.80558237]
 [ 18.92240677]
 [ 16.94490308]
 [  2.3401095 ]
 [ 18.20898169]
 [ 16.37741897]
 [ 14.05774471]
 [  7.25034898]
 [  3.91707274]
 [ 25.67786075]
 [ 25.44863084]
 [ 21.7069727 ]
 [ 13.23394812]
 [ 18.74767412]
 [  2.3401095 ]
 [  9.52140455]
 [  5.07931014]
 [ 16.05158435]
 [  2.21153874]
 [ 12.1916055 ]
 [  3.71377666]
 [  2.87650816]
 [ 22.18318198]
 [  9.1774141 ]
 [  9.30986084]
 [  5.29725149]
 [  7.54448445]
 [  3.71377666]
 [ 30.0818231 ]
 [  3.63060593]
 [  9.8435452 ]
 [  9.94879223]
 [  8.4723056 ]
 [ 16.06218547]
 [ 11.50664034]
 [ 21.51713009]
 [ 19.47430527]
 [  7.31654045]
 [ 16.41262318]
 [  9.44561574]
 [ 14.25809344]
 [  8.69568123]
 [  7.49872361]
 [  9.14315855]
 [  2.44463488]
 [  5.08006651]
 [  6.42999595]
 [ 20.54453752]
 [  2.3401095 ]
 [ 11.92414535]
 [ 26.03302527]
 [  4.17892582]
 [  2.91796221]
 [ 14.51591212]
 [ 20.9574823 ]
 [  3.43468716]
 [  3.55783854]
 [ 14.83

In [58]:
# Forecast step 2: take columns 2-4 of the previous table (the last three
# columns, including the GHI2024 prediction) and predict the next value.
X_ghi_2024 = np.array(X_ghi_df)
sess = tf.InteractiveSession()
best_beta_val = np.array(best_beta_val, dtype=np.float64)
X_ghi_2032 = tf.matmul(X_ghi_2024[:,2:], best_beta_val, name="predictions")
s = X_ghi_2032.eval()
print(s)

#close the session to release resources
sess.close()

# X_ghi_2024 is rebound from array to DataFrame and gains the GHI2032 column.
X_ghi_2024 = pd.DataFrame(X_ghi_2024)
X_ghi_2024 = X_ghi_2024.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015",4: "GHI2024"})
X_ghi_2024['GHI2032'] = s
X_ghi_2024

[[ 17.21427419]
 [  7.0283629 ]
 [  5.04003536]
 [ 15.78326082]
 [  3.61669148]
 [  2.80450049]
 [  1.82625751]
 [ 18.21643366]
 [  1.22796637]
 [ 17.01867095]
 [ 11.1834275 ]
 [ 18.73637887]
 [  0.20916213]
 [  4.38206091]
 [ 16.51445957]
 [  8.19847681]
 [ 12.82144595]
 [ 33.49543235]
 [ 33.86626077]
 [  1.22796637]
 [  4.08250164]
 [  5.22395136]
 [ 18.25968304]
 [  3.05472808]
 [ 23.94634969]
 [  1.24691936]
 [  1.78099926]
 [ 17.40086935]
 [  8.06524797]
 [  8.8127653 ]
 [ 11.54787126]
 [  6.61261438]
 [  1.24691936]
 [ 17.07538174]
 [  5.86545606]
 [  9.42369839]
 [ 14.98048066]
 [  2.04681847]
 [  9.55901864]
 [ 12.58604122]
 [ 16.3425212 ]
 [ 15.71479813]
 [ 10.66702465]
 [ 27.96075661]
 [  9.28785901]
 [ 22.92130302]
 [ 20.00828877]
 [  3.0434804 ]
 [ 17.2724797 ]
 [  5.23003067]
 [  2.57124087]
 [  5.83505053]
 [ 13.69459024]
 [  1.22796637]
 [  4.53329897]
 [ 13.50052149]
 [  0.95020831]
 [  5.48277135]
 [ 16.31496686]
 [ 21.58094536]
 [  1.424946  ]
 [  3.49836309]
 [ 23.42

Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032
0,50.2,52.7,37.9,33.3,26.454698,17.214274
1,20.8,21.6,16.5,11.1,12.434695,7.028363
2,17.5,15.6,11.3,9.5,8.056067,5.040035
3,65.8,57.5,39.7,32.5,31.367247,15.783261
4,7.0,6.6,5.8,5.4,2.491785,3.616691
5,23.8,18.4,11.4,7.7,11.805582,2.804500
6,36.5,27.5,15.3,9.6,18.922407,1.826258
7,53.6,37.6,32.2,26.5,16.944903,18.216434
8,5.0,4.0,3.0,2.0,2.340109,1.227966
9,44.5,37.5,31.7,24.4,18.208982,17.018671


In [59]:
# Forecast step 3: take columns 3-5 of the previous table (the last three
# columns, two of which are predictions) and predict the next value.
X_ghi_2032 = np.array(X_ghi_2024)
sess = tf.InteractiveSession()
best_beta_val = np.array(best_beta_val, dtype=np.float64)
X_ghi_2040 = tf.matmul(X_ghi_2032[:,3:], best_beta_val, name="predictions")
s = X_ghi_2040.eval()
print(s)

#close the session to release resources
sess.close()

# X_ghi_2032 is rebound from array to DataFrame and gains the GHI2040 column.
X_ghi_2032 = pd.DataFrame(X_ghi_2032)
X_ghi_2032 = X_ghi_2032.rename(index=str, columns={0: "GHI1992", 1: "GHI2000",2: "GHI2008",3: "GHI2015",4: "GHI2024",5: "GHI2032"})
X_ghi_2032['GHI2040'] = s
X_ghi_2032

[[ 19.06952476]
 [  5.40650744]
 [  5.34006567]
 [ 19.06564867]
 [  2.6291072 ]
 [  4.95991125]
 [  7.10164363]
 [ 12.43808103]
 [  0.99514626]
 [ 11.2041117 ]
 [  8.29246812]
 [ 10.25414511]
 [  4.63566277]
 [  2.09085513]
 [ 14.24049584]
 [ 14.52956164]
 [ 11.61896809]
 [ 25.52141151]
 [ 18.2070964 ]
 [  0.99514626]
 [  4.03882273]
 [  3.93180169]
 [ 11.60458486]
 [  2.90438284]
 [  8.94992808]
 [  1.86513152]
 [  1.55384341]
 [ 17.33288561]
 [  5.32459301]
 [  7.44794963]
 [  6.12322436]
 [  5.84337989]
 [  1.86513152]
 [ 18.10049161]
 [  3.66648364]
 [  6.48133114]
 [ 11.67786004]
 [  5.56024057]
 [  8.4094201 ]
 [ 10.824799  ]
 [ 15.42653537]
 [ 17.75926069]
 [  5.68391555]
 [ 13.40431969]
 [  7.04709953]
 [ 14.08970532]
 [  7.39833648]
 [  4.86563445]
 [  9.96502398]
 [  4.02041538]
 [  4.414508  ]
 [  1.4479187 ]
 [ 10.07948332]
 [  0.99514626]
 [  5.34145406]
 [ 16.05753965]
 [  2.03806936]
 [  3.93560968]
 [ 11.50355058]
 [ 18.33767568]
 [  1.76136882]
 [  2.57413279]
 [ 20.16

Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032,GHI2040
0,50.2,52.7,37.9,33.3,26.454698,17.214274,19.069525
1,20.8,21.6,16.5,11.1,12.434695,7.028363,5.406507
2,17.5,15.6,11.3,9.5,8.056067,5.040035,5.340066
3,65.8,57.5,39.7,32.5,31.367247,15.783261,19.065649
4,7.0,6.6,5.8,5.4,2.491785,3.616691,2.629107
5,23.8,18.4,11.4,7.7,11.805582,2.804500,4.959911
6,36.5,27.5,15.3,9.6,18.922407,1.826258,7.101644
7,53.6,37.6,32.2,26.5,16.944903,18.216434,12.438081
8,5.0,4.0,3.0,2.0,2.340109,1.227966,0.995146
9,44.5,37.5,31.7,24.4,18.208982,17.018671,11.204112


In [121]:
# Round every column to two decimals for display.
# Note: despite its name, X_ghi_2032 also carries the GHI2040 column.
X_ghi_2032 = X_ghi_2032.round(2)
X_ghi_2032

Unnamed: 0,GHI1992,GHI2000,GHI2008,GHI2015,GHI2024,GHI2032,GHI2040
0,50.2,52.7,37.9,33.3,26.45,17.21,19.07
1,20.8,21.6,16.5,11.1,12.43,7.03,5.41
2,17.5,15.6,11.3,9.5,8.06,5.04,5.34
3,65.8,57.5,39.7,32.5,31.37,15.78,19.07
4,7.0,6.6,5.8,5.4,2.49,3.62,2.63
5,23.8,18.4,11.4,7.7,11.81,2.80,4.96
6,36.5,27.5,15.3,9.6,18.92,1.83,7.10
7,53.6,37.6,32.2,26.5,16.94,18.22,12.44
8,5.0,4.0,3.0,2.0,2.34,1.23,1.00
9,44.5,37.5,31.7,24.4,18.21,17.02,11.20


In [109]:
# Build the matrix for the first indicator: fields 1, 5, 9 and 13 of every
# flattened row, without the header row (first value "UN9193").
X_param1 = np.array(
    GHI_rdd4
    .map(lambda row: [row[1], row[5], row[9], row[13]])
    .filter(lambda values: values[0] != "UN9193")
    .collect()
)
print(X_param1)

best_beta_val_UN1 = RidgeRegression(X_param1, 1)  # original note: adjusting penalty: worse

[[  37.4   46.1   24.6   23. ]
 [   7.7    7.2    9.7    4.9]
 [   9.1   10.7    7.5    4.6]
 [  65.4   50.    23.7   14. ]
 [   4.7    3.5    4.1    3.6]
 [  52.7   23.8    5.2    4.4]
 [  81.7   23.2    2.6    1.2]
 [  36.1   20.8   16.4   15.1]
 [   8.2    2.3    1.6    1.1]
 [  28.    22.6   12.8   10.3]
 [  35.9   33.4   28.1   20.2]
 [  26.8   35.6   31.9   26. ]
 [  14.2   12.     2.6    2.3]
 [   1.9    4.8    5.8    3.4]
 [  22.7   25.4   22.6   20.2]
 [  30.9   29.2   18.7   15.3]
 [  36.5   30.6   14.5    7.9]
 [  47.7   42.6   34.7   58.6]
 [  55.7   40.1   40.7   32.5]
 [   7.4    4.7    4.1    3.7]
 [  23.3   15.9   13.5    9.6]
 [  12.6    9.7    9.5    7.1]
 [  43.7   32.5   33.    28.2]
 [   5.5    5.1    5.1    5.6]
 [  13.3   17.6   16.5   15.4]
 [  20.9   10.4    2.2    1.4]
 [  13.2    3.7    2.3    0.8]
 [  75.7   48.1   24.1   12.8]
 [  29.2   28.1   20.2   13.5]
 [  16.2   18.5   14.6   12.1]
 [   5.8    5.3    4.6    4.5]
 [  16.    11.2   11.6   12.3]
 [  13.5

In [110]:
# Forecast step 1 for the UN indicator: multiply columns 1-3 of X_param1 by
# the fitted coefficients; the result is stored as UN2325.
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
X_param1_2024 = tf.matmul(X_param1[:,1:], best_beta_val_UN1, name="predictions")
s = X_param1_2024.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param1_df = pd.DataFrame(X_param1)
X_param1_df = X_param1_df.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param1_df['UN2325'] = s

[[  7.56392551]
 [  2.27872072]
 [  1.62415511]
 [  3.42848881]
 [  1.54843478]
 [  0.1209874 ]
 [ -1.3855872 ]
 [  5.85750847]
 [  0.39357373]
 [  3.37885963]
 [  7.91474874]
 [ 10.45605572]
 [  0.09234589]
 [  1.51000399]
 [  8.12147676]
 [  5.38973964]
 [  1.8238047 ]
 [ 23.45476734]
 [ 13.55242065]
 [  1.47944835]
 [  3.77295051]
 [  2.93281669]
 [ 11.74799132]
 [  2.31790048]
 [  6.2834905 ]
 [ -0.16845512]
 [  0.20920306]
 [  3.14873397]
 [  4.89586037]
 [  4.66541675]
 [  1.80047683]
 [  5.12939692]
 [  0.78023262]
 [ 10.4646397 ]
 [  1.85860588]
 [  2.98632618]
 [  4.35222445]
 [  2.27615264]
 [  2.27271471]
 [  6.02389558]
 [  6.54843686]
 [ 11.4672753 ]
 [  3.80778352]
 [ 19.19133762]
 [  5.76785351]
 [  5.93183441]
 [  3.23658298]
 [  2.41969486]
 [ 11.64337258]
 [  3.56160011]
 [  1.33456067]
 [  0.64338792]
 [  7.33415834]
 [  0.93204964]
 [  2.0693757 ]
 [  5.82723897]
 [  0.23849116]
 [  2.41430643]
 [  5.86967094]
 [ 17.60769182]
 [  0.59426218]
 [  1.26813832]
 [ 17.30

In [111]:
# Forecast step 2 for the UN indicator: use columns 2-4 of the previous
# table (the last three, including the UN2325 prediction) as inputs.
X_param1_df_2024 = np.array(X_param1_df)
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
X_param1_2032 = tf.matmul(X_param1_df_2024[:,2:], best_beta_val_UN1, name="predictions")
s = X_param1_2032.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param1_df_2024 = pd.DataFrame(X_param1_df_2024)
X_param1_df_2024 = X_param1_df_2024.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416",4:"UN2325"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param1_df_2024['UN3133'] = s

[[  3.04988190e+00]
 [  5.14694467e-01]
 [  4.19147228e-01]
 [  5.74277080e-01]
 [  6.04016841e-01]
 [ -5.74843565e-03]
 [ -1.23441732e-01]
 [  2.34517518e+00]
 [  1.20095709e-01]
 [  1.19885092e+00]
 [  2.60482759e+00]
 [  3.85427290e+00]
 [  1.97666763e-02]
 [  4.13260614e-01]
 [  3.19381911e+00]
 [  1.96160583e+00]
 [  1.75328114e-01]
 [  1.20511384e+01]
 [  4.94006933e+00]
 [  5.85294409e-01]
 [  1.22930696e+00]
 [  1.01199204e+00]
 [  4.49403513e+00]
 [  1.01948187e+00]
 [  2.53946904e+00]
 [ -6.76224649e-02]
 [ -4.84279557e-02]
 [  3.07950843e-01]
 [  1.44915653e+00]
 [  1.73393555e+00]
 [  7.47745136e-01]
 [  2.21842219e+00]
 [  3.02749609e-01]
 [  3.78671909e+00]
 [  8.08666062e-01]
 [  9.60195910e-01]
 [  1.79307792e+00]
 [  9.71403309e-01]
 [  1.07328087e+00]
 [  2.50713849e+00]
 [  2.65672033e+00]
 [  5.23184013e+00]
 [  1.33994372e+00]
 [  7.54066575e+00]
 [  2.35301046e+00]
 [  2.32737829e+00]
 [  5.25661666e-01]
 [  9.32489001e-01]
 [  4.72450285e+00]
 [  1.51356713e+00]


In [113]:
# Forecast step 3 for the UN indicator: use columns 3-5 of the previous
# table (the last three, two of which are predictions) as inputs.
X_param1_df_2032 = np.array(X_param1_df_2024)
sess = tf.InteractiveSession()
best_beta_val_UN1 = np.array(best_beta_val_UN1, dtype=np.float64)
X_param1_2040 = tf.matmul(X_param1_df_2032[:,3:], best_beta_val_UN1, name="predictions")
s = X_param1_2040.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param1_df_2032 = pd.DataFrame(X_param1_df_2032)
X_param1_df_2032 = X_param1_df_2032.rename(index=str, columns={0: "UN9193", 1: "UN9901",2: "UN0709",3: "UN1416",4:"UN2325",5:"UN3133"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param1_df_2032['UN3941'] = s

[[ -1.33130946e-01]
 [ -1.98706931e-02]
 [ -9.42389766e-02]
 [ -7.19546445e-01]
 [  6.60142922e-02]
 [ -3.91769499e-01]
 [ -1.10005760e-01]
 [  1.38612412e-01]
 [ -1.38901323e-02]
 [ -1.28918161e-01]
 [ -2.54166297e-02]
 [  1.98741776e-01]
 [ -1.93893782e-01]
 [  2.42177593e-03]
 [  2.35978673e-01]
 [ -8.18499228e-02]
 [ -4.77650662e-01]
 [  1.81722232e+00]
 [  3.44751163e-01]
 [  4.25616438e-02]
 [ -1.45197269e-02]
 [  4.50721389e-02]
 [  3.83197973e-01]
 [  1.26771299e-01]
 [  2.31598245e-01]
 [ -1.28340054e-01]
 [ -5.33052693e-02]
 [ -7.45573407e-01]
 [ -1.74335185e-01]
 [  4.87535600e-02]
 [  6.66002430e-02]
 [  2.73580135e-01]
 [ -3.02828713e-02]
 [ -8.48265075e-02]
 [  8.79885075e-02]
 [  3.81165737e-02]
 [  1.53029899e-01]
 [ -2.52877576e-02]
 [ -3.88303378e-02]
 [  1.75145081e-01]
 [  1.12553377e-01]
 [  6.49793998e-01]
 [  1.35036723e-01]
 [  6.40541961e-01]
 [  1.60743068e-01]
 [  1.93432090e-01]
 [ -1.98667853e-01]
 [  1.10001709e-01]
 [  5.04385280e-01]
 [  1.91850137e-01]


In [122]:
# Round every column to two decimals for display.
# Note: despite its name, X_param1_df_2032 also carries the UN3941 column.
X_param1_df_2032 = X_param1_df_2032.round(2)
X_param1_df_2032

Unnamed: 0,UN9193,UN9901,UN0709,UN1416,UN2325,UN3133,UN3941
0,37.4,46.1,24.6,23.0,7.56,3.05,0.00
1,7.7,7.2,9.7,4.9,2.28,0.51,0.00
2,9.1,10.7,7.5,4.6,1.62,0.42,0.00
3,65.4,50.0,23.7,14.0,3.43,0.57,0.00
4,4.7,3.5,4.1,3.6,1.55,0.60,0.07
5,52.7,23.8,5.2,4.4,0.12,0.00,0.00
6,81.7,23.2,2.6,1.2,0.00,0.00,0.00
7,36.1,20.8,16.4,15.1,5.86,2.35,0.14
8,8.2,2.3,1.6,1.1,0.39,0.12,0.00
9,28.0,22.6,12.8,10.3,3.38,1.20,0.00


In [118]:
# Build the matrix for the second indicator: fields 2, 6, 10 and 14 of every
# flattened row, without the header row (first value "stu9094").
X_param2 = np.array(
    GHI_rdd4
    .map(lambda row: [row[2], row[6], row[10], row[14]])
    .filter(lambda values: values[0] != "stu9094")
    .collect()
)
print(X_param2)

best_beta_val_STU = RidgeRegression(X_param2, 1)  # original note: adjusting penalty: worse

[[ 51.3  54.7  51.3  40.9]
 [ 37.2  39.2  23.1  17.9]
 [ 22.9  23.6  13.2  11.7]
 [ 52.9  47.1  29.2  37.6]
 [  7.1   9.9   8.2   7.5]
 [ 23.5  17.7  20.8   9.4]
 [ 30.   24.1  26.8  18. ]
 [ 71.5  54.   43.2  36.1]
 [  7.5   6.    4.2   2.8]
 [ 43.7  39.1  44.7  34. ]
 [ 37.1  33.1  27.2  18.1]
 [ 41.   29.1  31.4  23.5]
 [ 13.3  10.    7.1   7.9]
 [ 12.8  10.2   8.1   6.2]
 [ 40.7  45.5  35.1  27.3]
 [ 58.5  49.2  39.5  32.4]
 [ 36.3  38.2  36.4  31.7]
 [ 42.6  44.6  45.1  42.6]
 [ 47.2  39.3  38.7  39.9]
 [  4.2   3.    2.    1.8]
 [ 38.   17.8   9.8   6.3]
 [ 22.   18.1  12.7  12.2]
 [ 32.2  28.9  28.8  21.2]
 [ 10.7   7.9   5.6   5.2]
 [ 34.2  31.5  39.   29.6]
 [  1.3   1.4   1.3   1. ]
 [  8.6   7.    6.1   4.4]
 [ 34.3  26.5  33.   33.5]
 [ 21.2   8.   10.1   7.1]
 [ 38.1  32.5  26.3  25.2]
 [ 33.1  24.6  30.7  22.3]
 [ 29.5  32.3  20.6  13.6]
 [ 11.4   5.4   4.4   3.8]
 [ 66.9  57.4  46.4  38.4]
 [  4.3   5.8   6.4   4.9]
 [ 30.3  26.3  19.2  17.5]
 [ 33.9  24.1  25.5  25. ]
 

In [119]:
# Forecast step 1 for the "stu" indicator: multiply columns 1-3 of X_param2
# by the fitted coefficients; the result is stored as stu2226.
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
X_param2_2024 = tf.matmul(X_param2[:,1:], best_beta_val_STU, name="predictions")
s = X_param2_2024.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param2_df = pd.DataFrame(X_param2)
X_param2_df = X_param2_df.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param2_df['stu2226'] = s

[[ 28.21256802]
 [ 21.87541182]
 [ 12.07272449]
 [ 14.47777467]
 [  4.4536756 ]
 [ 14.3562902 ]
 [ 14.67557445]
 [ 27.23896527]
 [  3.65410657]
 [ 20.70976908]
 [ 20.22609978]
 [ 15.83077846]
 [  3.61700461]
 [  5.59682095]
 [ 24.69665229]
 [ 25.2758106 ]
 [ 17.55624757]
 [ 17.51271722]
 [ 13.03309686]
 [  1.46140223]
 [ 10.99538927]
 [  8.10657408]
 [ 16.1263535 ]
 [  3.66601501]
 [ 16.5461112 ]
 [  0.75174849]
 [  4.01505921]
 [  7.30274701]
 [  4.63207071]
 [ 13.83951466]
 [ 13.68844283]
 [ 19.75471849]
 [  2.61794024]
 [ 29.1878623 ]
 [  3.06159816]
 [ 12.35307531]
 [  8.51572077]
 [  7.64382203]
 [ 19.3396143 ]
 [ 19.25572515]
 [ 23.21968274]
 [ 14.87447898]
 [  8.96854813]
 [ 15.69642527]
 [ 18.90874803]
 [ 28.05890958]
 [ 18.37699597]
 [ 10.84758861]
 [ 14.13712208]
 [  2.28053441]
 [  5.53617218]
 [ 11.41035922]
 [ 23.05740919]
 [  1.28780291]
 [ 17.51254277]
 [ 20.06959448]
 [  2.89765966]
 [  6.88798828]
 [ 28.26557846]
 [ 23.11007838]
 [  3.57846715]
 [  3.807164  ]
 [ 22.77

In [123]:
# Forecast step 2 for the "stu" indicator: use columns 2-4 of the previous
# table (the last three, including the stu2226 prediction) as inputs.
X_param2_df_2024 = np.array(X_param2_df)
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
X_param2_2032 = tf.matmul(X_param2_df_2024[:,2:], best_beta_val_STU, name="predictions")
s = X_param2_2032.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param2_df_2024 = pd.DataFrame(X_param2_df_2024)
X_param2_df_2024 = X_param2_df_2024.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216",4:"stu2226"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param2_df_2024['stu3034'] = s

[[ 30.55502179]
 [  6.31813289]
 [  4.72601013]
 [ 26.2337111 ]
 [  5.43895764]
 [  6.29369925]
 [ 14.22333646]
 [ 23.89140568]
 [  1.15985421]
 [ 28.78002469]
 [ 10.18039614]
 [ 19.01061073]
 [  5.64317442]
 [  3.79504713]
 [ 16.32866758]
 [ 21.23318293]
 [ 25.02727927]
 [ 36.08700634]
 [ 34.30131424]
 [  1.01792053]
 [  0.65317188]
 [  7.78957446]
 [ 15.99323675]
 [  3.2679457 ]
 [ 26.26568117]
 [  0.72617461]
 [  2.87116682]
 [ 31.9486712 ]
 [  6.23050812]
 [ 18.40031216]
 [ 19.62577901]
 [  4.18643675]
 [  2.6209989 ]
 [ 25.51618995]
 [  4.06227149]
 [ 11.19870796]
 [ 21.97226192]
 [  6.29933944]
 [ 12.08738822]
 [ 38.54203772]
 [ 23.77803106]
 [ 19.70906171]
 [ 10.7409953 ]
 [ 17.2426092 ]
 [ 15.27805164]
 [ 27.30096838]
 [ 29.10929539]
 [  0.10331696]
 [ 17.40258118]
 [  4.37534091]
 [  4.81271272]
 [  5.86582521]
 [ 16.97907382]
 [  4.66585004]
 [  6.77006618]
 [ 36.23118084]
 [  2.58298564]
 [  9.49578703]
 [ 20.82664812]
 [ 22.70578989]
 [  2.94333011]
 [  5.79885475]
 [ 36.73

In [125]:
# Forecast step 3 for the "stu" indicator: use columns 3-5 of the previous
# table (the last three, two of which are predictions) as inputs.
X_param2_df_2032 = np.array(X_param2_df_2024)
sess = tf.InteractiveSession()
best_beta_val_STU = np.array(best_beta_val_STU, dtype=np.float64)
X_param2_2040 = tf.matmul(X_param2_df_2032[:,3:], best_beta_val_STU, name="predictions")
s = X_param2_2040.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param2_df_2032 = pd.DataFrame(X_param2_df_2032)
X_param2_df_2032 = X_param2_df_2032.rename(index=str, columns={0: "stu9094", 1: "stu9802",2: "stu0610",3: "stu1216",4:"stu2226",5:"stu3034"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param2_df_2032['stu3842'] = s

[[  1.57265429e+01]
 [  1.74558564e+01]
 [  9.76149650e+00]
 [  9.82105270e+00]
 [  2.62966458e+00]
 [  8.36551250e+00]
 [  7.51492999e+00]
 [  1.75272050e+01]
 [  2.71986727e+00]
 [  8.97550953e+00]
 [  1.36824085e+01]
 [  7.69803059e+00]
 [  2.26638400e+00]
 [  3.73899904e+00]
 [  1.67898874e+01]
 [  1.63346700e+01]
 [  8.84834948e+00]
 [  6.74354841e+00]
 [  4.13433273e+00]
 [  1.06420252e+00]
 [  9.12173087e+00]
 [  5.56192967e+00]
 [  8.82846348e+00]
 [  2.52339689e+00]
 [  6.08039269e+00]
 [  4.33745792e-01]
 [  2.53738534e+00]
 [ -9.22946637e-01]
 [  1.86524863e+00]
 [  8.14017096e+00]
 [  5.35721743e+00]
 [  1.54071684e+01]
 [  1.62962075e+00]
 [  1.86771040e+01]
 [  1.40116281e+00]
 [  8.34354647e+00]
 [  2.40095038e+00]
 [  5.33706245e+00]
 [  1.21427802e+01]
 [  8.09826017e+00]
 [  1.39762522e+01]
 [  9.11169557e+00]
 [  3.59342357e+00]
 [  8.04517470e+00]
 [  1.17679609e+01]
 [  1.66829078e+01]
 [  8.92338704e+00]
 [  1.00144539e+01]
 [  7.39278157e+00]
 [  1.22309023e+00]


In [127]:
# Round every column to two decimals for display.
# Note: despite its name, X_param2_df_2032 also carries the stu3842 column.
X_param2_df_2032 = X_param2_df_2032.round(2)
X_param2_df_2032

Unnamed: 0,stu9094,stu9802,stu0610,stu1216,stu2226,stu3034,stu3842
0,51.3,54.7,51.3,40.9,28.21,30.56,15.73
1,37.2,39.2,23.1,17.9,21.88,6.32,17.46
2,22.9,23.6,13.2,11.7,12.07,4.73,9.76
3,52.9,47.1,29.2,37.6,14.48,26.23,9.82
4,7.1,9.9,8.2,7.5,4.45,5.44,2.63
5,23.5,17.7,20.8,9.4,14.36,6.29,8.37
6,30.0,24.1,26.8,18.0,14.68,14.22,7.51
7,71.5,54.0,43.2,36.1,27.24,23.89,17.53
8,7.5,6.0,4.2,2.8,3.65,1.16,2.72
9,43.7,39.1,44.7,34.0,20.71,28.78,8.98


In [184]:
# Build the matrix for the third indicator: fields 3, 7, 11 and 15 of every
# flattened row, without the header row (first value "wast9094").
X_param3 = np.array(
    GHI_rdd4
    .map(lambda row: [row[3], row[7], row[11], row[15]])
    .filter(lambda values: values[0] != "wast9094")
    .collect()
)
print(X_param3)

best_beta_val_WAST = RidgeRegression(X_param3, 1)

[[ 11.5  13.4   8.9   9.5]
 [  9.4  12.2   9.4   6.2]
 [  7.1   3.1   4.1   4.1]
 [  7.9   8.7   8.2   4.9]
 [  1.6   1.6   1.2   1.6]
 [  5.    2.5   4.2   4.2]
 [  6.6   9.    6.8   3.1]
 [ 16.1  13.8  17.5  14.3]
 [  2.6   2.    1.9   1.8]
 [ 11.2   9.    8.4   4.5]
 [  3.6   1.6   1.4   1.6]
 [ 13.1   6.    7.2   6.8]
 [  2.9   2.2   1.6   1.9]
 [  3.4   3.2   3.4   2.8]
 [ 15.5  15.7  11.3   7.6]
 [ 13.9  16.9   8.9   9.6]
 [  4.5   6.2   7.3   5.2]
 [  9.7  10.5  12.2   7.2]
 [ 14.6  13.9  15.7  13. ]
 [  0.5   0.5   0.3   0.3]
 [  3.9   2.5   2.6   1.8]
 [  1.7   1.1   0.9   1.1]
 [  7.1   7.1   7.2   8.2]
 [  2.1   1.7   1.    1.4]
 [  8.3   6.9  14.    7.6]
 [  1.6   1.3   1.    1.2]
 [  3.3   2.4   2.4   2. ]
 [ 17.3  19.4  17.   21.5]
 [  2.2   1.5   2.3   2.4]
 [  2.8   3.2   2.7   2.3]
 [  4.3   6.9   7.9   9.5]
 [  1.4   1.5   1.6   2.1]
 [  4.6   2.7   2.5   2.5]
 [  9.2  12.4  10.8   9.9]
 [  9.8   7.2   6.3   5.3]
 [  4.    4.3   3.7   3.4]
 [ 10.8   8.9   8.5  11.1]
 

In [187]:
# Forecast step 1 for the "wast" indicator: multiply columns 1-3 of X_param3
# by the fitted coefficients; the result is stored as wast2226.
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
X_param3_2024 = tf.matmul(X_param3[:,1:], best_beta_val_WAST, name="predictions")
s = X_param3_2024.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param3_df = pd.DataFrame(X_param3)
X_param3_df = X_param3_df.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param3_df['wast2226'] = s

[[  6.93627322]
 [  5.88587551]
 [  2.87339331]
 [  4.81822803]
 [  1.02762089]
 [  2.89321471]
 [  3.8030402 ]
 [ 11.22599153]
 [  1.33971317]
 [  4.76681762]
 [  1.08898847]
 [  4.95223314]
 [  1.29631466]
 [  2.22698348]
 [  7.20208357]
 [  7.23042655]
 [  4.45741088]
 [  6.95629228]
 [ 10.24205998]
 [  0.23057058]
 [  1.59169675]
 [  0.7295022 ]
 [  5.50690338]
 [  0.9061446 ]
 [  7.37587807]
 [  0.80883887]
 [  1.59043787]
 [ 13.92091129]
 [  1.62789562]
 [  1.84332666]
 [  6.14587381]
 [  1.31178692]
 [  1.81231015]
 [  7.5799674 ]
 [  4.25874109]
 [  2.60351266]
 [  7.01914957]
 [  1.46459149]
 [  4.99337178]
 [  0.84919817]
 [  5.94738992]
 [  4.37711535]
 [  5.14820562]
 [  5.33328818]
 [  0.99168273]
 [ 14.5014484 ]
 [  9.50986588]
 [  2.86942795]
 [  4.06068569]
 [  1.88076348]
 [  1.48750316]
 [  2.73648666]
 [  4.04934375]
 [  1.88570606]
 [  1.61982089]
 [  5.7033526 ]
 [  1.83645975]
 [  2.91434606]
 [  2.6407817 ]
 [  4.83519894]
 [  1.7667266 ]
 [  1.83305771]
 [  6.55

In [188]:
# Forecast step 2 for the "wast" indicator: use columns 2-4 of the previous
# table (the last three, including the wast2226 prediction) as inputs.
X_param3_df_2024 = np.array(X_param3_df)
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
X_param3_2032 = tf.matmul(X_param3_df_2024[:,2:], best_beta_val_WAST, name="predictions")
s = X_param3_2032.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param3_df_2024 = pd.DataFrame(X_param3_df_2024)
X_param3_df_2024 = X_param3_df_2024.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216",4: "wast2226"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param3_df_2024['wast3034'] = s

[[  5.91972816]
 [  4.58959818]
 [  2.53351258]
 [  3.74084816]
 [  0.92728279]
 [  2.57833027]
 [  2.74151812]
 [  9.481151  ]
 [  1.14613245]
 [  3.61562843]
 [  0.96288793]
 [  4.29470508]
 [  1.13984058]
 [  1.86422863]
 [  5.60505683]
 [  6.04975945]
 [  3.64408229]
 [  5.46626265]
 [  8.6160388 ]
 [  0.19224263]
 [  1.28331337]
 [  0.65085885]
 [  4.91161279]
 [  0.81000893]
 [  5.86461829]
 [  0.71577725]
 [  1.32937696]
 [ 12.56337062]
 [  1.45732372]
 [  1.52915736]
 [  5.57838416]
 [  1.20643377]
 [  1.56517059]
 [  6.40121369]
 [  3.53327316]
 [  2.19781877]
 [  6.4089019 ]
 [  1.28872586]
 [  3.77583076]
 [  0.58342868]
 [  5.01948127]
 [  3.67644837]
 [  4.20840014]
 [  4.16308379]
 [  0.86865625]
 [ 12.8292048 ]
 [  8.45521146]
 [  2.38711993]
 [  3.42918478]
 [  1.71195138]
 [  1.35783164]
 [  2.239951  ]
 [  3.11573775]
 [  1.75174389]
 [  1.50293765]
 [  4.43309282]
 [  1.64306004]
 [  2.46273202]
 [  2.04118211]
 [  3.93160884]
 [  1.59719011]
 [  1.61774049]
 [  5.53

In [190]:
# Forecast step 3 for the "wast" indicator: use columns 3-5 of the previous
# table (the last three, two of which are predictions) as inputs.
X_param3_df_2032 = np.array(X_param3_df_2024)
sess = tf.InteractiveSession()
best_beta_val_WAST = np.array(best_beta_val_WAST, dtype=np.float64)
X_param3_2040 = tf.matmul(X_param3_df_2032[:,3:], best_beta_val_WAST, name="predictions")
s = X_param3_2040.eval()
# Printed before clipping, so the output can still show negative values.
print(s)

#close the session to release resources
sess.close()

X_param3_df_2032 = pd.DataFrame(X_param3_df_2032)
X_param3_df_2032 = X_param3_df_2032.rename(index=str, columns={0: "wast9094", 1: "wast9802",2: "wast0610",3: "wast1216",4: "wast2226",5: "wast3034"})
# Clip negative predictions to zero (in place) before storing them.
s[s < 0] = 0
X_param3_df_2032['wast3842'] = s

[[  4.83438754]
 [  3.81734642]
 [  2.04235164]
 [  3.10638211]
 [  0.74752457]
 [  2.07100976]
 [  2.32346038]
 [  7.71056422]
 [  0.93207953]
 [  3.01855803]
 [  0.77837977]
 [  3.47590864]
 [  0.92407761]
 [  1.52125197]
 [  4.66832223]
 [  4.97600118]
 [  2.98530619]
 [  4.51627009]
 [  7.01976095]
 [  0.15799415]
 [  1.0557292 ]
 [  0.52549384]
 [  3.95860895]
 [  0.65576408]
 [  4.80931339]
 [  0.57920232]
 [  1.08577929]
 [ 10.11409566]
 [  1.17024319]
 [  1.25316732]
 [  4.47657732]
 [  0.96619512]
 [  1.27069223]
 [  5.22427214]
 [  2.89436144]
 [  1.79408876]
 [  5.14406148]
 [  1.04831414]
 [  3.15705908]
 [  0.50968957]
 [  4.10044085]
 [  3.03111508]
 [  3.47713412]
 [  3.4293464 ]
 [  0.701818  ]
 [ 10.34481239]
 [  6.77797516]
 [  1.96193455]
 [  2.79843567]
 [  1.37846716]
 [  1.09356295]
 [  1.82680177]
 [  2.59237946]
 [  1.40086269]
 [  1.21292886]
 [  3.72336208]
 [  1.32672689]
 [  2.00869381]
 [  1.70798532]
 [  3.22809289]
 [  1.28983798]
 [  1.30225276]
 [  4.51

In [192]:
# Round every wasting column to two decimals, then show the frame as the
# cell's output.
X_param3_df_2032 = X_param3_df_2032.round(decimals=2)
X_param3_df_2032

Unnamed: 0,wast9094,wast9802,wast0610,wast1216,wast2226,wast3034,wast3842
0,11.5,13.4,8.9,9.5,6.94,5.92,4.83
1,9.4,12.2,9.4,6.2,5.89,4.59,3.82
2,7.1,3.1,4.1,4.1,2.87,2.53,2.04
3,7.9,8.7,8.2,4.9,4.82,3.74,3.11
4,1.6,1.6,1.2,1.6,1.03,0.93,0.75
5,5.0,2.5,4.2,4.2,2.89,2.58,2.07
6,6.6,9.0,6.8,3.1,3.80,2.74,2.32
7,16.1,13.8,17.5,14.3,11.23,9.48,7.71
8,2.6,2.0,1.9,1.8,1.34,1.15,0.93
9,11.2,9.0,8.4,4.5,4.77,3.62,3.02


In [194]:
# Keep fields 4, 8, 12 and 16 of each GHI_rdd4 record and drop any row whose
# first kept field is the literal "UM1992" (the column-label row, judging by
# the value being filtered out).
um_rows = (
    GHI_rdd4
    .map(lambda rec: [rec[4], rec[8], rec[12], rec[16]])
    .filter(lambda rec: rec[0] != "UM1992")
    .collect()
)
X_param4 = np.array(um_rows)
print(X_param4)

# NOTE(review): RidgeRegression is defined elsewhere in the notebook; the
# meaning of the second argument (1) is not visible here -- confirm.
best_beta_val_UM = RidgeRegression(X_param4, 1)

[[ 16.8  13.7  11.    9.1]
 [  3.7   2.6   1.8   1.4]
 [  4.5   4.    2.9   2.6]
 [ 22.6  21.7  19.2  15.7]
 [  2.6   2.    1.5   1.3]
 [  4.5   3.    2.    1.4]
 [  9.5   7.4   4.3   3.2]
 [ 13.2   8.8   5.6   3.8]
 [  1.7   1.4   0.7   0.5]
 [ 17.   14.5  11.6  10. ]
 [ 11.5   8.    5.2   3.8]
 [  5.9   8.3   6.2   4.4]
 [  5.5   3.2   1.8   1.6]
 [  2.3   2.1   1.4   1. ]
 [ 20.2  18.6  13.    8.9]
 [ 11.8  10.8   5.2   2.9]
 [ 14.3  15.   11.3   8.8]
 [ 17.6  17.5  15.8  13. ]
 [ 20.9  19.   16.8  13.9]
 [  1.6   1.1   0.9   0.8]
 [  5.2   3.7   1.9   1.1]
 [  3.3   2.5   2.    1.6]
 [  9.7  12.2   7.3   4.5]
 [  1.6   1.3   1.    1. ]
 [ 15.3  14.6  11.7   9.3]
 [  1.1   0.8   0.6   0.4]
 [  1.2   0.8   0.6   0.6]
 [ 11.5  10.1   8.1   6.5]
 [  5.6   4.1   3.5   3.1]
 [  5.2   3.4   2.7   2.2]
 [  7.8   4.7   3.1   2.4]
 [  5.3   3.2   2.2   1.7]
 [  1.9   1.1   0.6   0.3]
 [ 19.5  14.5   8.7   5.9]
 [  2.8   2.5   2.4   2.2]
 [  9.1   8.5   6.9   5.1]
 [ 15.9  11.9   8.7   6.9]
 

In [None]:
['UM1992' 'UM2000' 'UM2008' 'UM2015']

In [195]:
# Extend the UM table with a predicted 'UM2024' column.
# Columns 1: of X_param4 are the three most recent columns
# (UM2000, UM2008, UM2015 per the rename below).
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# Context-managed session: it is closed even if building or running the op
# raises, so a failed run cannot leak a session into later cells.
with tf.Session() as sess:
    X_param4_2024 = tf.matmul(X_param4[:,1:], best_beta_val_UM, name="predictions")
    s = sess.run(X_param4_2024)

# Printed values are the raw predictions, before negatives are clipped below.
print(s)

X_param4_df = pd.DataFrame(X_param4)
X_param4_df = X_param4_df.rename(
    index=str,
    columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015"},
)
# Negative predictions are replaced with 0 before being stored.
s[s < 0] = 0
X_param4_df['UM2024'] = s

[[  5.0976683 ]
 [  1.15982113]
 [  1.69720354]
 [  6.88510875]
 [  0.81508999]
 [  1.38956576]
 [  3.85178878]
 [  4.25443883]
 [  0.80467722]
 [  5.42413827]
 [  3.79494958]
 [  3.398243  ]
 [  1.70573717]
 [  0.97271887]
 [  8.21341758]
 [  6.34046398]
 [  6.07857818]
 [  5.34101294]
 [  6.03602423]
 [  0.3981289 ]
 [  2.09288227]
 [  0.93505448]
 [  6.20806758]
 [  0.51322962]
 [  5.44726321]
 [  0.32589895]
 [  0.32612736]
 [  3.7642565 ]
 [  1.38658113]
 [  1.28510622]
 [  2.19956357]
 [  1.43775713]
 [  0.59862856]
 [  7.36313916]
 [  0.66764546]
 [  3.11177337]
 [  4.99963483]
 [  2.00204854]
 [  3.83093745]
 [  2.16178497]
 [  7.36203367]
 [  7.62033802]
 [  1.46401746]
 [  3.99445795]
 [  1.55772162]
 [  3.85818907]
 [  2.31952806]
 [  1.7775103 ]
 [  1.5494164 ]
 [  0.72923423]
 [  1.07407958]
 [  2.26069551]
 [  5.13632397]
 [  0.44609185]
 [  2.18004614]
 [  4.97548205]
 [  0.8097694 ]
 [  1.08261321]
 [  2.80943039]
 [  9.79113924]
 [  0.55575778]
 [  0.65156948]
 [  5.42

In [197]:
# Extend the UM table with a predicted 'UM2032' column.
# Columns 2: of the rebuilt matrix are the three most recent columns
# (UM2008, UM2015, UM2024 per the rename below).
X_param4_df_2024 = np.array(X_param4_df)
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# Context-managed session: it is closed even if building or running the op
# raises, so a failed run cannot leak a session into later cells.
with tf.Session() as sess:
    X_param4_2032 = tf.matmul(X_param4_df_2024[:,2:], best_beta_val_UM, name="predictions")
    s = sess.run(X_param4_2032)

# Printed values are the raw predictions, before negatives are clipped below.
print(s)

X_param4_df_2024 = pd.DataFrame(X_param4_df_2024)
X_param4_df_2024 = X_param4_df_2024.rename(
    index=str,
    columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015",4: "UM2024"},
)
# Negative predictions are replaced with 0 before being stored.
s[s < 0] = 0
X_param4_df_2024['UM2032'] = s

[[ 3.91095076]
 [ 0.70005758]
 [ 0.89681587]
 [ 6.94716786]
 [ 0.49384363]
 [ 0.88221584]
 [ 1.77040629]
 [ 2.55104795]
 [ 0.30243674]
 [ 3.85379917]
 [ 2.18673143]
 [ 2.69361637]
 [ 0.56663393]
 [ 0.6041464 ]
 [ 5.86751503]
 [ 2.79285064]
 [ 4.38598553]
 [ 5.66281073]
 [ 5.96987178]
 [ 0.28279762]
 [ 0.99314183]
 [ 0.74764964]
 [ 3.63022912]
 [ 0.23982989]
 [ 4.41393846]
 [ 0.27796557]
 [ 0.14391871]
 [ 3.01455037]
 [ 1.10703093]
 [ 0.9825436 ]
 [ 1.21333259]
 [ 0.86309607]
 [ 0.34530061]
 [ 3.96648953]
 [ 0.70899461]
 [ 2.86075983]
 [ 3.29355462]
 [ 0.92601452]
 [ 3.12474928]
 [ 1.42385926]
 [ 4.71289344]
 [ 5.41684757]
 [ 1.20756642]
 [ 2.9286149 ]
 [ 1.11690207]
 [ 2.78983916]
 [ 1.4671386 ]
 [ 0.83955971]
 [ 1.31303714]
 [ 0.65646664]
 [ 0.79565716]
 [ 1.42889835]
 [ 3.08806705]
 [ 0.39772471]
 [ 1.68722481]
 [ 3.33662623]
 [ 0.46516362]
 [ 0.46547522]
 [ 4.61199658]
 [ 4.50527439]
 [ 0.3931004 ]
 [ 0.68997801]
 [ 2.74853331]
 [ 4.81571265]
 [ 0.25884579]
 [ 5.76412236]
 [ 3.67460

In [199]:
# Extend the UM table with a predicted 'UM2040' column.
# Columns 3: of the rebuilt matrix are the three most recent columns
# (UM2015, UM2024, UM2032 per the rename below).
X_param4_df_2032 = np.array(X_param4_df_2024)
best_beta_val_UM = np.array(best_beta_val_UM, dtype=np.float64)

# Context-managed session: it is closed even if building or running the op
# raises, so a failed run cannot leak a session into later cells.
with tf.Session() as sess:
    X_param4_2040 = tf.matmul(X_param4_df_2032[:,3:], best_beta_val_UM, name="predictions")
    s = sess.run(X_param4_2040)

# Printed values are the raw predictions; some can be negative here and are
# only clipped to 0 below, before being stored.
print(s)

X_param4_df_2032 = pd.DataFrame(X_param4_df_2032)
X_param4_df_2032 = X_param4_df_2032.rename(
    index=str,
    columns={0: "UM1992", 1: "UM2000",2: "UM2008",3: "UM2015",4: "UM2024",5: "UM2032"},
)
s[s < 0] = 0
X_param4_df_2032['UM2040'] = s

[[  4.86408996e+00]
 [  4.96717188e-01]
 [  1.22814431e+00]
 [  9.67211513e+00]
 [  6.36584838e-01]
 [  3.42942190e-01]
 [  3.30749833e-01]
 [  6.07458594e-01]
 [ -8.42382545e-02]
 [  5.46374432e+00]
 [  9.15008590e-01]
 [  1.72716200e+00]
 [  3.12568303e-01]
 [  2.58218503e-01]
 [  2.59614241e+00]
 [ -1.60892509e+00]
 [  3.93434791e+00]
 [  8.24996203e+00]
 [  8.60302282e+00]
 [  4.61066303e-01]
 [ -4.01162727e-01]
 [  8.29314037e-01]
 [ -6.40649566e-02]
 [  5.65768499e-01]
 [  4.81224932e+00]
 [  1.45680118e-01]
 [  3.27269814e-01]
 [  3.39211993e+00]
 [  1.89131429e+00]
 [  1.14065278e+00]
 [  7.09910330e-01]
 [  5.83464596e-01]
 [ -1.27984021e-01]
 [  4.35420077e-01]
 [  1.55418457e+00]
 [  2.55599060e+00]
 [  2.92823574e+00]
 [ -2.49411448e-01]
 [  3.07471002e+00]
 [  1.19021102e+00]
 [  3.62019128e+00]
 [  3.35692252e+00]
 [  2.56711202e+00]
 [  3.60152432e+00]
 [  7.76193795e-01]
 [  1.78279140e+00]
 [  9.02639529e-01]
 [  2.64775096e-01]
 [  1.87335950e+00]
 [  9.67157966e-01]


In [203]:
# Round every UM column to two decimals, then show the frame as the cell's
# output.
X_param4_df_2032 = X_param4_df_2032.round(decimals=2)
X_param4_df_2032

Unnamed: 0,UM1992,UM2000,UM2008,UM2015,UM2024,UM2032,UM2040
0,16.8,13.7,11.0,9.1,5.10,3.91,4.86
1,3.7,2.6,1.8,1.4,1.16,0.70,0.50
2,4.5,4.0,2.9,2.6,1.70,0.90,1.23
3,22.6,21.7,19.2,15.7,6.89,6.95,9.67
4,2.6,2.0,1.5,1.3,0.82,0.49,0.64
5,4.5,3.0,2.0,1.4,1.39,0.88,0.34
6,9.5,7.4,4.3,3.2,3.85,1.77,0.33
7,13.2,8.8,5.6,3.8,4.25,2.55,0.61
8,1.7,1.4,0.7,0.5,0.80,0.30,0.00
9,17.0,14.5,11.6,10.0,5.42,3.85,5.46


In [218]:
# Pull each forecast column out of its frame as a plain Python list, one
# group of three columns per source frame.
UM2024, UM2032, UM2040 = (
    X_param4_df_2032[col].tolist() for col in ('UM2024', 'UM2032', 'UM2040')
)
wast2226, wast3034, wast3842 = (
    X_param3_df_2032[col].tolist() for col in ('wast2226', 'wast3034', 'wast3842')
)
stu2226, stu3034, stu3842 = (
    X_param2_df_2032[col].tolist() for col in ('stu2226', 'stu3034', 'stu3842')
)
UN2325, UN3133, UN3941 = (
    X_param1_df_2032[col].tolist() for col in ('UN2325', 'UN3133', 'UN3941')
)
GHI2024_a, GHI2032_a, GHI2040_a = (
    X_ghi_2032[col].tolist() for col in ('GHI2024', 'GHI2032', 'GHI2040')
)

In [219]:
# Composite score per row for each forecast period: the UN and UM lists are
# weighted 1/3 each, the wast and stu lists 1/6 each; the result is rounded
# to two decimals. The row count is taken from UM2024.
GHI2024_b = [
    round((UN2325[k]/3) + (wast2226[k]/6) + (stu2226[k]/6) + (UM2024[k]/3), 2)
    for k in range(len(UM2024))
]
GHI2032_b = [
    round((UN3133[k]/3) + (wast3034[k]/6) + (stu3034[k]/6) + (UM2032[k]/3), 2)
    for k in range(len(UM2024))
]
GHI2040_b = [
    round((UN3941[k]/3) + (wast3842[k]/6) + (stu3842[k]/6) + (UM2040[k]/3), 2)
    for k in range(len(UM2024))
]

In [225]:
# Final score per row: the mean of the "_a" list and the "_b" list for the
# same period, rounded to two decimals. The row count is taken from GHI2024_a.
GHI2024 = [
    round((GHI2024_a[k] + GHI2024_b[k])/2, 2) for k in range(len(GHI2024_a))
]
GHI2032 = [
    round((GHI2032_a[k] + GHI2032_b[k])/2, 2) for k in range(len(GHI2024_a))
]
GHI2040 = [
    round((GHI2040_a[k] + GHI2040_b[k])/2, 2) for k in range(len(GHI2024_a))
]

In [235]:
# Collect the three final forecast lists into one frame, one column per period.
df1 = pd.DataFrame(
    data={
        'GHI2024': GHI2024,
        'GHI2032': GHI2032,
        'GHI2040': GHI2040,
    }
)

In [236]:
# Show the combined forecast frame as the cell's output.
df1

Unnamed: 0,GHI2024,GHI2032,GHI2040
0,18.27,12.80,12.06
1,9.10,4.62,4.56
2,5.83,3.34,3.86
3,19.02,11.64,12.22
4,2.10,2.52,1.71
5,7.60,2.29,3.41
6,11.64,2.62,4.42
7,13.36,12.71,8.45
8,1.78,0.88,0.80
9,12.70,12.05,7.51
