## Data load

Machine Learning Algorithms

Date: 2022-04-08

Author: Yung-Kyun Noh

Department of Computer Science, Hanyang University & KIAS


## Regression dataset #1

In [1]:
import pickle
import urllib
import urllib.request

import numpy as np
from lifelines.utils import concordance_index


In [2]:
# Load Regression dataset #1. The stream holds four objects pickled back-to-back;
# they are read in order into data, y, testdata and testy.
# To work from a local copy instead, replace the urlopen(...) call with
# open('Regression_Dataset1', 'rb').
#
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the stream,
# and this stream is fetched over plain HTTP (no integrity protection). Only run
# this cell against a host you trust; prefer a verified local copy when possible.
target_url = 'http://aais.hanyang.ac.kr/classes/2022_Datasets/Regression_Dataset1'
# The timeout keeps an unreachable server from hanging the kernel indefinitely.
with urllib.request.urlopen(target_url, timeout=60) as handle:
    data = pickle.load(handle)
    y = pickle.load(handle)
    testdata = pickle.load(handle)
    testy = pickle.load(handle)


In [3]:
# Ridge-regularised normal equations for dataset #1.
# sigma_sq / sigma_sq0 is the amount added to the diagonal of the Gram matrix
# (presumably noise variance over prior weight variance -- confirm with the
# course notes).
sigma_sq = 1
sigma_sq0 = 5

Xy = data.T @ y        # feature/target cross term
XXT = data.T @ data    # feature Gram matrix (features x features)

ridge_penalty = sigma_sq / sigma_sq0
invXXT = np.linalg.inv(XXT + ridge_penalty * np.eye(XXT.shape[0]))


In [4]:
# Regression weights from the regularised inverse; shared by both evaluations.
ridge_weights = invXXT @ Xy

# Training split: targets next to fitted values, then the concordance index.
pred_y = data @ ridge_weights
print(y, '\n', pred_y)
print('CI:', concordance_index(y, pred_y))
print('\n')

# Held-out split: same report; pred_y ends up holding the test predictions.
pred_y = testdata @ ridge_weights
print(testy, '\n', pred_y)
print('test CI:', concordance_index(testy, pred_y))


[2 4 1 ... 8 7 6] 
 [3.21280851 2.877127   1.96875164 ... 4.68229121 7.19685805 4.59707267]
CI: 0.8290274047762353


[5 0 9 ... 3 1 6] 
 [5.67676888 2.0197942  7.1502184  ... 4.51212968 1.83149875 4.14558866]
test CI: 0.8265978241005745


In [5]:
# Predictive variance for the first `testnum` test points.
#
# The previous version appended each query x to the training matrix, re-inverted
# the augmented matrix A + x x^T (A = data^T data + (sigma_sq/sigma_sq0) I) and
# evaluated sigma_sq / (1 - x^T (A + x x^T)^{-1} x). By the Sherman-Morrison
# identity that quantity equals sigma_sq * (1 + x^T A^{-1} x), so a single solve
# against A covers every query. It also overwrote the module-level XXT / invXXT
# with matrices that contained a test point, silently changing the result of
# re-running the prediction cell; everything here uses local names instead.
testnum = 5  # use len(testdata) to score every test point

ridge_gram = data.T @ data + (sigma_sq / sigma_sq0) * np.eye(data.shape[1])
queries = testdata[:testnum]

# Column i of `solved` is A^{-1} x_i; the row-wise dot product gives x_i^T A^{-1} x_i.
solved = np.linalg.solve(ridge_gram, queries.T)
quad_forms = np.sum(queries * solved.T, axis=1)

test_vars = sigma_sq * (1.0 + quad_forms)
print(test_vars[0:testnum])


[1.03123976 1.01262719 1.01086518 1.01522543 1.00760819]


## Regression dataset #2

In [6]:
# Load Regression dataset #2. The stream holds four objects pickled back-to-back;
# they are read in order into data, y, testdata and testy (replacing the
# dataset #1 values held under the same names).
# To work from a local copy instead, replace the urlopen(...) call with
# open('Regression_Dataset2', 'rb').
#
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the stream,
# and this stream is fetched over plain HTTP (no integrity protection). Only run
# this cell against a host you trust; prefer a verified local copy when possible.
target_url = 'http://aais.hanyang.ac.kr/classes/2022_Datasets/Regression_Dataset2'
# The timeout keeps an unreachable server from hanging the kernel indefinitely.
with urllib.request.urlopen(target_url, timeout=60) as handle:
    data = pickle.load(handle)
    y = pickle.load(handle)
    testdata = pickle.load(handle)
    testy = pickle.load(handle)

In [7]:
# Ridge-regularised normal equations for dataset #2.
# sigma_sq / sigma_sq0 is the amount added to the diagonal of the Gram matrix
# (presumably noise variance over prior weight variance -- confirm with the
# course notes).
sigma_sq = 10000
sigma_sq0 = 100

Xy = data.T @ y        # feature/target cross term
XXT = data.T @ data    # feature Gram matrix (features x features)

ridge_penalty = sigma_sq / sigma_sq0
invXXT = np.linalg.inv(XXT + ridge_penalty * np.eye(XXT.shape[0]))


In [8]:
# Regression weights from the regularised inverse; shared by both evaluations.
ridge_weights = invXXT @ Xy

# Training split: targets next to floored fitted values, then the concordance index.
pred_y = data @ ridge_weights
print(y.T, '\n', np.floor(pred_y.T))
print('CI:', concordance_index(y, pred_y))
print('\n')

# Held-out split: only the first ten rows are printed; CI uses every test row.
# NOTE(review): the recorded output shows test predictions around 1e7 while the
# targets are around 1e5 -- check whether testdata went through the same
# preprocessing/scaling as data.
pred_y = testdata @ ridge_weights
print('test prediction:')
print(testy[0:10].T, '\n', np.floor(pred_y[0:10].T))
print('CI:', concordance_index(testy, pred_y))


[[190000 214000 174500 ... 212000 139000 181000]] 
 [[197540. 195827. 176544. ... 218326. 101610. 158389.]]
CI: 0.8593857558909187


test prediction:
[[122000 138500 145000 169000 162900 105000 147000 179600 142500 203000]] 
 [[70239500. 31493347. 34072926. 72821997. 49692925. 36252314. 53620118.
  42529231. 34177254. 38148129.]]
CI: 0.7424007859715979


In [9]:
# Predictive variance evaluated at the training points themselves.
#
# The previous version stacked each point x on top of the training matrix,
# re-inverted the augmented matrix A + x x^T
# (A = data^T data + (sigma_sq/sigma_sq0) I) and evaluated
# sigma_sq / (1 - x^T (A + x x^T)^{-1} x). By the Sherman-Morrison identity that
# equals sigma_sq * (1 + x^T A^{-1} x), so one solve against A replaces len(data)
# matrix inversions. It also overwrote the module-level XXT / invXXT with
# augmented matrices; everything here uses local names instead.
num = len(data)  # set to a small number (e.g. 5) for a quick check

ridge_gram = data.T @ data + (sigma_sq / sigma_sq0) * np.eye(data.shape[1])
queries = data[:num]

# Column i of `solved` is A^{-1} x_i; the row-wise dot product gives x_i^T A^{-1} x_i.
solved = np.linalg.solve(ridge_gram, queries.T)
quad_forms = np.sum(queries * solved.T, axis=1)

# NOTE: `vars` shadows the Python builtin of the same name; the name is kept so
# that any later cell reading it keeps working.
vars = sigma_sq * (1.0 + quad_forms)
print(vars[0:num])

[10018.7232647  10011.96975628 10032.83894465 ... 10029.91047951
 10016.87946753 10011.2242802 ]


In [10]:

# Predictive variance for every test point of dataset #2.
#
# The previous version appended each query x to the training matrix, re-inverted
# the augmented matrix A + x x^T (A = data^T data + (sigma_sq/sigma_sq0) I) and
# evaluated sigma_sq / (1 - x^T (A + x x^T)^{-1} x). By the Sherman-Morrison
# identity that equals sigma_sq * (1 + x^T A^{-1} x), so one solve against A
# covers all queries. It also overwrote the module-level XXT / invXXT with
# matrices that contained a test point; everything here uses local names instead.
testnum = len(testdata)

ridge_gram = data.T @ data + (sigma_sq / sigma_sq0) * np.eye(data.shape[1])
queries = testdata[:testnum]

# Column i of `solved` is A^{-1} x_i; the row-wise dot product gives x_i^T A^{-1} x_i.
solved = np.linalg.solve(ridge_gram, queries.T)
quad_forms = np.sum(queries * solved.T, axis=1)

test_vars = sigma_sq * (1.0 + quad_forms)
print(test_vars[0:testnum])

[6.42579150e+07 1.27778074e+07 1.49442536e+07 6.92453791e+07
 3.19662716e+07 1.70367526e+07 3.71806863e+07 2.33305662e+07
 1.50179788e+07 1.87181844e+07 3.34031182e+07 1.24932736e+07
 3.49391759e+07 2.78367401e+07 1.28109141e+07 4.27124884e+07
 3.93826322e+07 6.32922430e+07 1.53916635e+07 2.97312889e+07
 1.04511871e+07 3.07464772e+07 2.57580088e+07 4.46144934e+07
 2.12624276e+07 1.03825908e+07 1.71290800e+07 8.28452323e+06
 3.92886606e+07 6.06725656e+07 6.50857524e+07 3.99984781e+07
 4.69934712e+07 7.45707999e+06 1.27291961e+07 1.34131783e+07
 2.89862072e+07 2.68049077e+07 1.46884941e+07 5.22713625e+07
 8.61362067e+06 1.46363688e+07 2.59587542e+07 8.62238190e+06
 3.06710282e+07 3.43572532e+07 8.96125996e+07 3.40165286e+07
 1.46999771e+07 4.12219179e+07 8.02352485e+07 5.27588826e+07
 2.18273474e+07 1.53212290e+07 8.00955646e+06 8.28452323e+06
 3.63961602e+07 1.13093680e+07 5.14379459e+07 4.25791139e+07
 1.56728250e+07 9.12674707e+07 2.80555650e+07 4.64611210e+07
 2.68430044e+07 4.971821