In [871]:
from sklearn.cluster import KMeans
import numpy as np
import csv
import math
import matplotlib.pyplot
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix

In [872]:
maxAcc = 0.0
maxIter = 0
C_Lambda = 0.03 #Lambda value (one of the hyper parameter)
TrainingPercent = 80 #80% of the data is taken as the training dataset
ValidationPercent = 10  #10% of the data is taken as the validation dataset
TestPercent = 10 #The remaining 10% of the data is taken as the testing dataset
M = 13 #no of clusters to be formed
PHI = [] #The basis function matrix
IsSynthetic = False

In [873]:
def GenerateTrainingTarget(rawTraining,TrainingPercent = 80):
    #this function divides the target value vector by 80% to the training target vector
    TrainingLen = int(math.ceil(len(rawTraining)*(TrainingPercent*0.01)))
    t           = rawTraining[:TrainingLen]
    #print(str(TrainingPercent) + "% Training Target Generated..")
    return t

def GenerateTrainingDataMatrix(rawData, TrainingPercent = 80):
     #this function divides the input values x by 80% to the training data
    T_len = int(math.ceil(len(rawData[0])*0.01*TrainingPercent))
    d2 = rawData[:,0:T_len]
    #print(str(TrainingPercent) + "% Training Data Generated..")
    return d2

def GenerateValData(rawData, ValPercent, TrainingCount): 
    #this function divides the input values x by 10% to the validation data
    valSize = int(math.ceil(len(rawData[0])*ValPercent*0.01))
    V_End = TrainingCount + valSize
    dataMatrix = rawData[:,TrainingCount+1:V_End]
    #print (str(ValPercent) + "% Val Data Generated..")  
    return dataMatrix

def GenerateValTargetVector(rawData, ValPercent, TrainingCount): 
    #this function divides the target vector t by 10% to the validation target vector
    valSize = int(math.ceil(len(rawData)*ValPercent*0.01))
    V_End = TrainingCount + valSize
    t =rawData[TrainingCount+1:V_End]
    #print (str(ValPercent) + "% Val Target Data Generated..")
    return t

def GenerateBigSigma(Data, MuMatrix,TrainingPercent,IsSynthetic):
    # this function computes the Σ(x−μ) part of the basis function which is basically the variance computation
    BigSigma    = np.zeros((len(Data),len(Data)))
    DataT       = np.transpose(Data)
    TrainingLen = math.ceil(len(DataT)*(TrainingPercent*0.01)) #converting percentage to integer 
    #taking the length of the dataset (80% for training, 10% for validation, 10% for the testing)
    varVect     = []
    for i in range(0,len(DataT[0])):
        vct = []
        for j in range(0,int(TrainingLen)):
            vct.append(Data[i][j])    
        varVect.append(np.var(vct))
    
    for j in range(len(Data)):
        BigSigma[j][j] = varVect[j]
        #the bigsigma matrix consists of the variance values only on the diagonal of the matrix where the remaining values are 0
    if IsSynthetic == True:
        BigSigma = np.dot(3,BigSigma)
    else:
        BigSigma = np.dot(200,BigSigma)
    ##print ("BigSigma Generated..")
    return BigSigma

def GetScalar(DataRow,MuRow, BigSigInv):  
    #this function computes the  (x−μ)TΣ-1(x−μ) part of the basis function by subtracting the mean from the value x and taking the transpose of the resultant row 
    # and multiplying with the row of the inverse of bigsigma i.e. the result from the GenerateBigSigma function
    R = np.subtract(DataRow,MuRow)
    T = np.dot(BigSigInv,np.transpose(R))  
    L = np.dot(R,T)
    return L

def GetRadialBasisOut(DataRow,MuRow, BigSigInv):    
    #this function returns the value of the gradiant radial basis function
    phi_x = math.exp(-0.5*GetScalar(DataRow,MuRow,BigSigInv))
    return phi_x

def GetPhiMatrix(Data, MuMatrix, BigSigma, TrainingPercent = 80):
    #this function creates as PHI-Matrix which is a matrix consisting of the basis function values as its elements
    DataT = np.transpose(Data)
    TrainingLen = math.ceil(len(DataT)*(TrainingPercent*0.01))         
    PHI = np.zeros((int(TrainingLen),len(MuMatrix))) 
    BigSigInv = np.linalg.inv(BigSigma)
    for  C in range(0,len(MuMatrix)):
        for R in range(0,int(TrainingLen)):
            PHI[R][C] = GetRadialBasisOut(DataT[R], MuMatrix[C], BigSigInv) #φ(x) calculation
    #print ("PHI Generated..")
    return PHI

def GetWeightsClosedForm(PHI, T, Lambda):
    #this function calculates the W = (λI+ØT Ø)-1 ØTt equation
    Lambda_I = np.identity(len(PHI[0])) # lambds identity matrix (λI)
    for i in range(0,len(PHI[0])):
        Lambda_I[i][i] = Lambda
    PHI_T       = np.transpose(PHI)    #transpose of Ø
    PHI_SQR     = np.dot(PHI_T,PHI)    # equation ØT Ø
    PHI_SQR_LI  = np.add(Lambda_I,PHI_SQR)#λI+ØT Ø
    PHI_SQR_INV = np.linalg.inv(PHI_SQR_LI) # inverse of λI+ØT Ø
    INTER       = np.dot(PHI_SQR_INV, PHI_T) # (λI+ØT Ø)-1 ØT
    W           = np.dot(INTER, T) #final W value
    ##print ("Training Weights Generated..")
    return W

def GetValTest(VAL_PHI,W):
    #y(x,w) = wT * φ(x) calculation which is the linear regression function
    Y = np.dot(W,np.transpose(VAL_PHI))    # wT * φ(x) calculation
    ##print ("Test Out Generated..")
    return Y

def sigmoid(Y):
    return 1/(1+np.exp(-Y))

def cost_function(x,y,A):
    t1 = np.multiply(-y, np.log(A))
    t2 = np.multiply((1-y), np.log(1- A))
    return np.sum(t1-t2)/(len(x))

def GetErms(VAL_TEST_OUT,ValDataAct):
    #calculation of the root mean square error defined as
    # ERMS = 􏰆pow(2E(w∗)/NV,2)
    sum = 0.0
    t=0
    accuracy = 0.0
    counter = 0
    val = 0.0
    for i in range (0,len(VAL_TEST_OUT)):
        sum = sum + math.pow((ValDataAct[i] - VAL_TEST_OUT[i]),2)
        if(int(np.around(VAL_TEST_OUT[i], 0)) == ValDataAct[i]):
            counter+=1
    accuracy = (float((counter*100))/float(len(VAL_TEST_OUT)))
    ##print ("Accuracy Generated..")
    ##print ("Validation E_RMS : " + str(math.sqrt(sum/len(VAL_TEST_OUT))))
    return (str(accuracy) + ',' +  str(math.sqrt(sum/len(VAL_TEST_OUT))))

def Get_Acc(VAL_TEST_OUT,ValDataAct):
    #calculation of the root mean square error defined as
    # ERMS = 􏰆pow(2E(w∗)/NV,2)
    sum = 0.0
    t=0
    accuracy = 0.0
    counter = 0
    val = 0.0

    for i in range (0,len(VAL_TEST_OUT)):
        #sum = sum + math.pow((ValDataAct[i] - VAL_TEST_OUT[i]),2)
        if(int(np.around(VAL_TEST_OUT[i], 0)) == ValDataAct[i]):
            counter+=1
    accuracy = (float((counter*100))/float(len(VAL_TEST_OUT)))
    ##print ("Accuracy Generated..")
    ##print ("Validation E_RMS : " + str(math.sqrt(sum/len(VAL_TEST_OUT))))
    return (str(accuracy))# + ',' +  str(math.sqrt(sum/len(VAL_TEST_OUT))))


#return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

In [874]:

def gradiant(x,y,A):
    error = A -y
    return np.dot(x.T,error)/len(x)

def Linear_regression(RawData,TrainingData,ValData,TestData,TrainingTarget,ValDataAct,TestDataAct):
    ErmsArr = []
    AccuracyArr = []
    #clustering and mean calculation using the KMeans algorithm from the training data
    kmeans = KMeans(n_clusters=M, random_state=0).fit(np.transpose(TrainingData))
    Mu = kmeans.cluster_centers_

    BigSigma     = GenerateBigSigma(RawData, Mu, TrainingPercent,IsSynthetic) #bigsigma function call on the raw data for the training dataset 
    TRAINING_PHI = GetPhiMatrix(RawData, Mu, BigSigma, TrainingPercent) #the PHI-Matrix calculation
    W            = GetWeightsClosedForm(TRAINING_PHI,TrainingTarget,(C_Lambda)) #weights calculation
    TEST_PHI     = GetPhiMatrix(TestData, Mu, BigSigma, 100) #phi- matrix for the testing dataset
    VAL_PHI      = GetPhiMatrix(ValData, Mu, BigSigma, 100) #phi-matrix for the validation dataset
    TR_TEST_OUT  = GetValTest(TRAINING_PHI,W) #linear regression calculation on the training data
    VAL_TEST_OUT = GetValTest(VAL_PHI,W)#linear regression calculation on the validation data
    TEST_OUT     = GetValTest(TEST_PHI,W)#linear regression calculation on the test data

    TrainingAccuracy   = str(GetErms(TR_TEST_OUT,TrainingTarget))
    ValidationAccuracy = str(GetErms(VAL_TEST_OUT,ValDataAct))
    TestAccuracy       = str(GetErms(TEST_OUT,TestDataAct))
    print ('UBITname      = saikalya')
    print ('Person Number = 50292522')
    print ("M = "+str(M)+" \nLambda = "+str(C_Lambda))
    print ("E_rms Training   = " + str(float(TrainingAccuracy.split(',')[1])))
    print ("E_rms Validation = " + str(float(ValidationAccuracy.split(',')[1])))
    print ("E_rms Testing    = " + str(float(TestAccuracy.split(',')[1])))
    W_Now        = np.dot(220, W)
    La           = 2 #lambda value for the stochastic gradient descent solution
    learningRate = 0.01 
    L_Erms_Val   = []
    L_Erms_TR    = []
    L_Erms_Test  = []
    W_Mat        = []

    for i in range(0,400):
        #∇E = ∇ED + λ∇EW computation
        #print ('---------Iteration: ' + str(i) + '--------------')
        Delta_E_D     = -np.dot((TrainingTarget[i] - np.dot(np.transpose(W_Now),TRAINING_PHI[i])),TRAINING_PHI[i])
        # ∇ED calculation where ∇ED =−(t −w⊤φ(x))φ(x)
        La_Delta_E_W  = np.dot(La,W_Now) #λ∇EW
        Delta_E       = np.add(Delta_E_D,La_Delta_E_W)    
        Delta_W       = -np.dot(learningRate,Delta_E) #∆w = −η∇E 
        W_T_Next      = W_Now + Delta_W
        W_Now         = W_T_Next

        #Erms values for the training, validation and the testing data
        #-----------------TrainingData Accuracy---------------------#
        TR_TEST_OUT   = GetValTest(TRAINING_PHI,W_T_Next) 
        Erms_TR       = GetErms(TR_TEST_OUT,TrainingTarget)
        L_Erms_TR.append(float(Erms_TR.split(',')[1]))

        #-----------------ValidationData Accuracy---------------------#
        VAL_TEST_OUT  = GetValTest(VAL_PHI,W_T_Next) 
        Erms_Val      = GetErms(VAL_TEST_OUT,ValDataAct)
        L_Erms_Val.append(float(Erms_Val.split(',')[1]))

        #-----------------TestingData Accuracy---------------------#
        TEST_OUT      = GetValTest(TEST_PHI,W_T_Next) 
        Erms_Test = GetErms(TEST_OUT,TestDataAct)
        L_Erms_Test.append(float(Erms_Test.split(',')[1]))
    print ('----------Gradient Descent Solution--------------------')
    print ("M = "+str(M)+" \nLambda  = "+str(La)+"\neta=0.01")
    print ("E_rms Training   = " + str(np.around(min(L_Erms_TR),5)))
    print ("E_rms Validation = " + str(np.around(max(L_Erms_Val),5)))
    print ("E_rms Testing    = " + str(np.around(min(L_Erms_Test),5)))

def Logistic_Regression(Data,Target):
    Data = np.transpose(Data)
    Target = np.array(Target)
    W=[0 for i in range(len(Data[0]))]
    W= np.matrix(W) #thetha value now
    L_Erms    = []
    L_Acc = []
    Data = np.matrix(Data)
    Target = np.transpose(np.matrix(Target))
    for i in range(200):
        A= sigmoid(Data*W.T)  #applying sigmoid function on Data*WT
        grad = gradiant(Data,Target,A) #calculating gradiant xT(A-y)
        delta_W = 0.001*grad
        W_next = W - np.transpose(delta_W) #W_next = W_now - l_r * gradiant
        W = W_next
        Acc = Get_Acc(A,Target)
        L_Acc.append(float(Acc))
    print ("Accuracy = " + str(np.around(max(L_Acc),5)))  

In [875]:
#importing the datasets
HO = pd.read_csv('HumanObserved-Features-Data.csv')
SP = pd.read_csv('same_pairs.csv')
DP = pd.read_csv('diffn_pairs.csv')
GSC = pd.read_csv('GSC-Features.csv')
GSC_SP = pd.read_csv('gsc_same_pairs.csv')
GSC_DP = pd.read_csv('gsc_diffn_pairs.csv')

In [876]:
#pre-processing
#Human-Observed-Features-Concatinated dataset

DP1, DP2 =  np.split(DP, [len(SP)], axis=0)
HO_concatinated = pd.concat([SP, DP1],axis=0)
HO_subtracted = HO_concatinated
HO_concatinated = pd.merge(HO_concatinated, HO, left_on=['img_id_A'],right_on=['img_id'],how='inner')
HO_concatinated = pd.merge(HO_concatinated, HO, left_on=['img_id_B'],right_on=['img_id'],how='inner')
HO_concatinated= HO_concatinated.drop(columns=['img_id_x','Unnamed: 0_x','img_id_y','Unnamed: 0_y'])
print(HO_concatinated)



     img_id_A img_id_B  target  f1_x  f2_x  f3_x  f4_x  f5_x  f6_x  f7_x  \
0       0359a    0359b       1     2     1     1     0     2     2     0   
1       0359a    0577a       0     2     1     1     0     2     2     0   
2       0359a    1120a       0     2     1     1     0     2     2     0   
3       0577a    1120a       0     2     1     1     0     2     2     0   
4       0359a    1120b       0     2     1     1     0     2     2     0   
5       0577a    1120b       0     2     1     1     0     2     2     0   
6       1120a    1120b       1     2     1     1     3     2     2     0   
7       0359a    1120c       0     2     1     1     0     2     2     0   
8       0577a    1120c       0     2     1     1     0     2     2     0   
9       1120a    1120c       1     2     1     1     3     2     2     0   
10      1120b    1120c       1     1     1     1     0     2     2     0   
11      0359a    1121a       0     2     1     1     0     2     2     0   
12      0577

In [877]:
#Human-Observed-features-Subtracted- dataset

HO_subtracted1 = pd.merge(HO_subtracted, HO, left_on=['img_id_A'],right_on=['img_id'],how='inner')
HO_subtracted2 = pd.merge(HO_subtracted, HO, left_on=['img_id_B'],right_on=['img_id'],how='inner')
HO_subtracted_1= HO_subtracted1.drop(columns=['img_id','Unnamed: 0','img_id_A','img_id_B','target'])
HO_subtracted_2= HO_subtracted2.drop(columns=['img_id','Unnamed: 0','img_id_A','img_id_B','target'])
HO_subtracted_1= HO_subtracted_1.sub(HO_subtracted_2,axis =0)
HO_subtracted_1= np.abs(HO_subtracted_1)
HO_subtracted_1.insert(0, "img_id_A", HO_subtracted1.img_id_A, allow_duplicates=False)
HO_subtracted_1.insert(1, "img_id_B", HO_subtracted1.img_id_B, allow_duplicates=False)
HO_subtracted_1.insert(2, "target", HO_subtracted1.target, allow_duplicates=False)
print(HO_subtracted_1)

     img_id_A img_id_B  target  f1  f2  f3  f4  f5  f6  f7  f8  f9
0       0359a    0359b       1   1   1   0   0   0   0   3   2   0
1       0359a    0577a       0   0   0   1   3   0   0   1   0   0
2       0359a    1120a       0   0   0   1   3   0   0   1   0   0
3       0359a    1120b       0   1   0   0   1   0   1   0   2   0
4       0359a    1120c       0   1   0   0   1   0   1   0   2   0
5       0359a    1121a       0   1   0   0   1   0   1   0   2   0
6       0359a    1121b       0   1   0   0   0   0   0   0   0   0
7       0359a    1121c       0   1   0   0   0   0   0   0   0   0
8       0359a    1229b       0   1   0   0   0   0   0   0   0   0
9       0359a    1302a       0   0   0   0   0   0   0   0   2   0
10      0359a    1302b       0   0   0   0   0   0   0   0   2   0
11      0359a    1302c       0   0   0   0   0   0   0   0   2   0
12      0359a    1305a       0   0   0   0   0   0   0   0   2   0
13      0359a    1305b       0   0   0   0   0   0   0   0   1

In [878]:
#GSC_Concatinated dataset

GSC_SP1, GSC_SP2 =  np.split(GSC_SP, [5000], axis=0)
GSC_DP1, GSC_DP2 =  np.split(GSC_DP, [5000], axis=0)
GSC_concatinated = pd.concat([GSC_SP1, GSC_DP1],axis=0)
GSC_concatinated = pd.merge(GSC_concatinated, GSC, left_on=['img_id_A'],right_on=['img_id'])
GSC_concatinated = pd.merge(GSC_concatinated, GSC, left_on=['img_id_B'],right_on=['img_id'])
GSC_concatinated= GSC_concatinated.drop(columns=['img_id_x','img_id_y'])
numeric_cols = [i for i in GSC_concatinated if GSC_concatinated[i].dtype.kind != 'O']
numeric_cols.remove('target')
GSC_concatinated[numeric_cols] += 0.0001
print(GSC_concatinated)

            img_id_A        img_id_B  target    f1_x    f2_x    f3_x    f4_x  \
0     0001a_num1.png  0001a_num2.png       1  0.0001  0.0001  0.0001  0.0001   
1     0001a_num1.png  0001a_num3.png       1  0.0001  0.0001  0.0001  0.0001   
2     0001a_num2.png  0001a_num3.png       1  0.0001  0.0001  0.0001  0.0001   
3     0001a_num1.png  0001a_num4.png       1  0.0001  0.0001  0.0001  0.0001   
4     0001a_num2.png  0001a_num4.png       1  0.0001  0.0001  0.0001  0.0001   
5     0001a_num3.png  0001a_num4.png       1  0.0001  0.0001  0.0001  0.0001   
6     0001a_num1.png  0001a_num5.png       1  0.0001  0.0001  0.0001  0.0001   
7     0001a_num2.png  0001a_num5.png       1  0.0001  0.0001  0.0001  0.0001   
8     0001a_num3.png  0001a_num5.png       1  0.0001  0.0001  0.0001  0.0001   
9     0001a_num4.png  0001a_num5.png       1  0.0001  0.0001  0.0001  0.0001   
10    0001a_num1.png  0001b_num1.png       1  0.0001  0.0001  0.0001  0.0001   
11    0001a_num2.png  0001b_num1.png    

In [879]:
#GSC_Subtracted dataset

GSC_subtracted1 = pd.merge(GSC_subtracted, GSC, left_on=['img_id_A'],right_on=['img_id'])
GSC_subtracted2 = pd.merge(GSC_subtracted, GSC, left_on=['img_id_B'],right_on=['img_id'])
GSC_subtracted_1= GSC_subtracted1.drop(columns=['img_id','img_id_A','img_id_B','target'])
GSC_subtracted_2= GSC_subtracted2.drop(columns=['img_id','img_id_A','img_id_B','target'])
GSC_subtracted_1.sub(GSC_subtracted_2, axis=0)
GSC_subtracted_1.insert(0, "img_id_A", GSC_subtracted1.img_id_A, allow_duplicates=False)
GSC_subtracted_1.insert(1, "img_id_B", GSC_subtracted1.img_id_B, allow_duplicates=False)
GSC_subtracted_1.insert(2, "target", GSC_subtracted1.target, allow_duplicates=False)
numeric_cols = [i for i in GSC_subtracted_1 if GSC_subtracted_1[i].dtype.kind != 'O']
numeric_cols.remove('target')
GSC_subtracted_1[numeric_cols] += 0.0001
print(GSC_subtracted_1)

            img_id_A        img_id_B  target      f1      f2      f3      f4  \
0     0001a_num1.png  0001a_num2.png       1  0.0001  0.0001  0.0001  0.0001   
1     0001a_num1.png  0001a_num3.png       1  0.0001  0.0001  0.0001  0.0001   
2     0001a_num1.png  0001a_num4.png       1  0.0001  0.0001  0.0001  0.0001   
3     0001a_num1.png  0001a_num5.png       1  0.0001  0.0001  0.0001  0.0001   
4     0001a_num1.png  0001b_num1.png       1  0.0001  0.0001  0.0001  0.0001   
5     0001a_num1.png  0001b_num2.png       1  0.0001  0.0001  0.0001  0.0001   
6     0001a_num1.png  0001b_num3.png       1  0.0001  0.0001  0.0001  0.0001   
7     0001a_num1.png  0001b_num4.png       1  0.0001  0.0001  0.0001  0.0001   
8     0001a_num1.png  0001b_num5.png       1  0.0001  0.0001  0.0001  0.0001   
9     0001a_num1.png  0001c_num1.png       1  0.0001  0.0001  0.0001  0.0001   
10    0001a_num1.png  0001c_num2.png       1  0.0001  0.0001  0.0001  0.0001   
11    0001a_num1.png  0001c_num3.png    

In [880]:
#Human_Observed_concatinated
HO_concatinated= HO_concatinated.sample(frac=1)
HO_subtracted_1 = HO_subtracted_1.sample(frac=1)
GSC_concatinated = GSC_concatinated.sample(frac=1)
GSC_subtracted_1 = GSC_subtracted_1.sample(frac=1)

target_HOC = np.array(HO_concatinated.target)
target_HOC = target_HOC.tolist()
Rawdata_HOC = HO_concatinated.drop(columns=['img_id_A', 'img_id_B' , 'target'])
Rawdata_HOC = np.transpose(Rawdata_HOC)
Rawdata_HOC = np.array(Rawdata_HOC)
print(len(target_HOC))
print(Rawdata_HOC.shape)

#Human_Observed_subtracted
target_HOS = np.array(HO_subtracted_1.target)
target_HOS = target_HOS.tolist()
Rawdata_HOS = HO_subtracted_1.drop(columns=['img_id_A', 'img_id_B' , 'target'])
Rawdata_HOS = np.transpose(Rawdata_HOS)
Rawdata_HOS = np.array(Rawdata_HOS)
print(len(target_HOS))
print(Rawdata_HOS.shape)

#GSC_concatinated
target_GSCC = np.array(GSC_concatinated.target)
target_GSCC = target_GSCC.tolist()
Rawdata_GSCC = GSC_concatinated.drop(columns=['img_id_A', 'img_id_B' , 'target'])
Rawdata_GSCC = np.transpose(Rawdata_GSCC)
Rawdata_GSCC = np.array(Rawdata_GSCC)
print(len(target_GSCC))
print(Rawdata_GSCC.shape)

#GSC_subtracted
target_GSCS = np.array(GSC_subtracted_1.target)
target_GSCS = target_GSCS.tolist()
Rawdata_GSCS = GSC_subtracted_1.drop(columns=['img_id_A', 'img_id_B' , 'target'])
Rawdata_GSCS = np.transpose(Rawdata_GSCS)
Rawdata_GSCS = np.array(Rawdata_GSCS)
print(len(target_GSCS))
print(Rawdata_GSCS.shape)

1582
(18, 1582)
1582
(9, 1582)
10000
(1024, 10000)
10000
(512, 10000)


In [881]:
#generating the training, validation and testing datasets
#Human_Observed_concatinated

HOC_TrainingTarget = np.array(GenerateTrainingTarget(target_HOC,TrainingPercent))
HOC_TrainingData   = GenerateTrainingDataMatrix(Rawdata_HOC,TrainingPercent)
print(HOC_TrainingData.shape)
print(HOC_TrainingTarget.shape)

HOC_ValDataAct = np.array(GenerateValTargetVector(target_HOC,ValidationPercent, (len(HOC_TrainingTarget))))
HOC_ValData    = GenerateValData(Rawdata_HOC,ValidationPercent, (len(HOC_TrainingTarget)))
print(HOC_ValDataAct.shape)
print(HOC_ValData.shape)

HOC_TestDataAct = np.array(GenerateValTargetVector(target_HOC,TestPercent, (len(HOC_TrainingTarget)+len(HOC_ValDataAct))))
HOC_TestData = GenerateValData(Rawdata_HOC,TestPercent, (len(HOC_TrainingTarget)+len(HOC_ValDataAct)))
print(HOC_TestDataAct.shape)
print(HOC_TestData.shape)

#Human_Observed_subtracted

HOS_TrainingTarget = np.array(GenerateTrainingTarget(target_HOS,TrainingPercent))
HOS_TrainingData   = GenerateTrainingDataMatrix(Rawdata_HOS,TrainingPercent)
print(HOS_TrainingData.shape)
print(HOS_TrainingTarget.shape)

HOS_ValDataAct = np.array(GenerateValTargetVector(target_HOS,ValidationPercent, (len(HOS_TrainingTarget))))
HOS_ValData    = GenerateValData(Rawdata_HOS,ValidationPercent, (len(HOS_TrainingTarget)))
print(HOS_ValDataAct.shape)
print(HOS_ValData.shape)

HOS_TestDataAct = np.array(GenerateValTargetVector(target_HOS,TestPercent, (len(HOS_TrainingTarget)+len(HOS_ValDataAct))))
HOS_TestData = GenerateValData(Rawdata_HOS,TestPercent, (len(HOS_TrainingTarget)+len(HOS_ValDataAct)))
print(HOS_TestDataAct.shape)
print(HOS_TestData.shape)

#GSC_concatinated

GSCC_TrainingTarget = np.array(GenerateTrainingTarget(target_GSCC,TrainingPercent))
GSCC_TrainingData   = GenerateTrainingDataMatrix(Rawdata_GSCC,TrainingPercent)
print(GSCC_TrainingData.shape)
print(GSCC_TrainingTarget.shape)

GSCC_ValDataAct = np.array(GenerateValTargetVector(target_GSCC,ValidationPercent, (len(GSCC_TrainingTarget))))
GSCC_ValData    = GenerateValData(Rawdata_GSCC,ValidationPercent, (len(GSCC_TrainingTarget)))
print(GSCC_ValDataAct.shape)
print(GSCC_ValData.shape)

GSCC_TestDataAct = np.array(GenerateValTargetVector(target_GSCC,TestPercent, (len(GSCC_TrainingTarget)+len(GSCC_ValDataAct))))
GSCC_TestData = GenerateValData(Rawdata_GSCC,TestPercent, (len(GSCC_TrainingTarget)+len(GSCC_ValDataAct)))
print(GSCC_TestDataAct.shape)
print(GSCC_TestData.shape)

#GSC_Subtracted

GSCS_TrainingTarget = np.array(GenerateTrainingTarget(target_GSCS,TrainingPercent))
GSCS_TrainingData   = GenerateTrainingDataMatrix(Rawdata_GSCS,TrainingPercent)
print(GSCS_TrainingData.shape)
print(GSCS_TrainingTarget.shape)

GSCS_ValDataAct = np.array(GenerateValTargetVector(target_GSCS,ValidationPercent, (len(GSCS_TrainingTarget))))
GSCS_ValData    = GenerateValData(Rawdata_GSCS,ValidationPercent, (len(GSCS_TrainingTarget)))
print(GSCS_ValDataAct.shape)
print(GSCS_ValData.shape)

GSCS_TestDataAct = np.array(GenerateValTargetVector(target_GSCS,TestPercent, (len(GSCS_TrainingTarget)+len(GSCS_ValDataAct))))
GSCS_TestData = GenerateValData(Rawdata_GSCS,TestPercent, (len(GSCS_TrainingTarget)+len(GSCS_ValDataAct)))
print(GSCS_TestDataAct.shape)
print(GSCS_TestData.shape)


(18, 1266)
(1266,)
(158,)
(18, 158)
(157,)
(18, 157)
(9, 1266)
(1266,)
(158,)
(9, 158)
(157,)
(9, 157)
(1024, 8000)
(8000,)
(999,)
(1024, 999)
(999,)
(1024, 999)
(512, 8000)
(8000,)
(999,)
(512, 999)
(999,)
(512, 999)


In [882]:
Linear_regression(Rawdata_HOC,HOC_TrainingData,HOC_ValData,HOC_TestData,HOC_TrainingTarget,HOC_ValDataAct,HOC_TestDataAct)

UBITname      = saikalya
Person Number = 50292522
M = 13 
Lambda = 0.03
E_rms Training   = 0.3415937515788723
E_rms Validation = 0.33158969082546114
E_rms Testing    = 0.3125147661442923
----------Gradient Descent Solution--------------------
M = 13 
Lambda  = 2
eta=0.01
E_rms Training   = 0.3477
E_rms Validation = 130.23562
E_rms Testing    = 0.3092


In [883]:
Linear_regression(Rawdata_HOS,HOS_TrainingData,HOS_ValData,HOS_TestData,HOS_TrainingTarget,HOS_ValDataAct,HOS_TestDataAct)

UBITname      = saikalya
Person Number = 50292522
M = 13 
Lambda = 0.03
E_rms Training   = 0.42953835667247176
E_rms Validation = 0.4327214528179625
E_rms Testing    = 0.43618748766592463
----------Gradient Descent Solution--------------------
M = 13 
Lambda  = 2
eta=0.01
E_rms Training   = 0.42959
E_rms Validation = 94.91724
E_rms Testing    = 0.43493


In [None]:
Linear_regression(Rawdata_GSCC,GSCC_TrainingData,GSCC_ValData,GSCC_TestData,GSCC_TrainingTarget,GSCC_ValDataAct,GSCC_TestDataAct)

In [None]:
Linear_regression(Rawdata_GSCS,GSCS_TrainingData,GSCS_ValData,GSCS_TestData,GSCS_TrainingTarget,GSCS_ValDataAct,GSCS_TestDataAct)

In [None]:
print("Training")
Logistic_Regression(HOC_TrainingData,HOC_TrainingTarget)
print("Validation")
Logistic_Regression(HOC_ValData,HOC_ValDataAct)
print("Testing")
Logistic_Regression(HOC_TestData,HOC_TestDataAct)

In [None]:
print("Training")
Logistic_Regression(HOS_TrainingData,HOS_TrainingTarget)
print("Validation")
Logistic_Regression(HOS_ValData,HOS_ValDataAct)
print("Testing")
Logistic_Regression(HOS_TestData,HOS_TestDataAct)

In [None]:
print("Training")
Logistic_Regression(GSCC_TrainingData,GSCC_TrainingTarget)
print("Validation")
Logistic_Regression(GSCC_ValData,GSCC_ValDataAct)
print("Testing")
Logistic_Regression(GSCC_TestData,GSCC_TestDataAct)

In [None]:
print("Training")
Logistic_Regression(GSCS_TrainingData,GSCS_TrainingTarget)
print("Validation")
Logistic_Regression(GSCS_ValData,GSCS_ValDataAct)
print("Testing")
Logistic_Regression(GSCS_TestData,GSCS_TestDataAct)