In [10]:
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
import os.path
import csv
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn import grid_search
import sklearn.linear_model as linear_model
import sklearn.metrics as metrics
from sklearn import preprocessing

class PatientPhenotype:
    
    def __init__(self, eid, case, sex, yearBirth):
        
        self.eid = eid
        self.case = case
        self.sex = sex
        self.yearBirth = yearBirth
        self.snps = {}
        
    def getEid(self):
        return self.eid
     
    def getCase(self):
        return self.case
    
    def getSex(self):
        return self.sex
    
    def getYearBirth(self):
        return self.yearBirth
        
    def addSnps(self, snpId, allele1,allele2):
        self.snps[snpId] = Snp(snpId,allele1,allele2)
        
    def snpCode(self,chromosomes):
    
         for i in range(len(chromosomes.keys())):
    
            chro = 'chr'+str(i+1)
            print(chro)
            for snp in chromosomes[chro].keys():
                
                allele1 = chromosomes[chro][snp][0]
                allele2 = chromosomes[chro][snp][1]
    
                self.snps[snp].setSnpCode(allele1,allele2)
        
    def getSnpCode(self,snpId):
        return self.snps[snpId].getSnpCode()
        
    def getSize(self):
        return len(self.snps)
        
        
class Snp:
    
    def __init__(self,snpId,allele1,allele2):
        
        self.snpId = snpId
        self.allele1 = allele1
        self.allele2 = allele2
        self.snpCode = -1
        
    def getId(self):
        
        return self.snpId
        
    def getAllele1(self):
        
        return self.allele1
        
    def getAllele2(self):
        
        return self.allele2
        
    def setSnpCode(self,allele1,allele2):
      
        if self.allele1 == allele1 and self.allele2 == allele1:
            code = 2
           
        elif self.allele1 == allele1 and self.allele2 != allele1:
            code = 1
           
        elif self.allele1 != allele1 and self.allele2 == allele1:
            code = 1
            
        elif self.allele1 != allele1 and self.allele2 != allele1:
            code = 0
            
        self.snpCode = code
        
    def getSnpCode(self):
        
        return self.snpCode
    
class Read:
    def __init__(self,path,numberOfChromosomes):
        
        self.chromosomes = {}
        self.numberOfSnps = 0
        self.path = path
        self.numberOfChromosomes = numberOfChromosomes
        
    def readPatients(self,kind):
        patients = {}
        f = open(self.path + kind,'r')
        f.readline()

        for line in f:
            patients[line.split()[0]] = PatientPhenotype(line.split()[0],line.split()[3],line.split()[1],line.split()[2])
        
        return patients
    
    def readSnps(self,fileKind):
        
        for i in range(self.numberOfChromosomes):
    
            chro = 'chr'+str(i+1)
            path = self.path + chro + fileKind
            f = open(path,'r')
            f.readline()
    
            self.chromosomes[chro] = self.__readSnpsOfChromosome(f)
        
        return self.chromosomes
    
    def __readSnpsOfChromosome(self,file):
        
        snps = {}   
        for line in file:
            
            alleles = []
            alleles.append(line.split()[3])
            alleles.append(line.split()[6])
            snps[line.split()[1]] = alleles
            self.numberOfSnps += 1
            
        return snps
        
    def readLgen(self,patients,kind):
    
        for i in range(self.numberOfChromosomes):
    
            chro = 'chr'+str(i+1)
            path = self.path + chro + kind +'.lgen'
    
            if os.path.exists(path):
                
                f = open(path,'r')
    
                for line in f:
                    patients[line.split()[0]].addSnps(line.split()[2], line.split()[3],line.split()[4])
        
        return patients
    
    def getListOfSnps(self):
        snps = []
        for i in range(self.numberOfChromosomes):
            chro = 'chr'+str(i+1)
            for snp in self.chromosomes[chro].keys():
                snps.append(snp)
        
        return snps
        
    def getNumberOfSnps(self):
        
        return self.numberOfSnps
    
class Write:
    
    def __init__(self,path,numberOfChromosomes):
        
        self.path = path
        self.numberOfChromosomes = numberOfChromosomes
        
    def writePatientsList(self,patients,kind):
        
        path = self.path + kind
        write = open(path,'w')
        for patient in patients.keys():
            write.write(patient + '\n')
            
        write.close()
        
        
    def writeSnpsList(self,chromosomes):
        
        for i in range(self.numberOfChromosomes):
    
            chro = 'chr'+str(i+1)
            path = self.path + chro + 'snpList.txt'
            write = open(path,'w')
            for snp in chromosomes[chro].keys():
                write.write(snp + '\n')
            
            write.close()
            
class DataSet:
    
    
    
    def __init__(self,patients,ids):
        
        self.n = len(ids['patients']['nameToId'].keys())
        self.m =len(ids['snps']['nameToId'].keys()) 
        self.patients = patients
        self.ids = ids
                     
        self.xTable = np.zeros((self.n,self.m),dtype = int)
        self.yTable = np.zeros((self.n,1),dtype = int)
                     
        self.__fillXTable()
        self.__fillYTable()
                     
                     
    def __fillXTable(self):
    
        for i in range(self.n):
            for j in range(self.m):
        
                patient = self.ids['patients']['idToName'][i]
                snp = self.ids['snps']['idToName'][j]
        
                self.xTable[i,j] = self.patients[patient].getSnpCode(snp)
                     
    def __fillYTable(self):
    
        for i in range(self.n):
    
            patient = self.ids['patients']['idToName'][i]
            self.yTable[i] = self.patients[patient].getCase()
        
    def getXTable(self):
                     
        return self.xTable
                     
    def getYTable(self):
                     
        return self.yTable
    

def setIdToName(aList):
    
    ids = {}
    nameToId = {}
    idToName = {}
    count = 0
    
    for i in aList:
        
        nameToId[i] = count
        idToName[count] = i
        count += 1
        
    ids['nameToId'] = nameToId
    ids['idToName'] = idToName
    
    return ids


def setSnpsCode(patients,chromosomes):
    
    for i in patients.keys():
        patients[i].snpCode(chromosomes)
        
    return patients
    
    
def mergeXTrainXTestTable(test,train):
    
    n = len(test) + len(train)
    m = len(test.T)
    count = 0
    
    mergeTable = np.zeros((n,m),dtype = int)
    
    for i in range(len(train)):
        for j in range(len(train.T)):
            mergeTable[count,j] = train[i,j]
        count += 1
    
    
    for i in range(len(test)):
        for j in range(len(test.T)):
            mergeTable[count,j] = test[i,j]
        count += 1

        
    return mergeTable


def mergeYTrainYTestTable(test,train):
    
    n = len(test) + len(train)
   
    count = 0
    
    mergeTable = np.zeros((n,1),dtype = int)
    
    for i in range(len(train)):
        mergeTable[count] = train[i]
        count += 1
    
    for i in range(len(test)):
        mergeTable[count] = test[i]
        count += 1
        
    return mergeTable

In [11]:
path = 'C:\\Users\\Antonis\\Desktop\\test1\\'
numberOfChromosomes = 8
patientsTrain = {}
patientsTest = {}
chromosomes = {}

read = Read(path,numberOfChromosomes)
write = Write(path,numberOfChromosomes)

patientsTrain = read.readPatients('phenotype_euro_train.txt')
patientsTest = read.readPatients('phenotype_euro_test.txt')

chromosomes = read.readSnps(".assoc.fisher")

write.writePatientsList(patientsTrain,'trainPatient.txt')
write.writePatientsList(patientsTest,'testPatient.txt')

write.writeSnpsList(chromosomes)


In [12]:
for i in patientsTest.keys():
    print(patientsTest[i].getSize())
    
print(read.getNumberOfSnps())

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
42


# run train_lgen bat and test_leg bat

In [13]:
patientsTrain = read.readLgen(patientsTrain,'train')
patientsTest = read.readLgen(patientsTest,'test')

patientsTrain = setSnpsCode(patientsTrain,chromosomes)
patientsTest = setSnpsCode(patientsTest,chromosomes)

snps = read.getListOfSnps()

ids = {} 
idsTest = {}

ids['patients'] = setIdToName(list(patientsTrain.keys()))
ids['snps'] = setIdToName(snps)

idsTest['patients'] = setIdToName(list(patientsTest.keys()))
idsTest['snps'] = ids['snps']




chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8


In [14]:
trainSet = DataSet(patientsTrain,ids)
testSet = DataSet(patientsTest,idsTest)

xTraining = trainSet.getXTable()
yTraining = trainSet.getYTable()
        
xTest = testSet.getXTable()
yTest = testSet.getYTable()


mergeXtable = mergeXTrainXTestTable(xTraining,xTest)
mergeYtable = mergeYTrainYTestTable(yTraining,yTest)
    


In [18]:
count = 0

for patient in patientsTrain.keys():
            
    if patientsTrain[patient].getSize() == read.getNumberOfSnps():
        
        count += 1
        
print("countTrain is ",count)
print("patientsTrain is ",len(patientsTrain.keys())) 
print("snps is Train",read.getNumberOfSnps())
print("shape id ",xTraining.shape)
print("xTraining = ",len(xTraining))
print("xTraining.T = ",len(xTraining.T))
print("yTraining = ",len(yTraining))

print()

count = 0

for patient in patientsTest.keys():
            
    if patientsTest[patient].getSize() == read.getNumberOfSnps():
        
        count += 1
        
print("count is Test",count)
print("patientsTest is ",len(patientsTrain.keys()))
print("snps is Test",read.getNumberOfSnps())
print("shape id Test",xTest.shape)
print("xtest = ",len(xTest))
print("xtest.T = ",len(xTest.T))
print("ytest = ",len(yTest))

print()
print("mergeXtable = ",len(mergeXtable))
print("mergeXtable.T = ",len(mergeXtable.T))
print("mergeYtable = ",len(mergeYtable))
print("shape id ",mergeXtable.shape)

print()


print("id to name patients",len(ids['patients']['idToName'].keys()))
print("name to id patients",len(ids['patients']['nameToId'].keys()))
print()
print("id to name snps",len(ids['snps']['idToName'].keys()))
print("name to id snps",len(ids['snps']['nameToId'].keys()))

print()
count0 = 0
count1 = 0
count2 = 0
count = 0

for i in range(len(xTraining)):
    for j in range(len(xTraining.T)):
        if xTraining[i,j] == 2:
            count2 += 1
                
        elif xTraining[i,j] == 1:
            count1 += 1
                
        elif xTraining[i,j] == 0:
            
            count0 +=1
        else:
            count += 1
            
print("xTraining")
print("count = ",count)
print("count0 = ",count0)
print("count1 = ",count1)
print("coun2 = ",count2)

print()

count0 = 0
count1 = 0
count2 = 0
count = 0

for i in range(len(xTest)):
    for j in range(len(xTest.T)):
        if xTest[i,j] == 2:
            count2 += 1
                
        elif xTest[i,j] == 1:
            count1 += 1
                
        elif xTest[i,j] == 0:
            
            count0 +=1
        else:
            count += 1
            
print("xTest")
print("count = ",count)
print("count0 = ",count0)
print("count1 = ",count1)
print("coun2 = ",count2)

count0 = 0
count1 = 0
count2 = 0
count = 0

for i in range(len(mergeXtable)):
    for j in range(len(mergeXtable.T)):
        if mergeXtable[i,j] == 2:
            count2 += 1
                
        elif mergeXtable[i,j] == 1:
            count1 += 1
                
        elif mergeXtable[i,j] == 0:
            
            count0 +=1
        else:
            count += 1
print()
            
print("mergeXtable")
print("count = ",count)
print("count0 = ",count0)
print("count1 = ",count1)
print("coun2 = ",count2)

countTrain is  4482
patientsTrain is  4482
snps is Train 42
shape id  (4482, 42)
xTraining =  4482
xTraining.T =  42
yTraining =  4482

count is Test 498
patientsTest is  4482
snps is Test 42
shape id Test (498, 42)
xtest =  498
xtest.T =  42
ytest =  498

mergeXtable =  4980
mergeXtable.T =  42
mergeYtable =  4980
shape id  (4980, 42)

id to name patients 4482
name to id patients 4482

id to name snps 42
name to id snps 42

xTraining
count =  0
count0 =  132786
count1 =  43762
coun2 =  11696

xTest
count =  0
count0 =  14685
count1 =  4782
coun2 =  1449

mergeXtable
count =  0
count0 =  147471
count1 =  48544
coun2 =  13145


# Linear Regression

In [19]:


regr = linear_model.LinearRegression()
regr.fit(xTraining, yTraining)
yPredict1 = regr.predict(xTest)



In [21]:
count = 0
for i in range(len(yPredict1)):
    count += yPredict1[i]

mo = count / len(yPredict1)

for i in range(len(yPredict1)):
    if yPredict1[i] < mo:
        yPredict1[i] = 0
    else:
        yPredict1[i] = 1

error1 = mean_squared_error(yTest, yPredict1)
print("error 1 = ",error1)
print(metrics.accuracy_score(yTest,yPredict1))
print(metrics.confusion_matrix(yTest,yPredict1))
RMSE1 = mean_squared_error(yTest,yPredict1)**0.5
print("RMSE1 = ",RMSE1)

print("cros validation = ",cross_val_score(regr, mergeXtable, mergeYtable, cv=5))

error 1 =  0.411646586345
0.588353413655
[[226 164]
 [ 41  67]]
RMSE1 =  0.641596903317
cros validation =  [ 0.06680242  0.03472705  0.08662203  0.08328843  0.059651  ]


# SVM

In [23]:

clf = SVC()
clf.fit(xTraining, yTraining)
yPredict2 = clf.predict(xTest)
print(metrics.accuracy_score(yTest,yPredict2))
print(metrics.confusion_matrix(yTest,yPredict2))
error2 = mean_squared_error(yTest, yPredict2)
print("error 2 = ",error2)
RMSE2 = mean_squared_error(yTest,yPredict2)**0.5
print("RMSE2 = ",RMSE2)

#print("cros validation = ",cross_val_score(clf, mergeXtable, mergeYtable, cv=5))

  y = column_or_1d(y, warn=True)


0.78313253012
[[390   0]
 [108   0]]
error 2 =  0.21686746988
RMSE2 =  0.465690315424


# RF

In [38]:

rfr = RandomForestRegressor(n_estimators = 500, random_state = 2016, verbose = 20)
rfr.fit(xTraining, yTraining)
yPredict3 = rfr.predict(xTest)

mo3 = 0
s3 = 0
for i in yPredict3:
    s3 += i
    
mo3 = s3 / len(yPredict3)

  app.launch_new_instance()


building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 17 of 500
building tree 18 of 500
building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500
building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    6.8s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

In [39]:
for i in range(len(yPredict3)):
    
    if yPredict3[i] < mo3:
        yPredict3[i]=0
    else:
        yPredict3[i]=1
        
print(metrics.accuracy_score(yTest,yPredict3))
print(metrics.confusion_matrix(yTest,yPredict3))
error3 = mean_squared_error(yTest, yPredict3)
print("error 3 = ",error3)
RMSE3 = mean_squared_error(yTest,yPredict3)**0.5
print("RMSE3 = ",RMSE3)

print("cros validation = ",cross_val_score(rfr, mergeXtable, mergeYtable, cv=5))

0.60843373494
[[245 145]
 [ 50  58]]
error 3 =  0.39156626506
RMSE3 =  0.625752558972


  estimator.fit(X_train, y_train, **fit_params)


building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500
building tree 17 of 500
building tree 18 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500
building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58 of 500
building tree 59 of 500
building tree 60

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    6.2s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 17 of 500
building tree 18 of 500
building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    0.1s remaining:    0.0s


building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58 of 500
building tree 59 of 500
building tree 60 of 500
building tree 61 of 500
building tree 62 of 500
building tree 63 of 500
building tree 64 of 500
building tree 65 of 500
building tree 66 of 500
building tree 67 of 500
building tree 68 of 500
building tree 69 of 500
building tree 70 of 500
building tree 71 of 500
building tree 72 of 500
building tree 73 of 500
building tree 74 of 500
building tree 75 of 500
building tree 76

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    6.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500
building tree 17 of 500
building tree 18 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500
building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58 of 500
building tree 59 of 500
building tree 60

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.9s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500
building tree 17 of 500
building tree 18 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500
building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58 of 500
building tree 59 of 500
building tree 60

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.6s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

building tree 1 of 500
building tree 2 of 500
building tree 3 of 500
building tree 4 of 500
building tree 5 of 500
building tree 6 of 500
building tree 7 of 500
building tree 8 of 500
building tree 9 of 500
building tree 10 of 500
building tree 11 of 500
building tree 12 of 500
building tree 13 of 500
building tree 14 of 500
building tree 15 of 500
building tree 16 of 500
building tree 17 of 500
building tree 18 of 500


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  1

building tree 19 of 500
building tree 20 of 500
building tree 21 of 500
building tree 22 of 500
building tree 23 of 500
building tree 24 of 500
building tree 25 of 500
building tree 26 of 500
building tree 27 of 500
building tree 28 of 500
building tree 29 of 500
building tree 30 of 500
building tree 31 of 500
building tree 32 of 500
building tree 33 of 500
building tree 34 of 500
building tree 35 of 500
building tree 36 of 500
building tree 37 of 500
building tree 38 of 500
building tree 39 of 500
building tree 40 of 500
building tree 41 of 500
building tree 42 of 500
building tree 43 of 500
building tree 44 of 500
building tree 45 of 500
building tree 46 of 500
building tree 47 of 500
building tree 48 of 500
building tree 49 of 500
building tree 50 of 500
building tree 51 of 500
building tree 52 of 500
building tree 53 of 500
building tree 54 of 500
building tree 55 of 500
building tree 56 of 500
building tree 57 of 500
building tree 58 of 500
building tree 59 of 500
building tree 60

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.8s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapse

# LINEAR LOGISTIC REGRESSION 

In [40]:


lr_clf = linear_model.LogisticRegression()
lr_clf.fit(xTraining, yTraining)
yPredict4 = lr_clf.predict(xTest)

print(metrics.accuracy_score(yTest,yPredict4))
print(metrics.confusion_matrix(yTest,yPredict4))
error4 = mean_squared_error(yTest, yPredict4)
print("error 4 = ",error4)
RMSE4 = mean_squared_error(yTest,yPredict4)**0.5
print("RMSE4 = ",RMSE4)
#print("cros validation = ",cross_val_score(lr_clf, mergeXtable, mergeYtable, cv=5))

0.791164658635
[[384   6]
 [ 98  10]]
error 4 =  0.208835341365
RMSE4 =  0.456985055954


  y = column_or_1d(y, warn=True)
