In [1]:
%pylab inline
import warnings
warnings.filterwarnings('ignore')

random_seed = 42 #set random state to this variable (when applicable) so results can be reproduced

clfList = []

Populating the interactive namespace from numpy and matplotlib


# Loading Dataset and splitting

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

from sklearn.metrics import accuracy_score

In [3]:
#Creating partial slices of the existing train set 
X_train2 = X_train[:249]
y_train2 = y_train[:249]

X_train3 = X_train[250:]
y_train3 = y_train[250:]

In [4]:
class Miner:
    def __init__(self, clf, X_train, y_train, uuid):
        self.clf = clf
        self.performance = 0
        self.correctPredictions = 0
        self.predictionsMade = 0
        self.currentPred = -1
        self.X_train = X_train.tolist()
        self.y_train = y_train.tolist()
        self.uuid = uuid
    def getPerformance(self):
        return float(self.correctPredictions / self.predictionsMade)
    def getClf(self):
        return self.clf
    def setCurrentPred(self, pred):
        self.currentPred = pred
    def predict(self, data):
        self.setCurrentPred(self.clf.predict(data))
        return self.currentPred
    def predict_proba(self, data):
        temp = self.clf.predict_proba(data)
        if temp[0][0] > temp[0][1]:
            self.setCurrentPred(0)
        else:
            self.setCurrentPred(1)
        return temp
    def updatePrediction(self, val):
        self.predictionsMade = self.predictionsMade + 1
        if self.currentPred == val:
            self.correctPredictions = self.correctPredictions + 1
    def updateDataset(self, X, y):
        self.X_train.append(X.tolist())
        self.y_train.append(y.tolist())
    def retrain(self):
        self.clf.fit(np.array(self.X_train), np.array(self.y_train))
    def getUUID(self):
        return self.uuid
    

## Decision Tree Classifier

In [5]:
from sklearn.tree import DecisionTreeClassifier
import uuid

dt = DecisionTreeClassifier(random_state=random_seed)
dt = dt.fit(X_train, y_train)

y_pred = dt.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(dt, X_train, y_train, str(uuid.uuid1())))

0.9298245614035088


In [6]:
dt = DecisionTreeClassifier(random_state=random_seed)
dt = dt.fit(X_train2, y_train2)

y_pred = dt.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(dt, X_train, y_train, str(uuid.uuid1())))

0.9122807017543859


In [7]:
dt = DecisionTreeClassifier(random_state=random_seed)
dt = dt.fit(X_train3, y_train3)

y_pred = dt.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(dt, X_train, y_train, str(uuid.uuid1())))

0.9181286549707602


## Logistic Regression Classifier

In [8]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=random_seed)
lr = lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(lr, X_train, y_train, str(uuid.uuid1())))

0.9649122807017544


In [9]:
lr = LogisticRegression(random_state=random_seed)
lr = lr.fit(X_train2, y_train2)

y_pred = lr.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(lr, X_train, y_train, str(uuid.uuid1())))

0.9590643274853801


In [10]:
lr = LogisticRegression(random_state=random_seed)
lr = lr.fit(X_train3, y_train3)

y_pred = lr.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(lr, X_train, y_train, str(uuid.uuid1())))

0.9590643274853801


## Random Forest

In [11]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=random_seed)
rf = rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(rf, X_train, y_train, str(uuid.uuid1())))

0.9590643274853801


In [12]:
rf = RandomForestClassifier(random_state=random_seed)
rf = rf.fit(X_train2, y_train2)

y_pred = rf.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(rf, X_train, y_train, str(uuid.uuid1())))

0.9298245614035088


In [13]:
rf = RandomForestClassifier(random_state=random_seed)
rf = rf.fit(X_train3, y_train3)

y_pred = rf.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(rf, X_train, y_train, str(uuid.uuid1())))

0.935672514619883


## Gaussian Naive Bayes

In [14]:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb = nb.fit(X_train, y_train)

y_pred = nb.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(nb, X_train, y_train, str(uuid.uuid1())))

0.9239766081871345


In [15]:
nb = GaussianNB()
nb = nb.fit(X_train2, y_train2)

y_pred = nb.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(nb, X_train, y_train, str(uuid.uuid1())))

0.9298245614035088


In [16]:
nb = GaussianNB()
nb = nb.fit(X_train3, y_train3)

y_pred = nb.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(nb, X_train, y_train, str(uuid.uuid1())))

0.9122807017543859


## Support Vector Classifier

In [17]:
from sklearn.svm import SVC

svc = SVC(kernel='linear', probability=True, random_state=random_seed)
svc = svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(svc, X_train, y_train, str(uuid.uuid1())))

0.9590643274853801


In [18]:
svc = SVC(kernel='linear', probability=True, random_state=random_seed)
svc = svc.fit(X_train2, y_train2)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(svc, X_train, y_train, str(uuid.uuid1())))

0.9239766081871345


In [19]:
svc = SVC(kernel='linear', probability=True, random_state=random_seed)
svc = svc.fit(X_train3, y_train3)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(svc, X_train, y_train, str(uuid.uuid1())))

0.9707602339181286


## Nearest Neighbors Classifier

In [20]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn = knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(knn, X_train, y_train, str(uuid.uuid1())))

0.9473684210526315


In [21]:
knn = KNeighborsClassifier()
knn = knn.fit(X_train2, y_train2)

y_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(knn, X_train, y_train, str(uuid.uuid1())))

0.9590643274853801


In [22]:
knn = KNeighborsClassifier()
knn = knn.fit(X_train3, y_train3)

y_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_pred))

clfList.append(Miner(knn, X_train, y_train, str(uuid.uuid1())))

0.9473684210526315


In [23]:
import heapq, random

class Chain:
    def __init__(self):
        self.blockList = []
    def addToChain(self, block):
        self.blockList.append(block)
    def getBlockList(self):
        return self.blockList
    
class Block:
    def __init__(self):
        self.transactionList = []
    def addTransaction(self, transaction):
        self.transactionList.append(transaction)
    def getTransactionList(self):
        return self.transactionList
    
class Transaction:
    def __init__(self, data):
        self.data = data
        self.label = -1
        self.timeConfirmed = -1
        self.valID = -1
    def getData(self):
        return self.data
    def setLabel(self, label):
        self.label = label
    def getLabel(self):
        return self.label
    def confirm(self, time, valID):
        self.timeConfirmed = time
        self.valID = valID
    def getTimestamp(self):
        return self.timeConfirmed
    def getValID(self):
        return self.valID

def pickRandomValidator(minerList):
    validator = random.choice(minerList)
    minerList.remove(validator)
    return validator

def pickBestValidator(minerList):
    accList = []
    for clf in minerList:
        accList.append(clf.getPerformance())
    validator = minerList[accList.index(max(accList))]
    minerList.remove(validator)
    return validator

def pickFromTopValidator(minerList):
    topList = []
    for clf in minerList:
        if len(topList) < 5:
            topList.append(clf)
        else:
            for cand in topList:
                if clf.getPerformance() > cand.getPerformance():
                    topList.append(clf)
                    topList.remove(cand)
    validator = random.choice(topList)
    minerList.remove(validator)
    return validator


def validateData(minerList, validator, data):
    mProba = [0, 0]
    for clf in minerList:
        mProba = mProba + clf.predict_proba(data)
    mProba = mProba / len(minerList)

    if mProba[0][0] > mProba[0][1]:
        consensus = 0
    else:
        consensus = 1
        
    vPred = validator.predict(data)
    for clf in minerList:
        clf.updatePrediction(vPred)
        clf.updateDataset(data, vPred)
    if consensus == vPred:
        return consensus
    else:
        validator = None
        return -1

def retrain_models(minerList):
    for clf in minerList:
        clf.retrain()

In [24]:
tList = [] #list of unconfirmed transactions

for i in range(0, len(X_test)):  
    tList.append(Transaction(X_test[i].reshape(1,-1)))

## Sample Transactions using Random Validator Selection

In [25]:
from datetime import datetime

ml_chain = Chain()
tempBlock = Block()
rCtr = 0

for transaction in tList:
    
    validator = pickRandomValidator(clfList)
    consensusPred = validateData(clfList, validator, transaction.getData())
    
    if consensusPred > -1: #Successful validation
        transaction.setLabel(consensusPred)
        tempBlock.addTransaction(transaction)
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        transaction.confirm(current_time, validator.getUUID())
        rCtr = rCtr + 1

        #Fills a block with the last 10 transactions 
        if rCtr % 10 == 0:
            ml_chain.addToChain(tempBlock)
            print("\n")
            print("Added Block to Chain\n\nTransactions\n")
            print("#\tTimestamp\tValidator")
            tNum = rCtr - 10
            for t in tempBlock.getTransactionList():
                tNum = tNum + 1
                print(f"{tNum}\t{t.getTimestamp()}\t{t.getValID()}")
            tempBlock = Block()
        #Retrains the models every 100 instances added to the dataset
        if rCtr % 100 == 0:
            #retrain_models(clfList)
            print("\n*supposedly* retraining models\n")
    else:
        print("Confirmation Failed")
    clfList.append(validator)
print(f"\nSucceeded in confirming {rCtr} transactions! (Out of {len(X_test)})")



Added Block to Chain

Transactions

#	Timestamp	Validator
1	00:13:55	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
2	00:13:55	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
3	00:13:55	359a0cfa-1c31-11ea-a7cf-54bef70d5eb5
4	00:13:55	35921ee8-1c31-11ea-8e0f-54bef70d5eb5
5	00:13:55	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
6	00:13:55	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
7	00:13:55	35909878-1c31-11ea-822b-54bef70d5eb5
8	00:13:55	39fde9f4-1c31-11ea-836c-54bef70d5eb5
9	00:13:55	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
10	00:13:55	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
Confirmation Failed


Added Block to Chain

Transactions

#	Timestamp	Validator
11	00:13:55	35af6698-1c31-11ea-9d67-54bef70d5eb5
12	00:13:55	3ad6b222-1c31-11ea-92b1-54bef70d5eb5
13	00:13:55	3ad4417a-1c31-11ea-8bf7-54bef70d5eb5
14	00:13:55	35af6698-1c31-11ea-9d67-54bef70d5eb5
15	00:13:55	359a0cfa-1c31-11ea-a7cf-54bef70d5eb5
16	00:13:55	35aad362-1c31-11ea-ac1d-54bef70d5eb5
17	00:13:55	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
18	00:13:55	358f391a-1c31-11ea-8991-

Succeeded in confirming 159 transactions! (Out of 171)


## Sample Transactions using Best Validator

In [26]:
ml_chain = Chain()
tempBlock = Block()
validator2 = None
rCtr = 0

for transaction in tList:
    
    if validator2 is None:
        validator2 = pickBestValidator(clfList)
    consensusPred = validateData(clfList, validator2, transaction.getData())
    
    if consensusPred > -1: #Successful validation
        transaction.setLabel(consensusPred)
        tempBlock.addTransaction(transaction)
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        transaction.confirm(current_time, validator2.getUUID())
        rCtr = rCtr + 1

        #Fills a block with the last 10 transactions 
        if rCtr % 10 == 0:
            ml_chain.addToChain(tempBlock)
            print("\n")
            print("Added Block to Chain\n\nTransactions\n")
            print("#\tTimestamp\tValidator")
            tNum = rCtr - 10
            for t in tempBlock.getTransactionList():
                tNum = tNum + 1
                print(f"{tNum}\t{t.getTimestamp()}\t{t.getValID()}")
            tempBlock = Block()
        #Retrains the models every 100 instances added to the dataset
        if rCtr % 100 == 0:
            #retrain_models(clfList)
            print("\n*supposedly* retraining models\n")
    else:
        print("Confirmation Failed")
        clfList.append(validator2)
        validator2 = None
print(f"\nSucceeded in confirming {rCtr} transactions! (Out of {len(X_test)})")



Added Block to Chain

Transactions

#	Timestamp	Validator
1	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
2	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
3	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
4	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
5	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
6	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
7	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
8	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
9	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
10	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5


Added Block to Chain

Transactions

#	Timestamp	Validator
11	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
12	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
13	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
14	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
15	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
16	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
17	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
18	00:13:56	35a889d0-1c31-11ea-8b0c-54bef70d5eb5
19	00:1



Added Block to Chain

Transactions

#	Timestamp	Validator
151	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
152	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
153	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
154	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
155	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
156	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
157	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
158	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
159	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5
160	00:13:57	35ade03a-1c31-11ea-8b5f-54bef70d5eb5

Succeeded in confirming 165 transactions! (Out of 171)


## Sample Transactions using Randomly Selected Top Validator

In [27]:
ml_chain = Chain()
tempBlock = Block()
validator3 = None
rCtr = 0

for transaction in tList:
    
    if validator3 is None:
        validator3 = pickFromTopValidator(clfList)
    consensusPred = validateData(clfList, validator3, transaction.getData())
    
    if consensusPred > -1: #Successful validation
        transaction.setLabel(consensusPred)
        tempBlock.addTransaction(transaction)
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        transaction.confirm(current_time, validator3.getUUID())
        rCtr = rCtr + 1

        #Fills a block with the last 10 transactions 
        if rCtr % 10 == 0:
            ml_chain.addToChain(tempBlock)
            print("\n")
            print("Added Block to Chain\n\nTransactions\n")
            print("#\tTimestamp\tValidator")
            tNum = rCtr - 10
            for t in tempBlock.getTransactionList():
                tNum = tNum + 1
                print(f"{tNum}\t{t.getTimestamp()}\t{t.getValID()}")
            tempBlock = Block()
        #Retrains the models every 100 instances added to the dataset
        if rCtr % 100 == 0:
            #retrain_models(clfList)
            print("\n*supposedly* retraining models\n")
    else:
        print("Confirmation Failed")
        clfList.append(validator3)
        validator3 = None
print(f"\nSucceeded in confirming {rCtr} transactions! (Out of {len(X_test)})")



Added Block to Chain

Transactions

#	Timestamp	Validator
1	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
2	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
3	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
4	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
5	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
6	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
7	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
8	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
9	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
10	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
Confirmation Failed


Added Block to Chain

Transactions

#	Timestamp	Validator
11	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
12	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
13	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
14	00:13:57	35af6698-1c31-11ea-9d67-54bef70d5eb5
15	00:13:57	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
16	00:13:57	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
17	00:13:57	35ac32d2-1c31-11ea-9fc0-54bef70d5eb5
18	00:13:57	35ac32d2-1c31-11ea-9fc0-

## RANDOM VALIDATOR PER ROUND (deprecated)

test = Chain() #Instantiate a test blockchain

failcount = 0

#Rerun this cell to simulate using random data points from the test set
for i in range(0,10000):
    randomData = random.choice(X_test)
    validator = pickRandomValidator(clfList)
    consensusPred = validateData(clfList, validator, randomData.reshape(1,-1))
    if consensusPred > -1:
        temp = Block(randomData, consensusPred)
        test.addToChain(temp)
    else:
        failcount = failcount + 1
    clfList.append(validator)

print("Validation Failed " + str(failcount) + " times.")
print("Final block height: " + str(len(test.getBlockList())))

## PICK BEST PERFORMING VALIDATOR 

#### (Assumes that the models already have a prediction history)

test2 = Chain() #Instantiate a test blockchain
validator2 = None

failcount2 = 0

#Rerun this cell to simulate using random data points from the test set
for i in range(0,10000):
    randomData = random.choice(X_test)
    if validator2 is None:
        validator2 = pickBestValidator(clfList)
    consensusPred = validateData(clfList, validator2, randomData.reshape(1,-1))
    if consensusPred > -1:
        temp = Block(randomData, consensusPred)
        test2.addToChain(temp)
    else:
        clfList.append(validator2)
        validator2 = None
        failcount2 = failcount2 + 1
        
print("Validation Failed " + str(failcount2) + " times.")
print("Final block height: " + str(len(test2.getBlockList())))

## PICK RANDOM VALIDATOR FROM TOP 5 PERFORMING MODELS

test3 = Chain() #Instantiate a test blockchain
validator3 = None

failcount3 = 0

#Rerun this cell to simulate using random data points from the test set
for i in range(0,10000):
    randomData = random.choice(X_test)
    if validator3 is None:
        validator3 = pickFromTopValidator(clfList)
    consensusPred = validateData(clfList, validator3, randomData.reshape(1,-1))
    if consensusPred > -1:
        temp = Block(randomData, consensusPred)
        test3.addToChain(temp)
    else:
        clfList.append(validator2)
        validator3 = None
        failcount3 = failcount3 + 1
        
print("Validation Failed " + str(failcount3) + " times.")
print("Final block height: " + str(len(test3.getBlockList())))