In [1]:
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
import numpy as np
from sklearn.datasets import load_digits
from collections import defaultdict
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Load iris data-set
iris=load_iris()
X=iris.data
y=iris.target
X_Final = []
y_Final = []
# Filter the data with 2 labels
for idx_of_label, val_of_label in enumerate(y):
    if(val_of_label != 2):
        X_Final.append(X[idx_of_label])
        y_Final.append(val_of_label) 
X_Final = np.array(X_Final)
y_Final = np.array(y_Final)

In [8]:
# Process each feature using the perceptron model
# Input: feature data, labels
# Return: Predicted value by the model

def ProcessFeatureByPerceptronModel (samples, labels):
    clf = Perceptron(random_state=0)
    clf.fit(samples,labels)
    cv = CountVectorizer(max_df=0.95, min_df=2,
                                     max_features=10000,
                                     stop_words='english')
    print(clf.score(samples, labels))    
    return clf.predict(samples)

In [9]:
# Get the feature data from a data-set for a provided feature index
# Input: feature data, feature index for which data needs to be extracted
# Return: Data in the given feature

def GetFeatureData(samples,featureIndex):
    feature_data = []
    for idx, data in enumerate(samples):  
        array = []
        array.append(samples[idx][featureIndex])
        feature_data.append(array)
    return feature_data

In [10]:
# Split master data by predicted labels by the model
# Input: predicted labels
# Return: Dictionary with the left and right sub-sets (split the master data)

def SplitMasterDataByLabels(labels):
    uniqueLabels = set(labels)
    dictionaryLabels = defaultdict(list)
    for unique in uniqueLabels:
        indices = [i for i, x in enumerate(labels) if x == unique]
        dictionaryLabels[unique].append(indices)
    splittedData = defaultdict()
    splittedData["leftExamples"] = FilterDataByIndices(X_Final,list(dictionaryLabels.values())[0])
    splittedData["leftLabels"] = FilterDataByIndices(y_Final,list(dictionaryLabels.values())[0])
    splittedData["rightExamples"] = FilterDataByIndices(X_Final,list(dictionaryLabels.values())[1])
    splittedData["rightLabels"] = FilterDataByIndices(y_Final,list(dictionaryLabels.values())[1])      
    return splittedData

In [11]:
# Get instances for the list of row indexes
# Input: sample data, list of row index
# Return: data instances at the provided index

def FilterDataByIndices(data, indexList):
    for index in indexList:
            tempDataList = data[index]           
    return tempDataList    

In [12]:
# Main method:  All the processing happens here
predictedLabelsByPerceptron = ProcessFeatureByPerceptronModel(X_Final, y_Final)
labelIndicesOfPredictedData = SplitMasterDataByLabels(predictedLabelsByPerceptron)
print(labelIndicesOfPredictedData["leftExamples"])
print(labelIndicesOfPredictedData["leftLabels"])
predictedLabelsByPerceptronForLeftSplit = ProcessFeatureByPerceptronModel(labelIndicesOfPredictedData["leftExamples"], 
                                                                          labelIndicesOfPredictedData["leftLabels"])
print(predictedLabelsByPerceptronForLeftSplit)



1.0
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.1 1.5 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

ValueError: The number of class labels must be greater than one.