# SVM over trees:

## Importing panda and numpy:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(20)


## Importing the dataset and converting as panda framework:

In [2]:
def import_data (filename):
    """
    This function, imports the train/test data and create the attribute matrix and labels using the input data
    """
    Matrix = []
    Label = []
    with open(filename) as f:

        for line in f:
            sample = line.split()
            Label.append(float(sample[0]))
            sample.pop(0)
            row = []
            for s in sample:
                feature, value = s.split(':')
                z = len(row)
                nz = int(feature) - (z+1)
                for i in range (nz):
                    row.append(0)
                row.append(float(value))
            Matrix.append(row)
    data =[]
    M = max(len(row) for row in Matrix)
    #print("M:",M)
    for row in Matrix:
        nz = M - (len(row))
        for i in range (nz):
            row.append(0)
        data.append(row)
    Label1 = np.array(Label)
    data1= np.array(data)
    #print("aaa:",Label1, data1.shape)
    S1 = np.concatenate((data1, Label1[:,None]),axis=1)
    attributes = np.arange(1, np.size(data1,1)+2)
    #print(attributes)
    samples = range(0,np.size(data1,0))
    data2 = pd.DataFrame(S1, columns=attributes, index=samples)
    #print('label',data2[6])

    return data2
    #print("data1:",data1.shape)

In [3]:
def accuracy (D, w, b, loss_metric = "SVM", C = 1, sig2=0):
    if loss_metric == "Logestic regression":
        C = 1
    
    #print(sig2, C)
    
    """
        This function returns the accuracy of the dataset based on set D and weight and bias. 
    """
    if type(D) != np.ndarray:
        D = D.to_numpy()
    n_correct_prediction = 0
    n_samples = np.size(D,0)
    label_ix = np.size(D,1)
    if loss_metric == "SVM":
        loss = .5 *(np.dot(w,w) + b*b)
    elif loss_metric == "Logestic regression":
        loss = (np.dot(w,w) + b*b)/sig2
        #print("loss",loss)
    for i in range(n_samples):
        sample = D[i,:]
        true_label = sample[-1]
        xi = sample[:-1]
        dot_product = np.dot(xi,w) + b
        predicted_label = np.sign (dot_product)
        if loss_metric == "SVM":
            loss += np.max([0, 1.0 - true_label * dot_product])
        elif loss_metric == "Logestic regression":
            loss += np.log (1 + np.exp(-true_label * dot_product))
        if predicted_label == true_label:
            n_correct_prediction += 1
    acc = n_correct_prediction/n_samples * 100
    loss = C*loss
    return acc, loss
def prediction (D, w, b):
    """
        This function returns the prediction of the dataset based on set D and weight and bias. 
    """
    if type(D) != np.ndarray:
        D = D.to_numpy()
    n_samples = np.size(D,0)
    label_ix = np.size(D,1)
    pred = []
    for i in range(n_samples):
        sample = D[i,:]
        xi = sample[:-1]
        predicted_label = np.sign (np.dot(xi,w) + b)
        #print(predicted_label[0])
        if predicted_label == -1.0:
            predicted_label = [0.0]
        pred.append([i, predicted_label[0]])
        
    Pred = pd.DataFrame(pred, columns=['example_id', 'label'])

    return Pred
        

### Fold database creation:

In [4]:
def update_label(D):
    x,y = D.shape
    for i in range(x):
        if D[y][i] ==0.0:
            D[y][i] = -1.0
    return (D)
def k_fold(D,k):
    cols = D.columns
    D = D.to_numpy()
    r_n, _ = D.shape
    k_n = (r_n//5)
    lb = (k-1)*k_n
    if k == 5:
        ub = r_n
    else:
        ub = k*k_n-1
    
    fk = D [lb:ub, :] 
    
    Fk = pd.DataFrame(fk, columns=cols)
    return Fk

def import_label (D, new_feature):
    D = D.to_numpy()
    D = D.copy()
    new_feature = new_feature.to_numpy()
    labels = D[:, -1]
    labels = labels[:,None]

    D_out = np.append(new_feature, labels, axis=1)
    
    attributes = np.arange(1, np.size(D_out,1)+1)
    D_out = pd.DataFrame(D_out, columns=attributes)
    
    return D_out

### Importing the glove datasets:

In [5]:
Train_data1 = import_data('glove.train.libsvm')
Train_data_glove = update_label(Train_data1)
Test_data1 = import_data('glove.test.libsvm')
Test_data_glove = update_label(Test_data1)
Eval_data_glove = import_data('glove.eval.anon.libsvm')

### Importing the miscellaneous datasets:

In [6]:
misc_train = pd.read_csv ('misc-attributes-train.csv')
train_samples, _ = misc_train.shape
misc_test = pd.read_csv ('misc-attributes-test.csv')
test_samples, _ = misc_test.shape
misc_eval = pd.read_csv ('misc-attributes-eval.csv')
eval_samples, _ = misc_eval.shape

## In order to convert the database to one hot encoding, all the dataset are concatenated and converted to correlate the cominations.

In [7]:
database = pd.concat([misc_train, misc_test, misc_eval], axis=0)
database[database.isnull().any(axis=1)]
# Converting "NaN" to no_gender in victom_genders category:
database = database.fillna({"victim_genders": "no_gender"})
database.head()

# convert all string data in defendant such as not known ,... to Nan and then substitute nan with 0;
database['defendant_age'] = pd.to_numeric(database.defendant_age, errors='coerce')
database = database.fillna({"defendant_age": 0})
database

Unnamed: 0,defendant_age,defendant_gender,num_victims,victim_genders,offence_category,offence_subcategory
0,62.0,female,1,male,theft,theftFromPlace
1,17.0,male,1,male,theft,pocketpicking
2,0.0,male,1,male,theft,pocketpicking
3,0.0,male,1,male,theft,simpleLarceny
4,52.0,male,1,female,theft,pocketpicking
...,...,...,...,...,...,...
5245,0.0,male,1,male,theft,theftFromPlace
5246,0.0,male,0,no_gender,sexual,sodomy
5247,0.0,male,1,male,theft,stealingFromMaster
5248,26.0,male,1,male,theft,burglary


In [8]:
# Now that all the data are free of Nan we can convert them to one-hot encoding.
misc_transfered = pd.concat([database.defendant_age, database.num_victims, pd.get_dummies(database.defendant_gender), pd.get_dummies(database.victim_genders), pd.get_dummies(database.offence_category), pd.get_dummies(database.offence_subcategory)], axis=1)
# for dicision tree i convert all of the featres to one-hot encoding
misc_transfered_all_bin = pd.concat([pd.get_dummies(database.defendant_age), pd.get_dummies(database.num_victims), pd.get_dummies(database.defendant_gender), pd.get_dummies(database.victim_genders), pd.get_dummies(database.offence_category), pd.get_dummies(database.offence_subcategory)], axis=1)

In [9]:
Train_misc_transfered = misc_transfered_all_bin.iloc[:train_samples,:] 
Test_misc_transfered = misc_transfered_all_bin.iloc[train_samples:train_samples+test_samples,:]
Eval_misc_transfered = misc_transfered_all_bin.iloc[train_samples+test_samples:,:]

In [10]:
Train_misc = import_label(Train_data_glove, Train_misc_transfered)
Test_misc = import_label(Test_data_glove, Test_misc_transfered)
Eval_misc = import_label(Eval_data_glove, Eval_misc_transfered)
print(Train_misc.shape)

(17500, 227)


In [13]:
Train_misc.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,218,219,220,221,222,223,224,225,226,227
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0


## 3. SVM over trees:

### SVM:

In [11]:

def SVM (D, max_epoch, learning_rate, Cost, threshold=0.0005):
    c = Cost
    if type(D) != np.ndarray:
        D = D.to_numpy()
    lr0 = learning_rate
    #print('training size:')
     
    w_size = np.size(D,1)-1
    w = -.01 + 0.02 * np.random.rand(w_size)
    #w = -.01 * np.ones(w_size)
    
    b = -.01 + 0.02 * np.random.rand(1)
    #b = -0.01
    update = 0
    ep_w = []
    ep_b = []
    ep_update = []
    train_loss = []
    losses = []
    j = 0
    for epoch in range(1, max_epoch+1):
        #1.shuffle the data
        lr = lr0/(1+epoch)
        np.random.shuffle(D)
        #2.Update weights:
        for i in range (np.size(D,0)):
            xi = D[i,:-1]
            yi = D[i,-1]
            if yi * (np.dot(xi, w) + b) <= 1:
                update += 1
                w = (1-lr)*w + lr * c * yi * xi
                b = (1-lr)*b + lr * c * yi
            else:
                w = (1-lr)*w
                b = (1-lr)*b 
        
        #print("w0:", w[0])
        w1 = w
        b1 = b
        update1 = update
        _, loss = accuracy (D, w,b, "SVM", c)
        loss = loss[0]
        if epoch == 1:
            loss1 = loss
        train_loss.append(loss/loss1)
        losses.append(loss)
        
        # Stopping criteria:
        if epoch> 5:
            #print(epoch)
            #print(train_loss)
            if (abs(train_loss[epoch-1] - train_loss[epoch-2])) < threshold and (abs(train_loss[epoch-2] - train_loss[epoch-3])) < threshold and (abs(train_loss[epoch-3] - train_loss[epoch-4])) < threshold and (abs(train_loss[epoch-4] - train_loss[epoch-5])) < threshold:
                j = 1
                #print(j) 
                #print(i)
                break
        #print(b)
        #print(b1)
        ep_w.append(w1.copy())
        ep_b.append(b1.copy())
        ep_update.append(update1)
        #print('ep_b:',ep_b)
    #print('update:', update)
    ep_w = np.array(ep_w)
    ep_b = np.array(ep_b)
    ep_update = np.array(ep_update)
    return ep_w, ep_b, losses

### Full tree:

In [12]:
class DecisionTree:
    """
    This class contains all the functions needed for creating decision tree.
    It gets the dataset and measure index which can be "Entropy" or "Gini" and trains the decision tree.
    """
    def __init__(self, S,  Index):
        self.label_ix = np.size(S,1)
        #print('label_ix',np.size(S,1) )
        self.attributes_ix = np.arange(1,np.size(S,1))
        self.sample_size = np.size(S,0)
        self.measure = Index
        self.S = S
    
    def entropy(self, S):
        """
        Returns the Entropy measure of label array
        """
        tot_samples = np.size(S,0)
        
        if tot_samples == 0:
            return 0
        else:
            p_plus = np.size(S[S[:,-1]==+1],0) / tot_samples
            p_min = 1 - p_plus
            if p_plus == 0:
                return 0
            elif p_min == 0:
                return 0
            else:
                return - p_plus * np.log2(p_plus) - p_min * np.log2(p_min)
    
    def gini (self, S):
        """
        Returns the Gini measure of label array
        """
        tot_samples = np.size(S,0)
        if tot_samples == 0:
            return 0
        else:
            p_plus = np.size(S[S[:,-1]==+1],0) / tot_samples
            p_min = 1 - p_plus
            return p_plus * (1- p_plus) + p_min * (1- p_min)
    
    
    def info_gain (self, S, A):
        """
        This function is going to calculate the information Gain for each set and attribute 
        using Entropy or Gini measure functions
        """
        nS = np.size(S,0)
        #S_0 = pd.DataFrame() ;
        #S_1 = pd.DataFrame() ;
        S_0 = S [S[:,A]==0]
        #S_0.reset_index(inplace = True, drop = True)
        nS_0 = np.size(S_0,0)
        S_1 = S [S[:,A]==1]
        #S_1.reset_index(inplace = True, drop = True)
        nS_1 = np.size(S_1,0)

        if (self.measure == 'Entropy'):
            gain = self.entropy(S) - (nS_0/nS) * self.entropy(S_0)  - (nS_1/nS) * self.entropy(S_1)
        else:
            if (self.measure == 'Gini'):
                gain = self.gini(S) - (nS_0/nS) * self.gini(S_0) - (nS_1/nS) * self.gini(S_1)
        return gain
       
    
    def common_label(self,S):
        """
        Create common label for set S:
        """
        
        label = np.unique(S[:,-1])[np.argmax(np.unique(S[:,-1],return_counts=True)[1])]
    
        return label
    
        
    def ID3_depthlim(self, S, attributes,max_depth, parent_common_label = None, depth = 0):
        if depth == max_depth:
            return self.common_label(S), depth
        
        if len(np.unique(S[:,-1])) == 1:
            return np.unique(S[:,-1])[0], depth
        
        if S.size == 0:
            return parent_node_class, depth
        else:
            common_label = self.common_label(S)

            gain_vals = [self.info_gain(S, a) for a in attributes]
            best_attribute = attributes[np.argmax(gain_vals)]

            tree = {best_attribute:{}}

            attributes = [i for i in attributes if i != best_attribute]
            
            d =[]
            #print((S[:,best_attribute]))

            for v in np.unique(S[:,best_attribute]):
                v = v
                #print(v)
                S_v = S[S[:,best_attribute] == v]
                #print(S_v)
                
                #print(best_attribute)

                sub_tree, dd = self.ID3_depthlim(S_v,attributes, max_depth, common_label, depth+1)
                d.append (dd)

                tree[best_attribute][v] = sub_tree
            #print(d)   
            depth = np.max(d)
            return tree, depth
    
    
    def predict(self, sample, attributes, tree):
        """
        This function returns the predicted label of a sample based on the trained tree
        """
        if tree == 1.0 or tree == -1.0:
            #print('tree',tree)
            return tree
        else:   
            for attribute in attributes:
                if [attribute] == list(tree.keys()):
                    at_val = sample[attribute]
                    #print( [attribute])
                    label = self.predict (sample, attributes, tree [attribute][at_val])
                    return label
                
    def accuracy (self, S, tree):
        """
        This function returns the accuracy of the trained tree based on set S. 
        """
        n_samples = np.size(S,0)
        #print(n_samples)
        label_ix = np.size(S,1)-1
        attributes_ix = np.arange(0,np.size(S,1)-1)
        #print(attributes_ix)
        n_correct_prediction = 0
        for i in range(n_samples):
            sample = S[i,:]
            #print(sample)
            true_label = sample[label_ix]
            #print(true_label)
            predicted_label = self.predict(sample, attributes_ix, tree)
            #print (predicted_label)
            if predicted_label == true_label:
                n_correct_prediction += 1
        acc = n_correct_prediction/n_samples * 100
        return acc

### Importing the folded datasets:

In [14]:
# Trees construction:
def tree_const (S, tree_n, max_depth):
    if type(S) != np.ndarray:
        S = S.to_numpy()
    trees = []
    for i in range(tree_n):
        np.random.shuffle(S)
        S1 = S
        n,_ = S1.shape
        S2 = S1[1:n//10,:]
        #print(S2)
        Training_class = DecisionTree(S2, 'Gini')
        attributes_ix = np.arange(0,np.size(S2,1)-1)
        tree_i, _ = Training_class.ID3_depthlim(S2, attributes_ix, max_depth)
        trees.append (tree_i)
    return(trees)

In [15]:
def dataset_transform (S, trees):
    if type(S) != np.ndarray:
        S = S.to_numpy()
    attributes_ix = np.arange(0,np.size(S,1)-1)
    TC = DecisionTree(S, 'Gini')
    phi = np.zeros((np.size(S,0), len(trees)+1))
    i = 0
    for sample in S:
        j = 0
        for tree in trees:
            phi[i,j] = TC.predict(sample, attributes_ix, tree)
            j += 1
        i+= 1
    phi[:,-1] = S[:, -1]
    return phi

In [16]:
def cross_val_svm_tree(f1, f2, f3, f4, f5, max_epoch, learning_rate, max_depth, C, max_trees):
    """
    The function calculates the mean accuracy and std based on the 5-fold cross validation
    """

    #train_data = pd.DataFrame(columns = f1.columns)
    dataset = []
    acc = []
    loss = []
    learning_strategy='SVM'
    for i in range (1,6):
        valid_data = eval("f"+str(i))
        train_name =[]
        val_name = ["f"+str(i)]
        #print(i,val_name)
        #print(valid_data)
        for j in range(1,6):
            if j != i:
                #print(j)
                train_name.append ("f"+str(j))
                dataset.append(eval("f"+str(j)))
        train_data = np.concatenate(dataset)
        dataset = []
        #print(train_data)
        trees = tree_const (train_data, max_trees, max_depth)
        train_transfered = dataset_transform (train_data, trees)
        valid_transfered = dataset_transform (valid_data, trees)
        w, b, _ = SVM (train_transfered, max_epoch, learning_rate, C)
        w = w[-1]
        #print(w)
        b = b [-1]
        
        #print(train_name)
        ac, lo = accuracy (valid_transfered, w, b, learning_strategy, C, C)
        acc.append (ac)
        loss.append (lo)
    #print("accuracy:", acc)
    Mean_acc = np.mean(acc)
    Mean_loss = np.mean(loss)
    
    return Mean_acc, Mean_loss

In [17]:
def cross_val_svm_tree1(f1, f2, f3, f4, f5, max_epoch, learning_rate, C):
    """
    The function calculates the mean accuracy and std based on the 5-fold cross validation
    """
    #train_data = pd.DataFrame(columns = f1.columns)
    dataset = []
    acc = []
    loss = []
    learning_strategy='SVM'
    for i in range (1,6):
        valid_data = eval("f"+str(i))
        train_name =[]
        val_name = ["f"+str(i)]
        #print(i,val_name)
        #print(valid_data)
        for j in range(1,6):
            if j != i:
                #print(j)
                train_name.append ("f"+str(j))
                dataset.append(eval("f"+str(j)))
        train_data = np.concatenate(dataset)
        dataset = []
        #print(train_data)
        #trees = tree_const (train_data, max_trees, max_depth)
        #train_transfered = dataset_transform (train_data, trees)
        #valid_transfered = dataset_transform (valid_data, trees)
        w, b, _ = SVM (train_data, max_epoch, learning_rate, C)
        w = w[-1]
        #print(w)
        b = b [-1]
        
        #print(train_name)
        ac, lo = accuracy (valid_data, w, b, learning_strategy, C, C)
        acc.append (ac)
        loss.append (lo)
    #print("accuracy:", acc)
    Mean_acc = np.mean(acc)
    Mean_loss = np.mean(loss)
    
    return Mean_acc, Mean_loss

In [18]:
# Evaluating the network accuracy based on different values for learning rates and loss tradeoff:
"""
"""
Learning_rates = [10**(-1), 10**(-2), 10**(-3), 10**(-4)]
cost = [10**3, 10**(2), 10**(1), 10**(0)]
max_depth = [1,2, 4, 8] 
max_epoch = 10
acc_mean = []
acc_std = []
result = []
max_trees = 100
combination = 0
for depth in max_depth:
    # Constracting the trees using the training dataset:
    trees = tree_const (Train_misc, max_trees, depth)
    train_transfered = dataset_transform (Train_misc, trees)
    print("Trees are done...")
    # Creating folds for cross validation
    Data1 = Train_misc
    cols = Data1.columns
    Data1 = Data1.to_numpy()
    np.random.shuffle(Data1)
    Data1 = pd.DataFrame(Data1, columns=cols)
    f1 = k_fold(Data1,1)
    f2 = k_fold(Data1,2)
    f3 = k_fold(Data1,3)
    f4 = k_fold(Data1,4)
    f5 = k_fold(Data1,5)
    f1 = f1.to_numpy()
    f2 = f2.to_numpy()
    f3 = f3.to_numpy()
    f4 = f4.to_numpy()
    f5 = f5.to_numpy() 
    for lr in Learning_rates:
        for c in cost:
            mean_acc, mean_loss = cross_val_svm_tree1(f1, f2, f3, f4, f5, max_epoch, lr, c)
            acc_mean.append(mean_acc)
            result.append([lr, c, depth, mean_acc, mean_loss])
            combination += 1
            print("Combination:", combination, "lr:", lr, "c:", c, "depth:", depth, "mean_acc:", mean_acc, "mean_loss:", mean_loss)

result = np.array(result)
Best_lr = result[np.argmax(result[:,3]), 0]
Best_c = result[np.argmax(result[:,3]), 1]
Best_depth = result[np.argmax(result[:,3]), 2]
best_acc = result[np.argmax(result[:,3]), 3]

print('Cross validation results for different Learning rates, loss tradeoff and depth:')
result = pd.DataFrame(result, columns=['Learning rate', 'Loss tradeoff', 'depth', 'accuracy mean', 'loss mean'])

pd.set_option('display.max_rows', None)
print(result.to_string(index = False))
print('Best learning rate:', Best_lr)
print('Best Cost:', Best_c)
print('Best depth:', Best_depth)

report1 = [{'Best learning rate':Best_lr, 'Best Loss tradeoff':Best_c, 'Best depth':Best_depth, 'Best accuracy':best_acc}]
report1 = pd.DataFrame.from_records(report1)
print(report1.to_string(index = False))


Trees are done...
Combination: 1 lr: 0.1 c: 1000 depth: 1 mean_acc: 61.08173110684686 mean_loss: 39432398.869728595
Combination: 2 lr: 0.1 c: 100 depth: 1 mean_acc: 69.33624627444576 mean_loss: 413475.17481399234
Combination: 3 lr: 0.1 c: 10 depth: 1 mean_acc: 75.74857632793042 mean_loss: 21455.683167976807
Combination: 4 lr: 0.1 c: 1 depth: 1 mean_acc: 69.3471881762136 mean_loss: 3188.5303234837456
Combination: 5 lr: 0.01 c: 1000 depth: 1 mean_acc: 70.98206018045974 mean_loss: 4708972.933477053
Combination: 6 lr: 0.01 c: 100 depth: 1 mean_acc: 77.60056505940473 mean_loss: 204326.0552023374
Combination: 7 lr: 0.01 c: 10 depth: 1 mean_acc: 78.09209978361164 mean_loss: 17728.513773482424
Combination: 8 lr: 0.01 c: 1 depth: 1 mean_acc: 71.85161474707058 mean_loss: 3188.4582791974562
Combination: 9 lr: 0.001 c: 1000 depth: 1 mean_acc: 78.43502715061446 mean_loss: 1817723.317455511
Combination: 10 lr: 0.001 c: 100 depth: 1 mean_acc: 78.09209978361164 mean_loss: 159034.3018968655
Combination

In [19]:
result = np.array(result)
Best_lr = result[np.argmax(result[:,3]), 0]
Best_c = result[np.argmax(result[:,3]), 1]
Best_depth = result[np.argmax(result[:,3]), 2]
best_acc = result[np.argmax(result[:,3]), 3]

report1 = [{'Best learning rate':Best_lr, 'Best Loss tradeoff':Best_c, 'Best depth':Best_depth, 'Best accuracy':best_acc}]
report1 = pd.DataFrame.from_records(report1)
print(report1.to_string(index = False))

 Best learning rate  Best Loss tradeoff  Best depth  Best accuracy
             0.0001              1000.0         4.0      78.749429


In [20]:
max_epoch = 500
max_trees=200
trees = tree_const (Train_misc, max_trees, Best_depth)
train_transfered = dataset_transform (Train_misc, trees)
test_transfered = dataset_transform (Test_misc, trees)


In [21]:
w, b, loss = SVM (train_transfered, max_epoch, Best_lr, Best_c, threshold = 0.001)
#print(b)
train_acc = []
train_acc1 =[]
train_loss = []
acc = [0,0,0]
j = 0
for i in range (len(b)):
    #print(i)
    #print(w[i][0])
    acc_, loss = accuracy (train_transfered, w[i][:],b[i], "SVM", Best_c)
    loss = loss[0]
    train_acc.append (acc_)
    acc[0] = i
    acc[1] = acc_
    acc[2] = loss
    train_acc1.append(acc.copy())

    
#print(train_acc)

train_acc = np.array(train_acc)

best_epoch = np.argmax(train_acc) + 1


test_acc, test_loss =  accuracy (test_transfered, w[best_epoch-1][:],b[best_epoch-1])

In [23]:
report1 = [{'Best learning rate':Best_lr, 'Best loss tradeoff':Best_c,'Best depth':Best_depth, 'Best cross val. acc. (%)':best_acc, 
            'Best epoch':best_epoch,
            'Train accuracy (%)':train_acc1[best_epoch-1][1], 
            'Test accuracy (%)':test_acc}]

report1 = pd.DataFrame.from_records(report1)
print(report1.to_string(index = False))

 Best learning rate  Best loss tradeoff  Best depth  Best cross val. acc. (%)  Best epoch  Train accuracy (%)  Test accuracy (%)
             0.0001              1000.0         4.0                 78.749429          52           79.085714          79.911111


In [24]:
pred2 = prediction (dataset_transform (Eval_misc, trees),  w[best_epoch-1][:], b[best_epoch-1])
#print(pred1)
pred2.to_csv ('misc_labels.csv', index = False, header=True)