In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import logging
from argparse import ArgumentParser
from decimal import *
import math
import numpy.matlib
import types
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix as cm
import os
from sklearn.model_selection import KFold
from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error)

In [2]:

def parameter_file(data_name):
        FOLD_NO = 10
        a = 0.0
        c = 1.0
        b = 0.89 # Found by hyperparametertuning 
        q = 0.499
        length = 20000
        num_classes = 2  
        epsilon = 0.043
        return a, b, c, q, length, num_classes, epsilon, FOLD_NO

In [3]:
def skew_tent(x,a,b,c):
# b is the parameters of the map.a and c are 0 and 1 respectively.
# GLS maps are piece wise linear
# Based on the value of check- the function will return any of the two diffrent maps.  If Check = "Sk-T", 
# then skew-tent map is returned else skew-binary map is returned.
        if x < b:
            xn = ((c - a)*(x-a))/(b - a)
        else:
            xn = ((-(c-a)*(x-b))/(c - b)) + (c - a)
        return xn

In [4]:
def iterations(q, a, b, c, length):
    #The function return a time series and its index values 
    timeseries = (np.zeros((length,2)))
    timeseries[0,0] = q
    for i in range(1, length):
        timeseries[i,0] = skew_tent((timeseries[i-1,0]), a, b, c)
        timeseries[i,1] = np.int(i)
    return timeseries

In [5]:

def firingtime_calculation(X_train, timeseries, epsilon):
# Code for calculating firing time
    M = X_train.shape[0]
    N = X_train.shape[1]
    firingtime = np.zeros((M,N))
    for i in range(0,M):
        for j in range(0,N):
            A = (np.abs((X_train[i,j]) - timeseries[:,0]) < epsilon)
            firingtime[i,j] = timeseries[A.tolist().index(True),1]
    return firingtime

In [6]:
def probability_calculation(X_train, timeseries, b, epsilon):
    # Code for calculating tt-ss method based feature extraction
    M = X_train.shape[0]
    N = X_train.shape[1]
    probability = np.zeros((M,N))
    for i in range(0,M):
        for j in range(0,N):
            A = (np.abs(X_train[i,j] - timeseries[:,0]) < epsilon)
            #firingtime[i,j] = timeseries[A.tolist().index(True),1]
            freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
            if len(freq) == 0:
                probability[i, j] = 0
            else: 
                probability[i,j] = freq.tolist().count(False)/np.float(len(freq))
    return probability 

In [7]:
# code for Caluculating the mean representation vectors
def class_avg_distance(DistMat, y_train, lab):
    samples = y_train.shape[0]
    P = y_train.tolist().count([lab])
    Q = DistMat.shape[1]
    class_dist = np.zeros((P,Q))
    k =0
    for i in range(0, samples):
        if (y_train[i] == lab):
            class_dist[k,:]=DistMat[i,:]
            k = k+1
    return np.sum(class_dist, axis = 0)/class_dist.shape[0]

In [8]:

def cosine_similar_measure(test_firingtime, y_test, a, b, c, avg_class_dist):    
    i = 0
    y_pred_val = []
    sim = []
    tot_sim = []
    for a_val in test_firingtime:
        sim = []
        for b_val in avg_class_dist:
            sim.append(cosine_similarity(a_val.reshape(1,len(a_val)),b_val.reshape(1, len(b_val))))
        tot_sim.append(sim)
        y_pred_val.append(np.argmax(tot_sim[i]))
        i = i+1
    accuracy = accuracy_score(y_test, y_pred_val)*100
    recall = recall_score(y_test, y_pred_val , average="macro")
    precision = precision_score(y_test, y_pred_val , average="macro")
    f1 = f1_score(y_test, y_pred_val, average="macro")
    cma = cm(y_test,y_pred_val)  

    #print("Confusion matrix\n", cma)
    return y_pred_val

In [9]:
def class_wise_data(X_train, y_train, lab):
## This code takes all data from a given class and stores in the matrix data
    N = len(y_train)
    l = y_train.tolist().count(lab)
    label = lab * np.ones((l,1))
    data = np.zeros((l,X_train.shape[1]))
    p = 0
    for i in range(0, N):
        if (y_train[i] == lab):
            data[p, :] = X_train[i, :]
            p = p+1
    return data, label


In [10]:
def test_split_generator(X_train, y_train, num_classes, samples_per_class):
# This function generates the test size required for train test split 
    data_array =[] 
    test_split = []
    for i in range(0, num_classes):
        X, y = class_wise_data(X_train, y_train, lab = i)
        sample_split = []
        for j in range(1, samples_per_class + 1):
            sample_split.append(1 - np.float(j)/X.shape[0])
        test_split.append(sample_split)
    return test_split


In [11]:
def CHAOSNET(X_train, y_train, X_test, y_test, num_classes, q, a, b, c, length, epsilon):
        num_features = X_train.shape[1]
        timeseries = iterations(q, a, b, c, length )
        test_proba = probability_calculation(X_test, timeseries, b, epsilon)


        train_proba = probability_calculation(X_train, timeseries, b, epsilon)
        avg_class_dist_1 = np.zeros((num_classes,num_features))

        for n_cl in range(0, num_classes):
            avg_temp = class_avg_distance(train_proba, y_train, lab = n_cl)
            avg_class_dist_1[n_cl,:] = avg_temp
    
        #avg_class_dist_1 = []
        #for n_cl in range(0,num_classes):
        #avg_class_dist_1.append(class_avg_distance(train_proba, y_train_subsample, lab = n_cl))
        print( " avg_class_dist_1 = ", avg_class_dist_1)
        y_pred_val = cosine_similar_measure(test_proba, y_test, a, b, c, avg_class_dist_1)
        accuracy = accuracy_score(y_test, y_pred_val)*100
        f1 = f1_score(y_test, y_pred_val, average="macro")
        print("acc and f1 are", accuracy, f1 )
        return y_pred_val, avg_class_dist_1




In [12]:
def k_cross_validation(FOLD_NO, X, y, num_classes, q, b, epsilon):
    KF = KFold(n_splits= FOLD_NO, shuffle=True) 
    KF.get_n_splits(X) # returns the number of splitting iterations in the cross-validator
    print(KF) 
    
    FSCORE = []
    ACCur = []
    PRE = []
    REC = []



            
    for TRAIN_INDEX, VAL_INDEX in KF.split(X):

        X_TRAIN, X_VAL = X[TRAIN_INDEX], X[VAL_INDEX]
        Y_TRAIN, Y_VAL = y[TRAIN_INDEX], y[VAL_INDEX]


        Y_PRED, mean_each_class = CHAOSNET( X_TRAIN, Y_TRAIN, X_VAL, Y_VAL, num_classes, q, a, b, c, length, epsilon)
        accuracy = accuracy_score(Y_VAL, Y_PRED)*100
        #recall = recall_score(Y_VAL, Y_PRED , average="macro")
        #precision = precision_score(Y_TRAIN, Y_PRED , average="macro")
        f1 = f1_score(Y_VAL, Y_PRED, average="macro")
        
        FSCORE.append(f1)
        ACCur.append(accuracy)
        #PRE.append(precision)
        #REC.append(recall)
    
         
                 
    MAX_FSCORE = np.max(FSCORE)
    print("BEST F1SCORE", MAX_FSCORE)
    MAX_ACC = np.max(ACCur)
    print("avg accuracy", np.mean(ACCur))
    print("BEST ACCURACY", MAX_ACC)
    print('worst accuracy', min(ACCur))

    return


    

In [15]:
data = pd.read_csv("C:/Users/DEll/Desktop/chaosnet/ChaosNet-master/Datas/hypoprotein.csv")
data_name = "hy.csv"
data = np.array(data)
y = np.array(data[:,-1])
X = np.array(data[:,0:-1])
y = y.reshape(len(y),1)

a, b, c, q, length, num_classes, epsilon, FOLD_NO = parameter_file(data_name)

k_cross_validation(FOLD_NO, X, y, num_classes, q, b, epsilon)


KFold(n_splits=10, random_state=None, shuffle=True)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12520525 0.14942529 0.12561576 0.13136289 0.16666667
  0.0952381  0.12397373 0.0952381 ]
 [0.15252976 0.16592262 0.1235119  0.11904762 0.14880952 0.12946429
  0.12946429 0.13020833 0.140625  ]]
acc and f1 are 96.66666666666667 0.9614890885750963


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12212413 0.14981273 0.12252541 0.13255752 0.16666667
  0.0952381  0.1253344  0.0952381 ]
 [0.15346791 0.16589027 0.1255176  0.12008282 0.14803313 0.12939959
  0.12939959 0.1247412  0.13793996]]
acc and f1 are 93.33333333333333 0.9333333333333333


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12317106 0.14791168 0.12317106 0.13514233 0.16666667
  0.0952381  0.1235701  0.0952381 ]
 [0.15332287 0.16588174 0.12349555 0.11643119 0.14861329 0.13055992
  0.13055992 0.12585034 0.13919414]]
acc and f1 are 83.33333333333334 0.8285714285714285


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12372695 0.14932563 0.12413983 0.13281035 0.16666667
  0.0952381  0.12372695 0.0952381 ]
 [0.15488463 0.16593029 0.12322042 0.11953854 0.14973    0.12690231
  0.12616593 0.12616593 0.14015709]]
acc and f1 are 100.0 1.0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12438424 0.14942529 0.12479475 0.13341544 0.16666667
  0.0952381  0.12479475 0.0952381 ]
 [0.15327381 0.16592262 0.125      0.11755952 0.14732143 0.13095238
  0.12946429 0.12872024 0.13988095]]
acc and f1 are 96.66666666666667 0.9614890885750963


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12290118 0.1476741  0.12331407 0.13322323 0.16666667
  0.0952381  0.12331407 0.0952381 ]
 [0.15341188 0.16666667 0.12469318 0.11953854 0.14973    0.13058419
  0.12763868 0.12616593 0.13721159]]
acc and f1 are 100.0 1.0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12348668 0.14931396 0.12389023 0.13317191 0.16666667
  0.0952381  0.12389023 0.0952381 ]
 [0.15207373 0.16589862 0.12365591 0.11904762 0.14900154 0.12672811
  0.13056836 0.12672811 0.1390169 ]]
acc and f1 are 90.0 0.8942420681551116


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12364244 0.14786967 0.12406015 0.13408521 0.16666667
  0.0952381  0.12280702 0.0952381 ]
 [0.15295815 0.16594517 0.12337662 0.11760462 0.14862915 0.12842713
  0.12770563 0.12698413 0.13924964]]
acc and f1 are 96.66666666666667 0.9509001636661211


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12464986 0.14943978 0.12507003 0.13347339 0.16666667
  0.0952381  0.12422969 0.0952381 ]
 [0.15309524 0.16595238 0.1252381  0.11952381 0.14809524 0.12880952
  0.12952381 0.12809524 0.1402381 ]]
acc and f1 are 100.0 1.0


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  timeseries[i,1] = np.int(i)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  freq = (timeseries[0:np.int(timeseries[A.tolist().index(True),1]),0] - b < 0)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  probability[i,j] = freq.tolist().count(False)/np.float(len(freq))


 avg_class_dist_1 =  [[0.16666667 0.12267958 0.14971751 0.12308313 0.13236481 0.16666667
  0.0952381  0.12389023 0.0952381 ]
 [0.15284178 0.16589862 0.12442396 0.11981567 0.14669739 0.12749616
  0.12903226 0.12519201 0.13671275]]
acc and f1 are 96.66666666666667 0.9657142857142857
BEST F1SCORE 1.0
avg accuracy 95.33333333333334
BEST ACCURACY 100.0
worst accuracy 83.33333333333334
