In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import math

In [None]:
def check_numeric(X):
  """Return True when no element of X is a string, False otherwise.

  X is converted to a NumPy array and flattened before the check. Note that
  NumPy coerces a list mixing numbers and strings to an all-string array, so
  a single string entry is enough to make the result False.
  """
  flattened = np.array(X).ravel()
  for element in flattened:
    if isinstance(element, str):
      return False
  return True

# X=np.array([[6,7,8],[3,'0',5]])
# print(check_numeric(X))
# print(X)
# print(check_numeric([1,2,3]))

False
[['6' '7' '8']
 ['3' '0' '5']]
True


In [None]:
def error_return(p):
  """Build the placeholder result returned when attributes cannot be scored.

  Parameters
  ----------
  p : int
      Number of attributes.

  Returns
  -------
  ranked : ndarray of int, shape (p,)
      The attribute indices 0..p-1 in their original order.
  weight : ndarray of float, shape (p,)
      All-NaN weights, one per attribute.
  """
  ranked = np.arange(p, dtype=int)
  # Build the 1-D NaN vector directly. Squeezing a (1, p) buffer collapses to
  # a 0-d array when p == 1, which then cannot be slice-assigned.
  weight = np.full(p, np.nan)
  return ranked, weight


### Preprocess the data and call the feature selection algorithm

In [None]:
def RFS_configure(X, Y, K = 7, theta = 3):
  """Validate and clean the data, score every attribute with RFS, and rank them.

  Parameters
  ----------
  X : array-like, shape (instances, attributes)
      Attribute values. A 1-D input is treated as a single attribute.
  Y : array-like, shape (instances,)
      Class label of each instance.
  K : int, default 7
      Number of neighbours, forwarded to RFS.
  theta : int, default 3
      Threshold on differing-class neighbours, forwarded to RFS.

  Returns
  -------
  ranked : ndarray of int, shape (p,)
      0-based attribute indices ordered by ascending RFS score, followed by
      the indices of single-valued attributes (which are not scored).
  Rvalue : ndarray of float, shape (p,)
      RFS score of each attribute in original column order; NaN for
      attributes that were not scored.

  When the input is unusable (non-numeric X, mismatched lengths, fewer than
  two complete instances, or only single-valued attributes) a message is
  printed and the placeholder from error_return(p) is returned instead.
  """
  X = np.array(X)
  Y = np.array(Y)

  # A 1-D X is a single attribute; make it a column so X.shape[1] is defined
  # on every path, including the early error returns.
  if X.ndim == 1:
    X = X.reshape(-1, 1)
  p = X.shape[1] # no of attributes

  if not check_numeric(X):
    print('X does not contain numeric data')
    return error_return(p)

  # Check if the input sizes are consistent
  if Y.shape[0] != X.shape[0]:
    print('number of instances and output labels doesnot match')
    return error_return(p)

  # Encode classes as integer codes 0..#classes-1; pd.factorize marks a
  # missing label with the sentinel code -1.
  codes, _ = pd.factorize(Y)

  # Remove incomplete instances: any missing attribute value or missing label.
  complete = ~pd.isna(X).any(axis=1) & (codes != -1)
  X, Y = X[complete], codes[complete]

  # Do we have enough observations?
  if len(Y) < 2:
    print('not enough instances')
    return error_return(p)

  # Exclude single-valued attributes (value range below 1e-9).
  Xdiff = X.max(0) - X.min(0)
  isDiffValue = np.asarray(Xdiff >= 1e-9, dtype=bool)
  if not isDiffValue.any():
    print("All attributes are single valued attributes.")
    return error_return(p)

  accepted = np.flatnonzero(isDiffValue)   # 0-based indices of scored attributes
  rejected = np.flatnonzero(~isDiffValue)  # 0-based indices of single-valued attributes

  # Score each remaining attribute; unscored attributes keep NaN.
  Rvalue = np.full(p, np.nan)
  Rvalue[accepted] = [RFS(X[:, j], Y, K, theta) for j in accepted]

  # Rank scored attributes by ascending score (stable, so ties keep column
  # order), then append the single-valued ones.
  order = np.argsort(Rvalue[accepted], kind='stable')
  ranked = np.concatenate((accepted[order], rejected)).astype(int)

  return ranked, Rvalue



# Relief algorithm function

In [None]:
def RFS(V, C, K, theta):
  """Score one attribute by how often an instance's nearest neighbours differ in class.

  For every instance, the K other instances whose attribute value is closest
  (absolute difference) are found, and the instance is flagged when at least
  `theta` of them carry a different class label.

  Parameters
  ----------
  V : array-like, shape (N,)
      Values of a single attribute.
  C : array-like, shape (N,)
      Class label of each instance, aligned with V.
  K : int
      Number of neighbours to inspect per instance. When fewer than K other
      instances exist, all of them are used.
  theta : int
      Minimum number of differing-class neighbours for an instance to be flagged.

  Returns
  -------
  float
      Fraction of instances that were flagged (between 0 and 1), or NaN when
      V is empty.
  """
  V = np.asarray(V, dtype=float)
  C = np.asarray(C)
  N = len(V)
  if N == 0:
    return np.nan

  # Sort the values of V, keeping the correspondence with C.
  order = np.argsort(V, kind='stable')
  sortedV = V[order]
  sortedC = C[order]

  # Never ask for more neighbours than there are other instances.
  k = max(0, min(K, N - 1))

  flagged = np.zeros(N, dtype=bool)
  for i in range(N):
    # In sorted order the K nearest values always lie within K positions of i.
    left = max(0, i - K)
    right = min(N - 1, i + K)

    # Exclude the instance itself up front so exactly k neighbours are kept,
    # even when duplicate values tie with it at distance zero.
    candidates = np.concatenate((np.arange(left, i), np.arange(i + 1, right + 1)))
    distances = np.abs(sortedV[candidates] - sortedV[i])
    nearest = candidates[np.argsort(distances, kind='stable')[:k]]

    # Number of neighbours whose class differs from the instance's class.
    differing = np.count_nonzero(sortedC[nearest] != sortedC[i])
    flagged[i] = differing >= theta

  return flagged.mean()

In [None]:
# X = np.array([[0,0,0,0,1,0,0],
#               [0,1,0,1,0,1,0],
#               [1,0,0,0,1,1,0],
#               [0,0,0,1,0,0,0],
#               [1,1,0,0,0,0,0],
#               [0,1,0,0,1,0,0],
#               [0,0,0,0,0,1,0],
#               [1,0,0,1,0,0,0],
#               [1,0,1,0,0,0,1],
#               [1,1,0,0,1,0,1],
#               [0,0,0,0,0,0,1],
#               [1,0,1,1,1,1,1],
#               [1,0,0,0,0,0,0],
#               [1,0,0,1,0,1,1],
#               [0,1,1,0,0,0,0],
#               [0,0,1,1,0,0,1],
#               [1,1,0,1,0,0,0],
#               [1,0,1,0,1,0,1],
#               [0,1,1,1,0,0,1],
#               [1,1,1,1,1,1,1]])

# Y = np.array([0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,0,1,1,0])

# RFS_configure(X,Y)


0
1
2
3
4
5
6


(array([0., 4., 1., 2., 3., 5., 6.]),
 array([0.8 , 0.9 , 0.95, 1.  , 0.85, 1.  , 1.  ]))

In [None]:
# X = np.array([[1,0,1],
#               [1,0,0],
#               [0,1,1],
#               [0,1,0],
#               [0,0,1],
#               [0,0,0],
#               [1,1,1],
#               [1,1,0]])

# Y = np.array([1,1,1,1,0,0,0,0])
# ranked, weight= RFS_configure(X,Y)

# print('ranked')
# print(ranked)
# print('weight')
# print(weight)


0
1
2
ranked
[0. 1. 2.]
weight
[1. 1. 1.]


In [None]:
# def permute(x):
#   if len(x) == 7:
#     global X
#     global Y
#     Y.append((x[0] ^ x[1]))
#     X.append(x)
#     return
#   x = np.append(x,0)
#   permute(x)
#   x = x[0:-1]
#   x = np.append(x,1)
#   permute(x)

In [None]:
# X = []
# Y = []
# x = np.array([], dtype=int)
# permute(x)
# X = np.array(X)
# Y = np.array(Y)
# ranked, weight=RFS_configure(X,Y)

# print('ranked')
# print(ranked)
# print('weight')
# print(weight)


0
1
2
3
4
5
6
ranked
[0. 1. 5. 6. 2. 3. 4.]
weight
[0.0859375 0.0859375 0.3671875 0.375     0.390625  0.3203125 0.3515625]


In [None]:
# # test classification dataset
# from sklearn.datasets import make_classification
# # define dataset
# X, Y = make_classification(n_samples=100, n_features=8, n_informative=4, n_redundant=0,shuffle=False, random_state=1)
# # summarize the dataset
# print(X.shape, Y.shape)
# ranked, weight= RFS_configure(X,Y)
# print('ranked')
# print(ranked)
# print('weight')
# print(weight[np.array(ranked, dtype=int)])

(100, 8) (100,)
0
1
2
3
4
5
6
7
ranked
[3. 2. 4. 0. 5. 1. 6. 7.]
weight
[0.52 0.53 0.58 0.65 0.75 0.79 0.8  0.96]
