<a href="https://colab.research.google.com/github/mohammad2682/FVS-SVM-RKHS/blob/main/SVM_RKHS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
!git clone https://github.com/mahyahashemi/RKHS_Project

Cloning into 'FVS-SVM-RKHS'...
remote: Enumerating objects: 86, done.[K
remote: Counting objects: 100% (86/86), done.[K
remote: Compressing objects: 100% (69/69), done.[K
remote: Total 86 (delta 31), reused 41 (delta 17), pack-reused 0[K
Unpacking objects: 100% (86/86), done.


In [3]:
def sigma(df, scale=0.1):
  """Return a kernel width derived from the largest pairwise row distance.

  Every column except the last is used as a feature; the last column is
  ignored.

  Args:
    df: DataFrame with at least two rows. All columns but the last must be
      numeric.
    scale: Fraction of the maximum pairwise Euclidean distance to return.
      Defaults to 0.1, the value that used to be hard-coded.

  Returns:
    ``scale`` times the largest Euclidean distance between any two rows.

  Raises:
    ValueError: If ``df`` has fewer than two rows, so no pair exists.
  """
  # Convert to an ndarray once: indexing the DataFrame with ``iloc`` inside
  # the O(N^2) pair loop is slow and does not change the result.
  features = df.iloc[:, :-1].values
  n_rows = features.shape[0]
  if n_rows < 2:
    raise ValueError("sigma() needs at least two rows to form a pairwise distance")
  distances = [
      np.linalg.norm(features[i] - features[j])
      for i in range(n_rows)
      for j in range(i + 1, n_rows)
  ]
  return scale * np.max(distances)

In [4]:
def K(FV1, FV2, sigma):
  """Gaussian-type kernel value between two feature vectors.

  Evaluates ``exp(-||FV1 - FV2||^2 / (2 * sigma))``. Note that the
  denominator uses ``sigma`` itself, not ``sigma ** 2``.

  Args:
    FV1: First vector (must support ``FV1 - FV2``, e.g. a NumPy array).
    FV2: Second vector, same shape as ``FV1``.
    sigma: Kernel width parameter.

  Returns:
    The kernel value; 1.0 when the two vectors are identical.
  """
  squared_distance = np.square(np.linalg.norm(FV1 - FV2))
  exponent = -1 * squared_distance / (2 * sigma)
  return np.exp(exponent)

In [5]:
def give_KSS(S, sigma):
  """Build the square kernel matrix of the vectors in ``S``.

  Entry ``(i, j)`` is ``K(S[i], S[j], sigma)``.

  Args:
    S: Sequence of feature vectors (each convertible with ``np.array``).
    sigma: Kernel width passed through to ``K``.

  Returns:
    Array of shape ``(len(S), len(S))``.
  """
  points = [np.array(member) for member in S]
  size = len(S)
  # Row-major flat list of all pairwise kernel values, reshaped at the end.
  entries = [
      K(row_vec, col_vec, sigma = sigma)
      for row_vec in points
      for col_vec in points
  ]
  return np.array(entries).reshape((size, size))

In [6]:
def give_KSx(S, x, sigma):
  """Kernel values between every vector in ``S`` and a single vector ``x``.

  Args:
    S: Sequence of feature vectors (each convertible with ``np.array``).
    x: The vector to compare against; passed to ``K`` unchanged.
    sigma: Kernel width passed through to ``K``.

  Returns:
    Column array of shape ``(len(S), 1)`` whose i-th entry is
    ``K(S[i], x, sigma)``.
  """
  column = np.array([K(np.array(member), x, sigma = sigma) for member in S])
  return column.reshape((len(S), 1))

In [7]:
def minor_major(df):
  """Identify the minority and majority labels in the last column of ``df``.

  Only the first two distinct labels (in order of appearance) are compared.
  On a tie, the label that appears first is reported as the minority.

  Args:
    df: DataFrame whose last column holds the class label.

  Returns:
    Tuple ``(minor, major)`` of label values.
  """
  labels = df.iloc[:, -1]
  classes = labels.unique()
  first_count = (labels == classes[0]).sum()
  second_count = (labels == classes[1]).sum()
  if first_count > second_count:
    return classes[1], classes[0]
  return classes[0], classes[1]

In [9]:
def S_creator(df, FV1, sigma=None, max_size=5):
  """Greedily grow a set of feature vectors ``S`` seeded with ``FV1``.

  Starting from the seed, each remaining row of ``df`` (label column removed)
  is scored with ``LF = |1 - KSx^T * inv(KSS) * KSx|``, where ``KSS`` is the
  kernel matrix of the current set and ``KSx`` the kernel column between the
  set and the candidate. A candidate is added when ``LF`` exceeds
  ``tau = min(0.001, 1/N)``, with ``N`` the number of rows in ``df``.

  Args:
    df: Full DataFrame; the last column is dropped before scoring.
    FV1: One-row DataFrame holding the seed features. Its index identifies
      the row to exclude from the candidates.
    sigma: Kernel width. When ``None`` (the default) the notebook-level
      variable ``sig`` is used, which is what the function did before this
      parameter existed.
    max_size: Stop once ``S`` holds this many vectors. Defaults to 5, the
      previously hard-coded cap. ``None`` removes the cap.

  Returns:
    List of feature vectors (each a plain list), seed first.
  """
  kernel_width = sig if sigma is None else sigma
  candidates = df.drop(index = FV1.index).iloc[:, :-1].values
  seed = FV1.iloc[0].values
  tau = min(0.001, 1/df.shape[0]) #minimum of 0.001, 1/N
  S = [list(seed)]
  # inv(KSS) only changes when S grows, so recompute it on append instead of
  # on every candidate.
  KSS_inv = np.linalg.inv(give_KSS(S, sigma = kernel_width))
  for vec in candidates:
    if max_size is not None and len(S) >= max_size:
      break
    KSx = give_KSx(S, vec, sigma = kernel_width)
    # The product is a 1x1 array; .item() turns it into a plain scalar.
    LF = abs(1 - np.dot(np.dot(np.transpose(KSx), KSS_inv), KSx).item())
    if LF > tau:
      S.append(list(vec))
      KSS_inv = np.linalg.inv(give_KSS(S, sigma = kernel_width))
  return S

In [10]:
def give_landa(df, minor_cls, major_cls):
  minor = df[df["Class"] == minor_cls]
  major = df[df["Class"] == major_cls]
  landa = []
  for col in df.columns[:-1]:
    mean_minor = minor[col].mean()
    mean_major = major[col].mean()
    var_minor = minor[col].var()
    var_major = major[col].var()
    landa_i = np.square(mean_minor - mean_major)/(var_minor - var_major)
    landa.append(landa_i)
  return landa

In [11]:
# Load pima.csv from the cloned repository directory and preview the first rows.
df = pd.read_csv("FVS-SVM-RKHS/datasets/pima.csv")
df.head()

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,14,175,62,30,0,33.6,0.212,38,tested_positive
1,4,146,78,0,0,38.5,0.52,67,tested_positive
2,15,136,70,32,110,37.1,0.153,43,tested_positive
3,3,107,62,13,48,22.9,0.678,23,tested_positive
4,3,169,74,19,125,29.9,0.268,31,tested_positive


In [12]:
# Toy subset: the first 3 and last 7 rows of the full table, re-indexed from 0
# and restricted to two features plus the class label.
df_test1 = df.head(3)
df_test2 = df.tail(7)
df_test = pd.concat([df_test1, df_test2], ignore_index=True)
df_test = df_test.loc[:, ["preg", "plas", "class"]]

In [13]:
# Display the toy subset built in the previous cell.
df_test

Unnamed: 0,preg,plas,class
0,14,175,tested_positive
1,4,146,tested_positive
2,15,136,tested_positive
3,6,114,tested_negative
4,6,92,tested_negative
5,5,117,tested_negative
6,4,83,tested_negative
7,7,119,tested_negative
8,1,95,tested_negative
9,1,181,tested_positive


In [14]:
# Minority/majority labels of the toy subset, and the kernel width that the
# later cells read as the notebook-level variable `sig`.
minor_cls, major_cls = minor_major(df_test)
sig = sigma(df_test)

In [15]:
# Show the kernel width and the (minority, majority) labels computed above.
print(sig)
print(minor_cls, major_cls)

9.804590761474953
tested_positive tested_negative


In [16]:
# Every minority-class sample seeds its own candidate set S.
minor_df = df_test[df_test["class"]==minor_cls]
# FV_T holds the seed feature vectors (label column removed) in the same row
# order as S_T. Taking the slice directly avoids growing a DataFrame with
# pd.concat inside a loop, starting from an empty frame.
FV_T = minor_df.iloc[:, :-1].copy()
S_T = [S_creator(df_test, FV_T.iloc[[i]]) for i in range(FV_T.shape[0])]

In [18]:
# Number of minority-class rows, i.e. how many candidate sets were built.
minor_df.shape[0]

4

In [17]:
# Seed feature vectors, one row per candidate set in S_T.
FV_T

Unnamed: 0,preg,plas
0,14,175
1,4,146
2,15,136
9,1,181


In [21]:
# Candidate vector sets; each inner list starts with its seed vector.
S_T

[[[14, 175], [4, 146], [15, 136], [6, 114], [6, 92]],
 [[4, 146], [14, 175], [15, 136], [6, 114], [6, 92]],
 [[15, 136], [14, 175], [4, 146], [6, 114], [6, 92]],
 [[1, 181], [14, 175], [4, 146], [15, 136], [6, 114]]]

In [24]:
# Pick the candidate set S whose projected data gives the largest mean landa.
# Start from -inf so any finite score can win; the previous floor of -1000
# would leave best_S undefined if every score fell below it.
landa_max = -np.inf
best_S = None
for i, S in enumerate(S_T):

  FV1 = FV_T.iloc[[i]]
  df_new = df_test.drop(index = FV1.index, axis = 0 )

  # These depend only on S, so compute them once per candidate set instead
  # of once per data row.
  KSS = give_KSS(S, sigma = sig)
  KSS_inv = np.linalg.inv(KSS)
  DSS = np.sqrt(KSS)  # element-wise square root of the kernel matrix

  cls = []
  betas = []
  # Project every remaining row onto the set S; keep its label alongside.
  for vector in df_new.values:
    vec = np.array(list(vector[:-1]))
    cls.append(vector[-1])
    KSx = give_KSx(S, vec, sigma = sig)
    beta = np.dot(np.dot(np.transpose(KSx), KSS_inv), DSS)
    betas.append(beta)
  # One row per projected sample, one column per vector in S.
  B = np.concatenate(betas)

  B_df = pd.DataFrame(B)
  B_df["Class"] = cls

  landa = give_landa(B_df, minor_cls, major_cls)

  landa_t = np.sum(landa)/len(S)
  if landa_t > landa_max:
    print(landa_t)
    landa_max = landa_t
    best_S = S

0.03830932513633186
0.1544038245127352


In [25]:
# The candidate set whose landa_t was the largest in the selection loop.
best_S

[[1, 181], [14, 175], [4, 146], [15, 136], [6, 114]]