<a href="https://colab.research.google.com/github/mohammad2682/FVS-SVM-RKHS/blob/main/SVM_RKHS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
!git clone https://github.com/mohammad2682/FVS-SVM-RKHS

Cloning into 'FVS-SVM-RKHS'...
remote: Enumerating objects: 95, done.[K
remote: Counting objects: 100% (95/95), done.[K
remote: Compressing objects: 100% (78/78), done.[K
remote: Total 95 (delta 35), reused 41 (delta 17), pack-reused 0[K
Unpacking objects: 100% (95/95), done.


In [3]:
def sigma(df):
  N = df.shape[0]
  sigma = []
  for i in range(N):
    for j in range(i+1, N):
      x_i = df.iloc[[i], :-1]
      x_j = df.iloc[[j], :-1]
      sigma.append(np.linalg.norm(x_i.values - x_j.values))
  return 0.1 * np.max(sigma)

In [4]:
def K(FV1, FV2, sigma):
  dist = np.linalg.norm(FV1-FV2)
  return np.exp(-1*np.square(dist)/(2*sigma))

In [5]:
def give_KSS(S, sigma):
  KSS = []
  for vec1 in S:
    for vec2 in S:
      k = K(np.array(vec1), np.array(vec2), sigma = sigma)
      KSS.append(k)

  return np.array(KSS).reshape((len(S), len(S)))

In [6]:
def give_KSx(S, x, sigma):
  KSx = []
  for s in S:
    kx = K(np.array(s), x, sigma = sigma)
    KSx.append(kx)
  return np.array(KSx).reshape((len(S), 1))

In [7]:
def minor_major(df):
  cls = df.iloc[:,-1].unique()
  cls0 = df[df.iloc[:,-1]==cls[0]].shape[0]
  cls1 = df[df.iloc[:,-1]==cls[1]].shape[0]
  minor = cls[0]
  major = cls[1]
  if cls0>cls1:
    minor = cls[1]
    major = cls[0]
  return minor, major

In [8]:
def S_creator(df, FV1):
  df_new = df.drop(index = FV1.index)
  FV1 = FV1.iloc[0].values
  tau = min(0.001, 1/df.shape[0]) #minimum of 0.001, 1/N
  S = [list(FV1)]
  cnt = 1
  for vec in df_new.iloc[:, :-1].values:
    KSx = give_KSx(S, vec, sigma = sig)
    KSS = give_KSS(S, sigma = sig)
    LF = abs(1-(np.dot(np.dot(np.transpose(KSx), np.linalg.inv(KSS)), KSx)))
    #print(LF)
    if LF > tau:
      S.append(list(vec))
      cnt += 1
      if cnt == 5:
        break
  return S

In [9]:
def give_landa(df, minor_cls, major_cls):
  minor = df[df["Class"] == minor_cls]
  major = df[df["Class"] == major_cls]
  landa = []
  for col in df.columns[:-1]:
    mean_minor = minor[col].mean()
    mean_major = major[col].mean()
    var_minor = minor[col].var()
    var_major = major[col].var()
    landa_i = np.square(mean_minor - mean_major)/(var_minor - var_major)
    landa.append(landa_i)
  return landa

In [10]:
df = pd.read_csv("FVS-SVM-RKHS/datasets/pima.csv")
df.head()

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,14,175,62,30,0,33.6,0.212,38,tested_positive
1,4,146,78,0,0,38.5,0.52,67,tested_positive
2,15,136,70,32,110,37.1,0.153,43,tested_positive
3,3,107,62,13,48,22.9,0.678,23,tested_positive
4,3,169,74,19,125,29.9,0.268,31,tested_positive


In [11]:
df_test1 = df.iloc[:3, :]
df_test2 = df.iloc[-7:, :]
df_test = pd.concat([df_test1, df_test2]).reset_index(drop=True)
df_test = df_test[["preg", "plas", "class"]]

In [12]:
sig = sigma(df = df_test)
minor_cls, major_cls = minor_major(df_test)

In [13]:
print(sig)
print(minor_cls, major_cls)

9.804590761474953
tested_positive tested_negative


In [14]:
minor_df = df_test[df_test["class"]==minor_cls]
S_T = []
FV_T = pd.DataFrame()
for i in range(minor_df.shape[0]):
  FV1 = minor_df.iloc[[i], :-1]
  S_T.append(S_creator(df_test, FV1))
  FV_T = pd.concat([FV_T, FV1])

In [15]:
landa_max = -1000
for i, S in enumerate(S_T):

  FV1 = FV_T.iloc[[i]]
  df_new = df_test.drop(index = FV1.index, axis = 0 )
  cls = []
  flag = 0
  #sort Data
  for vector in df_new.values:
    vec = np.array(list(vector[:-1]))
    cls.append(vector[-1])
    KSx = give_KSx(S, vec, sigma = sig)
    KSS = give_KSS(S, sigma = sig)
    DSS = np.sqrt(KSS)
    beta = np.dot(np.dot(np.transpose(KSx), np.linalg.inv(KSS)), DSS)
    if flag == 0:
      B = beta
      flag = -1
      continue
    B = np.concatenate((B, beta))

  B_df = pd.DataFrame(B)
  B_df["Class"] = cls

  landa = give_landa(B_df, minor_cls, major_cls)

  landa_t = np.sum(landa)/len(S)
  if landa_t > landa_max:
    print(landa_t)
    landa_max = landa_t
    best_S = S

0.03830932513633186
0.1544038245127352


## Second Section

In [16]:
omeg = []
for m in range(np.shape(best_S)[0]):
  omeg_m = []
  for n in range(np.shape(best_S)[0]):
    omeg_mn = np.sum(np.dot(B_df.iloc[:, m], B_df.iloc[:, n]))
    omeg_m.insert(n, omeg_mn)
  if m == 0:
    omeg = omeg_m
    continue
  omeg = np.concatenate((omeg, omeg_m))
omeg = omeg.reshape((5, 5))

In [17]:
H = []
for m in range(np.shape(best_S)[0]):
  H_m = []
  H_j = np.sum(B_df.iloc[m, :-1])
  H_m = np.sum(np.dot(B_df.iloc[:,m], (1-H_j)))
  H.insert(m, H_m)

In [18]:
tau = H.copy()

In [19]:
C = []
for m in range(np.shape(best_S)[0]):
  c_val = 1 - np.sum(B_df.iloc[m, :-1])
  C.insert(m, c_val)
C = np.sum(C)

In [20]:
B_df["y_i"] = B_df['Class']
B_df["y_i"].replace({minor_cls: -1, major_cls: 1}, inplace=True)

In [21]:
P = []
for m in range(np.shape(best_S)[0]):
  P_m = np.sum(np.dot(B_df.iloc[:, m], B_df["y_i"]))
  P.insert(m, P_m)

In [65]:
 L = np.sum(np.dot((1 - B_df.iloc[:, :-2].sum(axis=1)), B_df.iloc[:, -1]))

In [27]:
H = np.array(H).reshape((-1, 1))
tau = np.array(tau).reshape((-1, 1))
P = np.array(P).reshape((-1, 1))

In [61]:
y_hat = np.divide((C*P - H*L), (omeg*C - np.dot(H, tau.transpose())))

In [62]:
b = np.divide((np.dot((-1*tau.transpose()), P) + omeg*L), (omeg*C-np.dot(H, tau.transpose())))

In [63]:
y_hat

array([[-1.82066289e+02, -9.77475413e-01,  5.01689089e+04,
         3.23497276e+08, -9.63428477e+01],
       [-1.83471199e+02, -9.85021306e-01,  7.60254370e+04,
         4.90253311e+08, -1.45996397e+02],
       [ 9.45025383e+06,  7.62966879e+04, -9.88523254e-01,
        -1.38438349e+02, -1.45973369e+02],
       [ 6.18641411e+10,  4.99490047e+08, -1.40545179e+02,
        -1.00355409e+00, -4.18051350e+05],
       [-1.09592433e+05, -8.84791968e+02, -8.81507286e+02,
        -2.48669408e+06,  2.84910848e+00]])

In [64]:
b

array([[-1.22590459e+05, -6.53985759e+02,  3.37813238e+07,
         2.17827464e+11, -6.48726273e+04],
       [-6.53985759e+02,  6.65349673e-01,  2.72733745e+05,
         1.75873532e+09, -5.23747651e+02],
       [ 3.37813238e+07,  2.72733745e+05,  6.65394780e-01,
        -4.90669299e+02, -5.21803301e+02],
       [ 2.17827464e+11,  1.75873532e+09, -4.90669299e+02,
         6.65386127e-01, -1.47198228e+06],
       [-6.48726273e+04, -5.23747651e+02, -5.21803301e+02,
        -1.47198228e+06, -1.18159006e+00]])