In [0]:
import numpy as np
import tensorflow as tf
import matplotlib as plt
import pandas as pd
from sklearn import svm
import random

In [0]:
# Notebook-wide switches, set once here and read by later cells.
USE_GPU = True                 # NOTE(review): not referenced by the cells shown here
dataset = 'person_dirty.csv'   # file name of the CSV that gets loaded
colab = True                   # True: read from mounted Google Drive; False: read from the local path

In [197]:
# On Colab the CSV lives on Google Drive, which has to be mounted first.
# The import stays inside the branch because google.colab only exists there.
if colab:
  from google.colab import drive
  drive.mount('/content/gdrive', force_remount=True)

# Pick the directory that matches the environment and load the CSV into `data`.
data = pd.read_csv(
    ('/content/gdrive/My Drive/CS848/Data/' if colab else 'Data/csv/') + dataset
)

Mounted at /content/gdrive


In [0]:
# Encode categorical columns as ints based on dictionary of unique values
def encode_string_col(col):
  """Replace every value in `col` with its index in the sorted unique values.

  Args:
    col: 1-D sequence or array of mutually comparable values (the notebook
      passes the `.values` of a string column).

  Returns:
    A list of ints with the same length as `col`; element i is the position
    of `col[i]` within `np.unique(col)`.
  """
  # return_inverse yields the index of each element in the unique array in a
  # single pass, instead of rescanning the unique values for every element.
  _, codes = np.unique(col, return_inverse=True)
  return codes.ravel().tolist()

In [0]:
# Integer-encode, in place, every column whose first entry (label 0) is a string.
# Columns whose first entry is not a str are left untouched.
for name in data:
  first_entry = data[name][0]
  if not isinstance(first_entry, str):
    continue
  data[name] = encode_string_col(data[name].values)

In [0]:
def split_data(data, split=70):
  """Split `data` into train and test sets, separately per `is_dirty` group.

  Rows are grouped by the 'is_dirty' column. Within each group the first
  `split` percent of rows (in group order, no shuffling) go to the training
  set and the rest go to the test set. The last value of each row is used as
  the label and all preceding values as the feature vector.

  Args:
    data: DataFrame containing an 'is_dirty' column.
    split: percentage of each group assigned to the training set.

  Returns:
    (train, test): two lists of (features, label) tuples.
  """
  train_pairs, test_pairs = [], []
  for _, group in data.groupby('is_dirty'):
    rows = group.values.tolist()
    cut = int(len(rows)*(split/100))
    train_pairs.extend((row[:-1], row[-1]) for row in rows[:cut])
    test_pairs.extend((row[:-1], row[-1]) for row in rows[cut:])
  return train_pairs, test_pairs

In [0]:
# Split `data` into train/test lists of (features, label) pairs using
# split_data's default split of 70.
train, test = split_data(data)

In [0]:
def train_test_svm(train, test):
  """Fit a default `svm.SVC` on `train` and build a confusion matrix on `test`.

  Args:
    train: non-empty list of (features, label) pairs; shuffled IN PLACE.
    test: list of (features, label) pairs; shuffled IN PLACE.

  Returns:
    2x2 integer array `conf` where `conf[actual][predicted]` counts test
    samples, with 0 and 1 as the only counted labels. Samples whose actual or
    predicted label is anything other than 0 or 1 are ignored. The matrix is
    also printed.
  """
  # The caller's lists are reordered by these calls (kept from the original).
  random.shuffle(train)
  random.shuffle(test)
  X,Y = zip(*train)
  c = svm.SVC()
  c.fit(X, Y)
  conf = np.zeros((2,2), dtype=int)
  if len(test) > 0:
    test_x, test_y = zip(*test)
    # One batched predict call instead of one call per test sample.
    preds = c.predict(list(test_x))
    for actual, predicted in zip(test_y, preds):
      if actual in (0, 1) and predicted in (0, 1):
        conf[int(actual)][int(predicted)] += 1
  print(conf)
  return conf

def read_cfm(conf, verbose=True):
  """Compute accuracy, precision, recall and F-measure from a 2x2 confusion matrix.

  Args:
    conf: 2x2 matrix indexed as `conf[actual][predicted]`, so `conf[1][1]`
      is the true-positive count, `conf[0][1]` the false positives and
      `conf[1][0]` the false negatives.
    verbose: when True, print the four metrics.

  Returns:
    `[accuracy, precision, recall, f_measure]` as floats. A metric whose
    denominator is zero is reported as 0.0 instead of nan or an exception.
  """
  def _ratio(num, den):
    # Safe division: undefined ratios are reported as 0.0.
    return float(num)/float(den) if den else 0.0

  tn, fp = conf[0][0], conf[0][1]
  fn, tp = conf[1][0], conf[1][1]
  # Use the matrix passed in, not the notebook-level global `cf`.
  acc = _ratio(tn + tp, np.sum(conf))
  prec = _ratio(tp, tp + fp)
  rec = _ratio(tp, tp + fn)
  fm = _ratio(2*(prec*rec), prec+rec)
  if verbose:
    print('Accuracy:', acc)
    print('Precision:', prec)
    print('Recall:',rec )
    print('F-Measure:', fm)
  return [acc, prec, rec, fm]

In [247]:
# Fit the SVM on `train`, evaluate it on `test`, and keep the printed 2x2 confusion matrix.
cf = train_test_svm(train, test)
# Derive [accuracy, precision, recall, F-measure] from that confusion matrix.
base_svm_stats = read_cfm(cf)

[[2850    0]
 [ 135   15]]
Accuracy: 0.955
Precision: 1.0
Recall: 0.1
F-Measure: 0.18181818181818182


(0.955, 1.0, 0.1, 0.18181818181818182)