In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import sklearn.svm
import sklearn.metrics
from sklearn.model_selection import train_test_split
import glob


In [3]:
def svm_classification(X_train,y_train,X_test,y_test):
    svm_model = sklearn.svm.SVC(random_state=0)
    svm_model.fit(X_train,y_train)
    y_predicttrain = svm_model.predict(X_train)
    y_predicttest = svm_model.predict(X_test)
    return sklearn.metrics.accuracy_score(y_train,y_predicttrain),sklearn.metrics.accuracy_score(y_test,y_predicttest);

In [4]:
def svm_classification_tuned(X_train,y_train):
    svm_model = sklearn.svm.SVC()
    kernels = ['linear','rbf','poly','sigmoid']
    C = np.logspace(0,3,4)
    gamma = np.logspace(-2,1,4)
    param_grid = {'kernel':kernels,'C':C,'gamma':gamma}
    svm_model_gs = sklearn.model_selection.GridSearchCV(svm_model,param_grid,verbose=1,cv=3) 
    svm_model_gs.fit(X_train,y_train)
    return svm_model_gs;


 ## OCCUPANCY DATASET


In [5]:
data_occupancy = np.loadtxt('Occupany_train.txt',delimiter=',', skiprows=1, usecols = (2,3,4,5,6,7))
data_occupany_test=np.loadtxt('Occupany_test.txt', delimiter=',', skiprows=1, usecols = (2,3,4,5,6,7))
X_train=data_occupancy[:, 0:5]
y_train=data_occupancy[:, 5].astype(np.int32)
X_test=data_occupany_test[:, 0:5]
y_test=data_occupany_test[:, 5].astype(np.int32)

In [None]:
train_acc,test_acc = svm_classification(X_train,y_train,X_test,y_test)
print("%.2f training accuracy"%(train_acc*100))
print("%.2f testing accuracy"%(test_acc*100))

In [None]:
model = svm_classification_tuned(X_train,y_train)
print("%.2f training accuracy"%(model.best_score_*100))
print("Best hyperparameters are :",model.best_params_)

Fitting 3 folds for each of 64 candidates, totalling 192 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


## BANK MARKETTING

In [28]:
data_bank = pd.read_csv('BankMarketing_train.csv', delimiter=';')
data_bank_test= pd.read_csv('BankMarketing_test.csv', delimiter=';')

bank_labels ={'no':0,'yes':1}
data_bank.y = [bank_labels[i] for i in data_bank.y]
data_bank_test.y = [bank_labels[i] for i in data_bank_test.y]

X_train = data_bank.iloc[:,:-1]
y_train = data_bank.iloc[:,-1].astype('int32')

X_test = data_bank_test.iloc[:,:-1]
y_test = data_bank_test.iloc[:,-1].astype('int32')


In [29]:
le = sklearn.preprocessing.LabelEncoder()

for i in [1,2,3,4,5,6,7,8,9,14]:
    X_train.iloc[:,i] = le.fit_transform(X_train.iloc[:,i])
    X_test.iloc[:,i] = le.fit_transform(X_test.iloc[:,i])



In [30]:
train_acc,test_acc = svm_classification(X_train,y_train,X_test,y_test)
print("%.2f training accuracy"%(train_acc*100))
print("%.2f testing accuracy"%(test_acc*100))

89.80 training accuracy
90.09 testing accuracy


## ACTIVITY RECOGNITION IN HEALTHY OLD PEOPLE

In [31]:
files = glob.glob("Datasets_Healthy_Older_People/S2_Dataset/d*")
files.extend(glob.glob("Datasets_Healthy_Older_People/S1_Dataset/d*"))
ar_files = [pd.read_csv(f, header=None) for f in files]
data_ar=pd.concat(ar_files,ignore_index=True).values

X=data_ar[:, 1:8]
y=data_ar[:, 8].astype('int32')

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [None]:
train_acc,test_acc = svm_classification(X_train,y_train,X_test,y_test)
print("%.2f training accuracy"%(train_acc*100))
print("%.2f testing accuracy"%(test_acc*100))

## MAGIC GAMMA TELESCOPE

In [21]:
data_magic = pd.read_csv('MagicGamma.txt',delimiter=',')
X=data_magic.iloc[:,:-1]
y=data_magic.iloc[:,-1]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)


In [22]:
train_acc,test_acc = svm_classification(X_train,y_train,X_test,y_test)
print("%.2f training accuracy"%(train_acc*100))
print("%.2f testing accuracy"%(test_acc*100))

82.35 training accuracy
82.05 testing accuracy


## BITCOIN HEIST DATA

In [None]:
data_bitcoin =pd.read_csv('BitcoinHeistData.csv')


bitcoin_labels = {'princetonCerber':1, 'princetonLocky':1, 'montrealCryptoLocker':1,
 'montrealCryptXXX':1, 'paduaCryptoWall':1, 'montrealWannaCry':1,
 'montrealDMALockerv3':1, 'montrealCryptoTorLocker2015':1, 'montrealSamSam':1,
 'montrealFlyper':1, 'montrealNoobCrypt':1, 'montrealDMALocker':1, 'montrealGlobe':1,
 'montrealEDA2':1, 'paduaKeRanger':1, 'montrealVenusLocker':1, 'montrealXTPLocker':1,
 'paduaJigsaw':1, 'montrealGlobev3':1, 'montrealJigSaw':1, 'montrealXLockerv5.0':1,
 'montrealXLocker':1, 'montrealRazy':1, 'montrealCryptConsole':1,
 'montrealGlobeImposter':1, 'montrealSam':1, 'montrealComradeCircle':1,
 'montrealAPT':1, 'white':0}
data_bitcoin.label = [bitcoin_labels[i] for i in data_bitcoin.label]
X = data_bitcoin.iloc[:,:-1]
y= data_bitcoin.iloc[:,-1]


le = sklearn.preprocessing.LabelEncoder()
X.iloc[:,0] = le.fit_transform(X.iloc[:,0])


X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1)
train_acc,test_acc = svm_classification(X_train,y_train,X_test,y_test)
print("%.2f training accuracy"%(train_acc*100))
print("%.2f testing accuracy"%(test_acc*100))


