In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
    
from statistics import mean
from hyperopt import Trials, hp, fmin, tpe, STATUS_OK, space_eval #for hyperparameter tuning and minimizing objective function

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fraudulent-transactions-data/Fraud.csv


In [4]:
df = pd.read_csv('/kaggle/input/fraudulent-transactions-data/Fraud.csv')
df = df.head(1000)
df = df.drop(['nameDest', 'nameOrig'], axis = 1)
encoder = pd.get_dummies(df['type'])
df = df.drop('type', axis = 1)
df = df.join(encoder)
imp = SimpleImputer(missing_values = np.nan, strategy = 'mean') #could also use mean, median, most freq
df = pd.DataFrame(imp.fit_transform(df), columns = df.columns)
# df.head()
x_train, x_test, y_train, y_test = train_test_split(df.drop('isFraud', axis = 1), df['isFraud'])

In [5]:

space = {
    'C' : hp.choice('C', [0.1, 1, 10]),
    'gamma' : hp.choice('gamma', [0.1, 1, 10, 'scale', 'auto']),
    'kernel' : hp.choice('kernel', ['rbf', 'poly'])
} #defines the space in which we do hyperparameter tuning for C, gamma and kernel
kfold = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 0)

def objective(params) :
    svc = SVC(**params) #grab all keyword paramaters
    #cross_val_score takes in object to fit, x, y shape, cv generator (in this case kfold), 
    #scoring metric, and number of parallel processings (just do 1 cuz h0m3l355)
    scores = cross_val_score(svc, x_train, y_train, cv = kfold, scoring = 'accuracy', n_jobs = 1)
    best_score = mean(scores) 
    loss = -best_score
    return {'loss': loss, 'params': params, 'status': STATUS_OK}
    
num_trials = Trials()
best = fmin(fn = objective, space = space, algo = tpe.suggest, max_evals = 30, trials = num_trials)
print(best)

100%|██████████| 30/30 [00:01<00:00, 18.34trial/s, best loss: -0.9946666666666667]
{'C': 1, 'gamma': 0, 'kernel': 0}


In [6]:
svc = SVC(C = space_eval(space, best)['C'], gamma = space_eval(space, best)['gamma'], kernel = space_eval(space, best)['kernel'])
svc.fit(x_train, y_train)
svc.score(x_test, y_test)

0.98