# **Bank Project**

## **Data Preprocessing**

### Import Libraries

In [1]:
import numpy as np
import pandas as pd

### Import the Dataset

In [2]:
# Read the semicolon-separated bank dataset, then split it positionally:
# every column except the last becomes the feature matrix, the last column
# becomes the target vector.
dataset = pd.read_csv("bank.csv", sep=";")
x, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [3]:
# Peek at the raw feature matrix; NumPy abbreviates large arrays with "...".
print(x)

[[30 'unemployed' 'married' ... -1 0 'unknown']
 [33 'services' 'married' ... 339 4 'failure']
 [35 'management' 'single' ... 330 1 'failure']
 ...
 [57 'technician' 'married' ... -1 0 'unknown']
 [28 'blue-collar' 'married' ... 211 3 'other']
 [44 'entrepreneur' 'single' ... 249 7 'other']]


In [4]:
# Peek at the raw target vector; at this point it still holds the string
# labels read from the CSV.
print(y)

['no' 'no' 'no' ... 'no' 'no' 'no']


### Encoding categorical data

In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Positional indices of the string-valued feature columns to one-hot encode.
# NOTE(review): presumably job, marital, education, default, housing, loan,
# contact, month and poutcome in the UCI bank-marketing layout -- confirm
# against the CSV header.
CATEGORICAL_COLUMNS = [1, 2, 3, 4, 6, 7, 8, 10, 15]

# sparse_threshold=0 forces a dense result. With the default threshold the
# transformer may return a SciPy sparse matrix when the stacked output is
# sparse enough, and np.array() would then wrap that matrix in a 0-d object
# array instead of producing the 2-D feature matrix the later cells expect.
# Columns not listed above are passed through unchanged and appended after
# the encoded columns.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), CATEGORICAL_COLUMNS)],
    remainder="passthrough",
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))

In [6]:
# Re-inspect the feature matrix after encoding: each row now starts with the
# 0.0/1.0 indicator values, followed by the untouched passthrough columns.
print(x)

[[0.0 0.0 0.0 ... 1 -1 0]
 [0.0 0.0 0.0 ... 1 339 4]
 [0.0 0.0 0.0 ... 1 330 1]
 ...
 [0.0 0.0 0.0 ... 11 -1 0]
 [0.0 1.0 0.0 ... 4 211 3]
 [0.0 0.0 1.0 ... 2 249 7]]


### Encoding the dependent variable

In [7]:
from sklearn.preprocessing import LabelEncoder

# Replace the target's string classes with integer codes. The fitted encoder
# is kept in `le` so the codes can be mapped back to the original labels.
le = LabelEncoder().fit(y)
y = le.transform(y)

In [8]:
# Confirm the target is now integer-coded.
print(y)

[0 0 0 ... 0 0 0]


### Splitting the dataset into the Training set and Test set

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

In [10]:
# Inspect the training features produced by the split (not yet scaled).
print(x_train)

[[0.0 0.0 0.0 ... 2 -1 0]
 [0.0 1.0 0.0 ... 6 -1 0]
 [1.0 0.0 0.0 ... 2 -1 0]
 ...
 [0.0 0.0 0.0 ... 2 -1 0]
 [0.0 1.0 0.0 ... 1 -1 0]
 [0.0 0.0 0.0 ... 3 -1 0]]


In [11]:
# Inspect the held-out test features produced by the split (not yet scaled).
print(x_test)

[[1.0 0.0 0.0 ... 2 -1 0]
 [0.0 1.0 0.0 ... 1 -1 0]
 [0.0 1.0 0.0 ... 1 -1 0]
 ...
 [0.0 0.0 0.0 ... 2 -1 0]
 [0.0 1.0 0.0 ... 8 342 9]
 [0.0 1.0 0.0 ... 2 -1 0]]


In [12]:
# Inspect the integer-coded training labels.
print(y_train)

[0 0 0 ... 1 0 0]


In [13]:
# Inspect the integer-coded test labels.
print(y_test)

[0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0
 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 

### Feature Scaling

In [14]:
from sklearn.preprocessing import StandardScaler

# Learn each column's mean and standard deviation from the training rows
# only, then apply that same transformation to both splits so that no
# test-set statistics influence the scaling.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [15]:
# Inspect the training features after standardization.
print(x_train)

[[-0.33558216 -0.51198549 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]
 [-0.33558216  1.95318034 -0.20143371 ...  1.01602207 -0.39768549
  -0.31524727]
 [ 2.97989621 -0.51198549 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]
 ...
 [-0.33558216 -0.51198549 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]
 [-0.33558216  1.95318034 -0.20143371 ... -0.57690882 -0.39768549
  -0.31524727]
 [-0.33558216 -0.51198549 -0.20143371 ...  0.06026354 -0.39768549
  -0.31524727]]


In [16]:
# Inspect the test features after applying the scaler fitted on the training set.
print(x_test)

[[ 2.97989621 -0.51198549 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]
 [-0.33558216  1.95318034 -0.20143371 ... -0.57690882 -0.39768549
  -0.31524727]
 [-0.33558216  1.95318034 -0.20143371 ... -0.57690882 -0.39768549
  -0.31524727]
 ...
 [-0.33558216 -0.51198549 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]
 [-0.33558216  1.95318034 -0.20143371 ...  1.65319443  3.03596951
   5.11301046]
 [-0.33558216  1.95318034 -0.20143371 ... -0.25832264 -0.39768549
  -0.31524727]]


## **Training the Kernel SVM model on the training set**

In [17]:
from sklearn.svm import SVC

# Fit an RBF-kernel support vector classifier on the scaled training data.
# fit() returns the estimator itself, so construction and training are chained.
classifier = SVC(kernel="rbf", random_state=0).fit(x_train, y_train)

# Leave the fitted estimator as the cell's last expression so its
# configuration is displayed.
classifier

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

## **Predicting the test set results**

In [18]:
# Predict labels for the held-out rows and print them beside the true labels
# (left column: prediction, right column: ground truth).
y_pred = classifier.predict(x_test)
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## **Making the Confusion Matrix**

In [19]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Rows of the confusion matrix are the true classes, columns the predicted
# classes.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# The test set is heavily imbalanced, so overall accuracy is dominated by the
# majority class and can look good even when most positive cases are missed.
# Per-class precision, recall and F1 make that visible.
print(classification_report(y_test, y_pred))

# Overall accuracy, kept as the cell's displayed result.
accuracy_score(y_test, y_pred)

[[775  15]
 [ 91  24]]


0.8828729281767956