# Importing the libraries and dataset

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [24]:
# Load the credit data and split it into predictors (X) and the label (y).
FEATURE_COLUMNS = ['laufkont', 'laufzeit', 'sparkont', 'moral']
TARGET_COLUMN = 'kredit'

dataset = pd.read_csv('kredit.csv')
X = dataset[FEATURE_COLUMNS]   # four predictor columns, kept as a DataFrame
y = dataset[TARGET_COLUMN]     # 0/1 label column, kept as a Series

# Splitting the dataset into the training set and test set

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [26]:
# Inspect the training features (pandas abbreviates the middle rows of a long frame).
print(X_train)

     laufkont  laufzeit  sparkont  moral
253         4        36         1      2
667         2        13         1      4
85          1        12         4      2
969         1        21         5      1
75          2         9         5      2
..        ...       ...       ...    ...
835         2        24         1      2
192         2        48         5      2
629         1        12         1      2
559         2        14         3      2
684         1        12         1      2

[750 rows x 4 columns]


In [27]:
# Inspect the training labels; the index matches the rows of X_train.
print(y_train)

253    1
667    1
85     1
969    0
75     1
      ..
835    0
192    1
629    1
559    1
684    1
Name: kredit, Length: 750, dtype: int64


In [28]:
# Inspect the held-out test features.
print(X_test)

     laufkont  laufzeit  sparkont  moral
993         1        18         1      4
859         1        12         2      2
298         2        12         4      2
553         4        10         5      2
672         2        18         1      2
..        ...       ...       ...    ...
462         4        15         2      2
356         1        24         1      2
2           2        12         2      2
478         4        12         1      4
695         4        60         1      3

[250 rows x 4 columns]


In [29]:
# Inspect the held-out test labels; the index matches the rows of X_test.
print(y_test)

993    0
859    0
298    1
553    1
672    1
      ..
462    1
356    1
2      1
478    1
695    1
Name: kredit, Length: 250, dtype: int64


# Feature Scaling


In [30]:
# Standardise the features to zero mean and unit variance.
# Plain (unpenalised) logistic regression does not depend on feature scale, but
# the model fitted later in this notebook reports penalty='l2' and the iterative
# 'lbfgs' solver, so a common scale keeps the penalty even across features and
# helps the solver converge.
from sklearn.preprocessing import StandardScaler

# Learn the mean/std from the training rows only, then apply that same
# transform to both splits so no test-set statistics reach the scaler.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [31]:
# Print the training features after scaling: X_train is now a NumPy array whose
# columns share a comparable range.
print(X_train)

[[ 1.14276934  1.23662853 -0.69463841 -0.54268053]
 [-0.44588127 -0.64282893 -0.69463841  1.32863165]
 [-1.24020657 -0.72454447  1.20674409 -0.54268053]
 ...
 [-1.24020657 -0.72454447 -0.69463841 -0.54268053]
 [-0.44588127 -0.56111338  0.57294993 -0.54268053]
 [-1.24020657 -0.72454447 -0.69463841 -0.54268053]]


In [32]:
# Print the scaled test features. Any sample passed to the model for prediction
# must be on this same scale, i.e. transformed with the scaler fitted on the
# training data.
print(X_test)

[[-1.24020657 -0.23425122 -0.69463841  1.32863165]
 [-1.24020657 -0.72454447 -0.06084424 -0.54268053]
 [-0.44588127 -0.72454447  1.20674409 -0.54268053]
 [ 1.14276934 -0.88797555  1.84053826 -0.54268053]
 [-0.44588127 -0.23425122 -0.69463841 -0.54268053]
 [-0.44588127  0.50118865 -0.69463841 -2.41399271]
 [-0.44588127 -0.96969109 -0.69463841  1.32863165]
 [ 1.14276934 -1.21483771 -0.69463841 -2.41399271]
 [-1.24020657 -1.21483771 -0.69463841 -0.54268053]
 [ 0.34844403  0.25604203  1.84053826  1.32863165]
 [ 1.14276934 -1.21483771 -0.69463841 -0.54268053]
 [ 1.14276934  0.25604203  1.84053826 -0.54268053]
 [-1.24020657 -0.72454447 -0.69463841 -1.47833662]
 [ 1.14276934  1.48177515  0.57294993 -0.54268053]
 [-1.24020657  0.9914819   0.57294993  1.32863165]
 [-1.24020657 -0.72454447 -0.69463841  1.32863165]
 [-1.24020657 -0.23425122 -0.69463841 -1.47833662]
 [ 1.14276934  0.25604203  0.57294993 -0.54268053]
 [-1.24020657 -0.23425122  1.84053826 -0.54268053]
 [ 1.14276934  0.25604203  1.20

# Training the logistic regression model on the training set

In [33]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression (logit) classifier with scikit-learn's default settings.
# fit() hands back the estimator, so leaving the call as the cell's final
# expression echoes the configured model beneath the cell.
classifier = LogisticRegression()
classifier.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

# Predicting a new result

In [35]:
# predict() returns the hard 0/1 label; predict_proba() and predict_log_proba()
# return the class probabilities and their logarithms instead.
# predict() expects a 2-D input (rows = samples, columns = features), and the
# sample must pass through the same scaler that was fitted on the training data
# so that it is on the scale the model was trained with.
# The scaler was fitted on a DataFrame, so the sample is wrapped in a one-row
# DataFrame carrying the same column labels: newer scikit-learn versions warn
# when a transformer fitted with feature names is handed unnamed data.
new_sample = pd.DataFrame([[1, 18, 1, 4]], columns=X.columns)
print(classifier.predict(sc.transform(new_sample)))

[1]


# Predicting the test set results

In [42]:
# Predict a label for every test row, then print the predictions next to the
# actual labels (left column: predicted, right column: actual).
y_pred = classifier.predict(X_test)
predicted_vs_actual = np.column_stack((y_pred, y_test.values))
print(predicted_vs_actual)

[[1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]

# Confusion Matrix

In [43]:
# Count how many test predictions were correct or incorrect, per class.

from sklearn.metrics import confusion_matrix, accuracy_score

# confusion_matrix takes the true labels first and the predictions second.
# The true labels here are the held-out y_test (not y_train), because y_pred
# was produced from X_test.
cm = confusion_matrix(y_test, y_pred)

print(cm)
accuracy_score(y_test, y_pred)

# Reading the matrix (rows = actual class, columns = predicted class;
# class 0 = cannot repay, class 1 = can repay):
#  32 -> actual 0, predicted 0: correctly predicted as unable to repay
# 157 -> actual 1, predicted 1: correctly predicted as able to repay
#  51 -> actual 0, predicted 1: cannot repay, but the model thought they could
#  10 -> actual 1, predicted 0: can repay, but the model thought they could not
# Accuracy = (32 + 157) / 250 = 0.756, i.e. 75.6% of test predictions are correct.


[[ 32  51]
 [ 10 157]]


0.756