In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report



In [None]:
# Adatok betöltése
data = pd.read_csv('UniversalBank.csv', delimiter=',')
data.columns = data.columns.str.replace(' ', '_')
data.columns = data.columns.str.lower()

# Adatok előkészítése
data = data.drop(columns=['id', 'zip_code'])
data['experience'] = bank['experience'].apply(lambda x: max(0, x))
X = pd.get_dummies(data.drop(columns=['personal_loan']))
y = data['personal_loan']

data.head(10)

Unnamed: 0,age,experience,income,family,ccavg,education,mortgage,personal_loan,securities_account,cd_account,online,creditcard
0,25,1,49,4,1.6,1,0,0,1,0,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,0,1
5,37,13,29,4,0.4,2,155,0,0,0,1,0
6,53,27,72,2,1.5,2,0,0,0,0,1,0
7,50,24,22,1,0.3,3,0,0,0,0,0,1
8,35,10,81,3,0.6,2,104,0,0,0,1,0
9,34,9,180,1,8.9,3,0,1,0,0,0,0


In [42]:
# Adatok kettéosztása train és teszt adatokra
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [43]:
# Adatok standardizálása
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [44]:
# legjobb C és gamma paraméterek keresése
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}

grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5, scoring='accuracy')

grid_search.fit(X_train, y_train)

print(grid_search.best_params_)

{'C': 10, 'gamma': 0.1}


In [45]:
# Modell építése
# 'linear', 'poly', 'precomputed', 'rbf', 'sigmoid'
model = SVC(kernel='linear', C=grid_search.best_params_['C'], gamma=grid_search.best_params_['gamma'])
model.fit(X_train, y_train)

In [46]:
# Becslés train és teszt adaton
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [47]:
# Konfúziós mátrix létrehozása
train_cm = confusion_matrix(y_train, y_train_pred)
test_cm = confusion_matrix(y_test, y_test_pred)

In [48]:
# Konfúziós mátrix
train_report = classification_report(y_train, y_train_pred)
test_report = classification_report(y_test, y_test_pred)

In [None]:
print('Tanító Konfúziós Mátrix:\n', train_cm)
print('Teszt Konfúziós Mátrix:\n', test_cm)
print('Tanító Osztályozási jelentés:\n', train_report)
print('Teszt Osztályozási jelentés:\n', test_report)

Train Confusion Matrix:
 [[3595   30]
 [ 158  217]]
Test Confusion Matrix:
 [[889   6]
 [ 41  64]]
Train Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97      3625
           1       0.88      0.58      0.70       375

    accuracy                           0.95      4000
   macro avg       0.92      0.79      0.84      4000
weighted avg       0.95      0.95      0.95      4000

Test Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97       895
           1       0.91      0.61      0.73       105

    accuracy                           0.95      1000
   macro avg       0.94      0.80      0.85      1000
weighted avg       0.95      0.95      0.95      1000



In [None]:
train_cm = confusion_matrix(y_train, y_train_pred)
test_cm = confusion_matrix(y_test, y_test_pred)

print("Konfúziós mátrix Tanító adathalmazon:\n", train_cm)
print("Konfúziós mátrix Teszt adathalmazon:\n", test_cm)

Confusion matrix for train set:
 [[3595   30]
 [ 158  217]]
Confusion matrix for test set:
 [[889   6]
 [ 41  64]]


In [None]:
# Mátrix trainre
train_accuracy = accuracy_score(y_train, y_train_pred)
train_precision = precision_score(y_train, y_train_pred)
train_recall = recall_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred)

# Mátrix tesztre
test_accuracy = accuracy_score(y_test, y_test_pred)
test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)

print('Tanítókészlet hibamutatói:')
print('Accuracy: {:.2f}'.format(train_accuracy))
print('Precision: {:.2f}'.format(train_precision))
print('Recall: {:.2f}'.format(train_recall))
print('F1 Score: {:.2f}'.format(train_f1))
print()
print('Tesztkészlet hibamutatói:')
print('Accuracy: {:.2f}'.format(test_accuracy))
print('Precision: {:.2f}'.format(test_precision))
print('Recall: {:.2f}'.format(test_recall))
print('F1 Score: {:.2f}'.format(test_f1))


Train set error metrics:
Accuracy: 0.95
Precision: 0.88
Recall: 0.58
F1 Score: 0.70

Test set error metrics:
Accuracy: 0.95
Precision: 0.91
Recall: 0.61
F1 Score: 0.73


In [None]:
# Új adat megállapítása
new_data = pd.DataFrame({
    'age': [40],
    'experience': [10],
    'income': [80],
    'family': [3],
    'ccavg': [2.5],
    'education': [3],
    'mortgage': [100],
    'securities_account': [0],
    'cd_account': [0],
    'online': [1],
    'credit_card': [0],
})

prediction = model.predict(new_data)

print(prediction)

[1]


