In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
import tensorflow as tf
from sklearn.cluster import DBSCAN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pickle
from sklearn.ensemble import BaggingClassifier, BaggingRegressor, GradientBoostingClassifier, GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor, StackingClassifier, StackingRegressor

In [4]:
data = pd.read_csv('../Data/frauds_for_5.csv')

In [5]:
data.head(5)

Unnamed: 0.1,Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
0,35,0.57782,-0.139317,2.341221,1.0,1.0,0.0,1.0,1.0
1,36,-0.299389,-0.133864,2.549327,1.0,0.0,0.0,1.0,1.0
2,48,0.272497,-0.646619,3.428512,1.0,0.0,0.0,1.0,1.0
3,101,-0.357666,-0.574263,3.430762,1.0,0.0,0.0,1.0,1.0
4,104,0.350093,-0.61414,1.967437,1.0,0.0,0.0,1.0,1.0


In [8]:
data = data.drop(['Unnamed: 0'], axis=1)

In [9]:
X = data.drop(['fraud'], axis=1)
y = data['fraud']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# MODEL ML1

In [20]:
modelKNN = KNeighborsClassifier(3)
modelKNN.fit(X_train, y_train) 

# Предсказания на тестовых данных
predictionsKNN = modelKNN.predict(X_test)

# Метрики качества
accuracy = accuracy_score(y_test, predictionsKNN)

print(f"Точность модели KNN: {accuracy}") 

report = classification_report(y_test, predictionsKNN)  
print(report)

Точность модели KNN: 0.9972913616398243
              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      6930
         1.0       0.99      1.00      1.00      6730

    accuracy                           1.00     13660
   macro avg       1.00      1.00      1.00     13660
weighted avg       1.00      1.00      1.00     13660



In [21]:
with open('knn.pkl', 'wb') as pickle_out:
    pickle.dump(modelKNN, pickle_out)

# MODEL ML2

In [30]:
dbscan = DBSCAN(eps=1.0, min_samples=5)
predictions = dbscan.fit_predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Точность модели DBSCAN: {accuracy}") 

Точность модели DBSCAN: 0.18521229868228403


In [32]:
with open('dbscan.pkl', 'wb') as pickle_out:
    pickle.dump(dbscan, pickle_out)

# MODEL ML3

In [35]:
gradient_boosting_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

gradient_boosting_classifier.fit(X_train, y_train)

y_pred = gradient_boosting_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Точность:", accuracy)
print(classification_report(y_test, y_pred))

Точность: 1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      6930
         1.0       1.00      1.00      1.00      6730

    accuracy                           1.00     13660
   macro avg       1.00      1.00      1.00     13660
weighted avg       1.00      1.00      1.00     13660



In [36]:
with open('gradient_boosting_classifier.pkl', 'wb') as pickle_out:
    pickle.dump(gradient_boosting_classifier, pickle_out)

# MODEL ML4

In [37]:
model = DecisionTreeClassifier(max_depth=2, random_state=17, criterion = 'entropy')
model.fit(X_train, y_train) # Обучение модели

In [38]:
bagging_classifier = BaggingClassifier(model, n_estimators=10, random_state=42)

bagging_classifier.fit(X_train, y_train)

y_pred = bagging_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Точность:", accuracy)
print(classification_report(y_test, y_pred))

Точность: 0.9866764275256222
              precision    recall  f1-score   support

         0.0       1.00      0.97      0.99      6930
         1.0       0.97      1.00      0.99      6730

    accuracy                           0.99     13660
   macro avg       0.99      0.99      0.99     13660
weighted avg       0.99      0.99      0.99     13660



In [39]:
with open('bagging_classifier.pkl', 'wb') as pickle_out:
    pickle.dump(bagging_classifier, pickle_out)

# MODEL ML5

In [40]:
base_classifiers = [
    ('rf', RandomForestClassifier()),
    ('KNN', KNeighborsClassifier()),
    ('dtc',DecisionTreeClassifier())
]
meta_classifier = LogisticRegression(random_state=42)

stacking_classifier = StackingClassifier(estimators=base_classifiers, final_estimator=meta_classifier)
stacking_classifier.fit(X_train, y_train)

y_pred = stacking_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Точность:", accuracy)
print(classification_report(y_test, y_pred))

Точность: 1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      6930
         1.0       1.00      1.00      1.00      6730

    accuracy                           1.00     13660
   macro avg       1.00      1.00      1.00     13660
weighted avg       1.00      1.00      1.00     13660



In [41]:
with open('stacking_classifier.pkl', 'wb') as pickle_out:
    pickle.dump(stacking_classifier, pickle_out)

# MODEL ML6

In [None]:
nr = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])
nr.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
nr.fit(X_train, y_train, epochs=10, batch_size=32)