In [None]:
# -*- coding: utf-8 -*-


# Preprocessing & normalization
#
# Colab environment setup: mount Google Drive, install the required packages,
# and fetch the fraud-transactions dataset through the Kaggle CLI.

from google.colab import drive
drive.mount('/content/drive')

# Upgrade the Kaggle CLI and imbalanced-learn (used below for under-sampling).
!pip install kaggle --upgrade
!pip install -U imbalanced-learn

# Place kaggle.json (presumably the Kaggle API token) in ~/.kaggle with
# owner-only read/write permissions.
# NOTE(review): the recorded output shows `cp` and `chmod` failing because
# kaggle.json is not in the working directory; it has to be uploaded first.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

# Download and unpack the dataset archive into the current directory.
# NOTE(review): the CSV is later read from a Drive path, not from this unzipped
# download, so these two steps look redundant; confirm before removing them.
!kaggle datasets download -d chitwanmanchanda/fraudulent-transactions-data

!unzip fraudulent-transactions-data.zip

import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the raw transactions table from the mounted Drive folder.
df=pd.read_csv('/content/drive/MyDrive/ml/data/Fraud.csv')

# First rows, as a quick look at the columns.
df.head()

# Column dtypes and non-null counts.
df.info()

# Missing values per column.
df.isnull().sum()

# Number of distinct transaction types.
df['type'].nunique()

# Number of distinct values in the nameOrig column.
df['nameOrig'].nunique()

# Number of distinct values in the nameDest column.
df['nameDest'].nunique()

# Class balance of the target column.
df['isFraud'].value_counts()

# Summary statistics for the numeric columns.
df.describe()

# Cross-tabulation: rows are isFraud values, columns are transaction types,
# cells are row counts (0 where a combination never occurs).
df.groupby(['isFraud', 'type']).size().unstack(fill_value=0)

from sklearn.preprocessing import LabelEncoder

# One-hot encode the transaction type into indicator columns.
types = pd.get_dummies(df['type'])
types.head()

# Integer-encode both name columns with a single encoder instance.
# The encoder is refit per column, so after the loop `label` holds only the
# mapping learned from 'nameDest'.
label = LabelEncoder()
for _id_column in ('nameOrig', 'nameDest'):
    df[_id_column] = label.fit_transform(df[_id_column])

# Append the indicator columns and drop the original string-valued column.
df = pd.concat([df, types], axis=1).drop(columns='type')
df.head()

from imblearn.under_sampling import RandomUnderSampler

# Separate the target column (label) from the remaining columns (features).
y = df['isFraud']
X = df.drop(columns='isFraud')

(X.shape, y.shape)

# Randomly under-sample so the classes are balanced.
# NOTE(review): resampling happens before the train/test split, so the held-out
# portion below is drawn from the resampled data, not the original distribution.
rus = RandomUnderSampler(random_state=0)
X_under, y_under = rus.fit_resample(X, y)

# 80/20 split of the resampled data.
(
    X_train_under,
    X_test_under,
    y_train_under,
    y_test_under,
) = train_test_split(X_under, y_under, test_size=0.2, random_state=0)

tuple(
    part.shape
    for part in (X_train_under, X_test_under, y_train_under, y_test_under)
)

from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only.
std_scaler_under = StandardScaler()
std_scaler_under.fit(X_train_under)

# Apply the same fitted scaler to both splits.
X_train_under_scaled, X_test_under_scaled = (
    std_scaler_under.transform(split)
    for split in (X_train_under, X_test_under)
)




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting imbalanced-learn
  Downloading imbalanced_learn-0.9.1-py3-none-any.whl (199 kB)
[K     |████████████████████████████████| 199 kB 3.8 MB/s 
[?25h  Downloading imbalanced_learn-0.9.0-py3-none-any.whl (199 kB)
[K     |████████████████████████████████| 199 kB 8.9 MB/s 
Installing collected packages: imbalanced-learn
  Attempting uninstall: imbalanced-learn
    Found existing installation: imbalanced-learn 0.8.1
    Uninstalling imbalanced-learn-0.8.1:
      Successfully uninstalled imbalanced-learn-0.8.1
Successfully installed imbalanced-learn-0.9.0
cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': 

In [None]:
# Sanity check: dimensions of the scaled, under-sampled training matrix.
X_train_under_scaled.shape

(13140, 14)

In [None]:
"""# KNN"""

from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier on the scaled, under-sampled training split.
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train_under_scaled, y_train_under)

knn_y_pred = knn_clf.predict(X_test_under_scaled)

# Report the four evaluation metrics in a fixed order.
print("KNN Result")
for _caption, _scorer in (
    ("accuracy score : ", accuracy_score),
    ("precision score", precision_score),
    ("recall score : ", recall_score),
    ("f1 score : ", f1_score),
):
    print(_caption, _scorer(y_test_under, knn_y_pred))

"""# Logistic Regression"""

from sklearn.linear_model import LogisticRegression

# Logistic regression (liblinear solver) on the scaled, under-sampled split.
log_clf = LogisticRegression(random_state=0, solver='liblinear')
log_clf.fit(X_train_under_scaled, y_train_under)
log_y_pred = log_clf.predict(X_test_under_scaled)

# Compute the metrics first, then print them in the usual order.
_log_accuracy = accuracy_score(y_test_under, log_y_pred)
_log_precision = precision_score(y_test_under, log_y_pred)
_log_recall = recall_score(y_test_under, log_y_pred)
_log_f1 = f1_score(y_test_under, log_y_pred)

print("Logistic Regression Result")
print("accuracy score : ", _log_accuracy)
print("precision score", _log_precision)
print("recall score : ", _log_recall)
print("f1 score : ", _log_f1)

"""# SVM"""

from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

# The pipeline standardizes internally, so it is fed the unscaled splits.
_svm_steps = [
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1)),
]
svm_clf = Pipeline(steps=_svm_steps)
svm_clf.fit(X_train_under, y_train_under)

svm_y_pred = svm_clf.predict(X_test_under)

print("SVM Result")
for _caption, _scorer in (
    ("accuracy score : ", accuracy_score),
    ("precision score", precision_score),
    ("recall score : ", recall_score),
    ("f1 score : ", f1_score),
):
    print(_caption, _scorer(y_test_under, svm_y_pred))

"""# Decision Tree"""

from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree fitted on the standardized, under-sampled training split.
tree_clf = DecisionTreeClassifier(max_depth=4, max_leaf_nodes=10, random_state=0).fit(X_train_under_scaled, y_train_under)

# Predict on the standardized test split. The tree's split thresholds were
# learned in the scaled feature space, so feeding it the unscaled X_test_under
# (as before) compared raw values against scaled thresholds.
tree_y_pred = tree_clf.predict(X_test_under_scaled)


print("accuracy score : ", accuracy_score(y_test_under, tree_y_pred))
print("precision score",  precision_score(y_test_under, tree_y_pred))
print("recall score : ",  recall_score(y_test_under, tree_y_pred))
print("f1 score : ", f1_score(y_test_under, tree_y_pred))

# 80/20 split of the full (not resampled) data set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)

# Fit the scaler on the training split, then transform both splits.
std_scaler = StandardScaler()
std_scaler.fit(X_train)
X_train_scaled = std_scaler.transform(X_train)
X_test_scaled = std_scaler.transform(X_test)

# This tree is trained and evaluated on the unscaled splits.
dtree_clf = DecisionTreeClassifier(random_state=0, max_leaf_nodes=10, max_depth=4)
dtree_clf.fit(X_train, y_train)
dtree_y_pred = dtree_clf.predict(X_test)

print("Decision Tree Result")
for _caption, _scorer in (
    ("accuracy score : ", accuracy_score),
    ("precision score", precision_score),
    ("recall score : ", recall_score),
    ("f1 score : ", f1_score),
):
    print(_caption, _scorer(y_test, dtree_y_pred))

"""# Random Forest"""

from sklearn.ensemble import RandomForestClassifier

# 100-tree random forest fitted on the standardized, under-sampled training split.
rand_clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train_under_scaled, y_train_under)

# Predict on the standardized test split so the inputs are in the same feature
# space the forest was trained on (previously the unscaled X_test_under was used).
rand_y_pred = rand_clf.predict(X_test_under_scaled)


print("accuracy score : ", accuracy_score(y_test_under, rand_y_pred))
print("precision score",  precision_score(y_test_under, rand_y_pred))
print("recall score : ",  recall_score(y_test_under, rand_y_pred))
print("f1 score : ", f1_score(y_test_under, rand_y_pred))

# 15-tree random forest trained and evaluated on the full, unscaled splits.
randf_clf = RandomForestClassifier(random_state=0, n_estimators=15)
randf_clf.fit(X_train, y_train)

randf_y_pred = randf_clf.predict(X_test)

_randf_scores = (
    ("accuracy score : ", accuracy_score(y_test, randf_y_pred)),
    ("precision score", precision_score(y_test, randf_y_pred)),
    ("recall score : ", recall_score(y_test, randf_y_pred)),
    ("f1 score : ", f1_score(y_test, randf_y_pred)),
)

print("Random Forest Result")
for _caption, _value in _randf_scores:
    print(_caption, _value)

In [None]:
"""#ada boosting result"""
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# Weak learner: a depth-1 decision tree (a stump).
dt = DecisionTreeClassifier(max_depth=1,   random_state=0)

# The weak learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases; the first
# positional slot works with both. `random_state` is set, like every other
# model in this notebook, so the run is reproducible.
ada = AdaBoostClassifier(dt, n_estimators=50, learning_rate=0.5, random_state=0)

# Fit ada to the training set
ada.fit(X_train, y_train)

# Predict hard class labels for the test set (predict() returns labels, not
# probabilities).
y_pred = ada.predict(X_test)

print("Ada Boosting Result")
print("accuracy score : ", accuracy_score(y_test, y_pred))
print("precision score",  precision_score(y_test, y_pred))
print("recall score : ",  recall_score(y_test, y_pred))
print("f1 score : ", f1_score(y_test, y_pred))



Ada Boosting Result
accuracy score :  0.9993438237707108
precision score 0.9653579676674365
recall score :  0.5094454600853139
f1 score :  0.6669325887514959


In [None]:
"""#Gradient boosting result"""
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# Gradient-boosted ensemble: 100 depth-3 trees with a 0.5 learning rate.
gb = GradientBoostingClassifier(
    learning_rate=0.5,
    n_estimators=100,
    max_depth=3,
    random_state=0,
)

# Train on the full training split, then label the test split.
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)

print("Gradient Boosting Result")
for _caption, _scorer in (
    ("accuracy score : ", accuracy_score),
    ("precision score", precision_score),
    ("recall score : ", recall_score),
    ("f1 score : ", f1_score),
):
    print(_caption, _scorer(y_test, y_pred))

Gradient Boosting Result
accuracy score :  0.9992542380340175
precision score 0.8333333333333334
recall score :  0.5271176112126752
f1 score :  0.6457633445315416


In [None]:
"""#XGBoost result"""
import xgboost
from xgboost import XGBClassifier

# Hyper-parameters gathered in one place before building the classifier.
_xgb_params = dict(
    n_estimators=100,
    learning_rate=0.08,
    gamma=0,
    subsample=0.75,
    max_depth=7,
    random_state=0,
)
xgb = XGBClassifier(**_xgb_params)

# Train on the full training split (this can take a few minutes to run).
xgb.fit(X_train, y_train)

# Label the test split and report the evaluation metrics.
y_pred = xgb.predict(X_test)

_xgb_accuracy = accuracy_score(y_test, y_pred)
_xgb_precision = precision_score(y_test, y_pred)
_xgb_recall = recall_score(y_test, y_pred)
_xgb_f1 = f1_score(y_test, y_pred)

print("XGBoost Result")
print("accuracy score : ", _xgb_accuracy)
print("precision score", _xgb_precision)
print("recall score : ", _xgb_recall)
print("f1 score : ", _xgb_f1)

XGBoost Result
accuracy score :  0.9996880216011643
precision score 0.9712121212121212
recall score :  0.7812309567336989
f1 score :  0.8659236744343127


In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
# Imported from keras.layers rather than keras.layers.convolutional: the latter
# module path is not importable on current Keras releases.
from keras.layers import Conv2D, MaxPooling2D
import numpy as np


# Number of feature columns in the scaled, under-sampled training matrix.
n_features = X_train_under_scaled.shape[1]

# Small recurrent classifier. Each table row is treated as a sequence of length
# one, so the expected input shape is (timesteps=1, n_features).
# Sequential takes a *list* of layers, and SimpleRNN needs an explicit number
# of units (32 here is an arbitrary starting point, tune as needed).
# The final 2-unit softmax layer emits one probability per class, which is the
# output format a sparse categorical cross-entropy loss on 0/1 labels expects.
model = keras.models.Sequential([
    keras.Input(shape=(1, n_features)),
    keras.layers.SimpleRNN(32),
    keras.layers.Dense(2, activation="softmax"),
])

In [None]:
# Train, evaluate and run the recurrent model on the scaled, under-sampled data.
# NOTE(review): assumes `model` accepts inputs shaped (timesteps=1, n_features)
# and outputs one probability per class; confirm against the model definition.

# Reshape the 2-D feature matrices to (samples, timesteps=1, features).
_n_features = X_train_under_scaled.shape[1]
X_train_seq = np.asarray(X_train_under_scaled, dtype="float32").reshape(-1, 1, _n_features)
X_test_seq = np.asarray(X_test_under_scaled, dtype="float32").reshape(-1, 1, _n_features)
y_train_seq = np.asarray(y_train_under)
y_test_seq = np.asarray(y_test_under)

# Hold out part of the training data for validation so the test split stays
# untouched until the final evaluation.
X_fit_seq, X_val_seq, y_fit_seq, y_val_seq = train_test_split(
    X_train_seq, y_train_seq, test_size=0.2, random_state=0, stratify=y_train_seq
)

model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
history = model.fit(X_fit_seq, y_fit_seq, epochs=10, validation_data=(X_val_seq, y_val_seq))

# Returns the loss followed by the compiled metrics (accuracy) on the test split.
score = model.evaluate(X_test_seq, y_test_seq)

# Example inference on a few held-out rows. The result holds per-class
# probabilities; use np.argmax(y_pred, axis=1) to get hard labels.
X_new = X_test_seq[:5]
y_pred = model.predict(X_new)