In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
# No category or module filter is given, so this suppresses EVERY warning raised
# for the rest of the session (including any emitted by pandas / scikit-learn).
warnings.filterwarnings('ignore')

In [None]:
# Absolute path of the raw CSV; kept in one named constant so it is easy to change.
DATA_PATH = "/content/fraud_data.csv"

# Read the file into the working frame and preview the first ten rows.
df = pd.read_csv(DATA_PATH)
df.head(10)

In [None]:
# (rows, columns) of the frame as loaded, before any cleaning.
df.shape

In [None]:
# Column names, non-null counts and dtypes of the frame as loaded.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics for the frame (pandas describes the numeric columns by default).
df.describe()

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [None]:
# Column labels still present after the rows with missing values were removed.
df.columns

In [None]:
#outlier detection
# Draw one box plot per numeric column so extreme values are visible.
# select_dtypes(include="number") covers every numeric dtype (int32, float32, ...),
# not only the int64/float64 pair matched by an explicit dtype comparison.
for col in df.select_dtypes(include="number").columns:
    # Pass the values explicitly as `x`: a bare positional argument is interpreted
    # differently by different seaborn releases, which flips the plot orientation.
    sns.boxplot(x=df[col])
    # The value axis carries the column's own values; a box plot has no count
    # axis, so no 'count' label is set.
    plt.xlabel(col)
    plt.title(f"Box plot of {col}")
    plt.show()

In [None]:
# Column labels once more; no cell since the previous `df.columns` display adds or
# removes columns, so this shows the same labels as that earlier cell.
df.columns

In [None]:
# Columns removed before modelling (timestamp, transaction id, date of birth, city).
unused_cols = ["trans_date_trans_time", "trans_num", "dob", "city"]

df = df.drop(columns=unused_cols)
df.head(10)

In [None]:
# Column names, non-null counts and dtypes after the column drop above.
df.info()

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Target vector: the `is_fraud` column.
y = df['is_fraud']
# Feature frame: every remaining column.
x = df.drop(columns="is_fraud")

In [None]:
# Columns that are one-hot encoded further down.
cat_cols = [
    "merchant",
    "category",
    "state",
    "job",
]

# Columns that are standardised further down.
num_cols = [
    "amt",
    "lat",
    "long",
    "city_pop",
    "merch_lat",
    "merch_long",
]

In [None]:
# One-hot encode the categorical columns; drop_first=True leaves out the first
# level of each column so the remaining indicators are not redundant.
x_encoded = pd.get_dummies(
    data=x,
    columns=cat_cols,
    drop_first=True,
)

In [None]:
# Split FIRST, then scale. Fitting the scaler on the full dataset would let the
# test rows influence the mean/std used to transform the training rows (data
# leakage) and make the held-out metrics optimistic.
x_train, x_test, y_train, y_test = train_test_split(
    x_encoded, y, test_size=0.3, random_state=42
)

# Work on explicit copies so the column assignments below write to independent
# frames; x_encoded itself is left unscaled.
x_train = x_train.copy()
x_test = x_test.copy()

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
x_train[num_cols] = scaler.fit_transform(x_train[num_cols])
x_test[num_cols] = scaler.transform(x_test[num_cols])


In [None]:
#Train Logistic Regression model
# LogisticRegression() is constructed with no arguments, so every hyperparameter
# (solver, regularisation strength, iteration limit) is the library default.
# NOTE(review): the first cell of this notebook silences all warnings, so a solver
# non-convergence warning would not be shown here — confirm the fit converged.
logistic_model = LogisticRegression()
logistic_model.fit(x_train, y_train)

In [None]:
# Predict the held-out rows with the logistic model, then derive the three
# evaluation artefacts from the same pair of label vectors.
y_pred = logistic_model.predict(x_test)

log_re = classification_report(y_true=y_test, y_pred=y_pred)
log_con_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
log_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Show the logistic-regression results computed in the previous cell.
# (Label typo "REgression" corrected so all three headings read the same.)
print(f"Logistic Regression Accuracy:\n{log_accuracy}")
print(f"Logistic Regression Confusion Matrix:\n{log_con_matrix}")
print(f"Logistic Regression Classification Report:\n{log_re}")

In [None]:
#train decision tree model
# Fix the seed: scikit-learn permutes the candidate features at every split, so
# without random_state two runs on identical data can grow different trees when
# several splits tie. 42 matches the seed already used for train_test_split.
decision_tree_model = DecisionTreeClassifier(random_state=42)
decision_tree_model.fit(x_train, y_train)

In [None]:
# Predict the held-out rows with the decision tree, then derive the three
# evaluation artefacts from the same pair of label vectors.
y_pred_dt = decision_tree_model.predict(x_test)

dt_re = classification_report(y_true=y_test, y_pred=y_pred_dt)
dt_con_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_dt)
dt_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_dt)

In [None]:
# Show the decision-tree results computed in the previous cell, one heading per
# metric followed by its value on the next line.
dt_results = (
    ("Accuracy", dt_accuracy),
    ("Confusion Matrix", dt_con_matrix),
    ("Classification Report", dt_re),
)
for heading, value in dt_results:
    print(f"Decision Tree {heading}:\n {value}")


In [None]:
# save the model
# Persist both fitted estimators with joblib. The file names are relative, so the
# files are written to the kernel's current working directory.

import joblib
joblib.dump(logistic_model, 'logistic_model.joblib')
joblib.dump(decision_tree_model, 'decision_tree_model.joblib')


In [None]:
import pickle

# Write the same two fitted estimators with the standard-library pickle module,
# one file per model, in the current working directory.
for estimator, pkl_name in (
    (logistic_model, 'logistic_model.pkl'),
    (decision_tree_model, 'decision_tree_model.pkl'),
):
    with open(pkl_name, 'wb') as pkl_file:
        pickle.dump(estimator, pkl_file)

In [None]:
# Round-trip check for the joblib files: restore each estimator from disk and
# predict the held-out rows with the restored object.

# logistic regression
loaded_logre_model = joblib.load('logistic_model.joblib')
predictions_Logre = loaded_logre_model.predict(x_test)

# decision tree
loaded_dt_model = joblib.load('decision_tree_model.joblib')
predictions_dt = loaded_dt_model.predict(x_test)

In [None]:
# Restore the two estimators from the pickle files written earlier.
# Unpickling executes code stored in the file, so only load files you produced
# yourself or otherwise trust.

with open('logistic_model.pkl', 'rb') as logre_file:
    loaded_logre_model = pickle.load(logre_file)

with open('decision_tree_model.pkl', 'rb') as dt_file:
    loaded_dt_model = pickle.load(dt_file)


In [None]:
#predictions:
# Predict the held-out rows with whatever `loaded_logre_model` / `loaded_dt_model`
# currently refer to (the pickle-restored estimators when the cells are run top to
# bottom). The results overwrite the predictions_* variables from the joblib cell.
predictions_Logre = loaded_logre_model.predict(x_test)
predictions_dt = loaded_dt_model.predict(x_test)

In [None]:
#deploying the model using flask API:

from flask import Flask, request, jsonify
import joblib
import pandas as pd

app = Flask(__name__)

#load the model
# Both estimators are read once at start-up from the current working directory.
model_logre = joblib.load('logistic_model.joblib')
# Loaded alongside the logistic model; the /predict route below only uses model_logre.
model_dt = joblib.load('decision_tree_model.joblib')


def _to_feature_matrix(payload):
    """Turn a decoded JSON payload into something ``model_logre.predict`` accepts.

    Accepted shapes:
      * a JSON object            -> one record keyed by feature name
      * an array of JSON objects -> several records keyed by feature name
      * an array of arrays       -> positional rows, passed through unchanged

    Records keyed by name are re-ordered to the column order the model was fitted
    with. The model was trained on already one-hot-encoded and standardised
    columns, so callers must send features in that same prepared form; this
    function does not encode or scale anything.

    Raises:
        ValueError: if the payload is empty, is not an object/array, or a named
            record set has missing or unknown feature names.
    """
    if isinstance(payload, dict):
        payload = [payload]
    if not isinstance(payload, list) or not payload:
        raise ValueError("request body must be a non-empty JSON object or array")

    if not all(isinstance(row, dict) for row in payload):
        # Positional rows: hand them to the model as-is and let it validate the shape.
        return payload

    # feature_names_in_ only exists when the model was fitted on a DataFrame with
    # string column names, hence the getattr guard.
    expected = getattr(model_logre, 'feature_names_in_', None)
    if expected is None:
        raise ValueError("model has no stored feature names; send rows as arrays")

    frame = pd.DataFrame(payload)
    expected = list(expected)
    missing = [name for name in expected if name not in frame.columns]
    unknown = sorted(set(frame.columns) - set(expected))
    if missing or unknown:
        raise ValueError(
            f"feature mismatch: {len(missing)} missing (e.g. {missing[:5]}), "
            f"{len(unknown)} unknown (e.g. {unknown[:5]})"
        )
    # Same columns, same order as at training time.
    return frame[expected]


@app.route('/predict', methods = ['POST'])
def predict():
    """POST /predict: classify the submitted rows with the logistic model.

    Returns ``{"prediction": [...]}`` on success, or ``{"error": "..."}`` with
    HTTP 400 when the body is not valid JSON or cannot be used as model input.
    """
    # silent=True yields None instead of raising when the body is not parseable JSON.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({'error': 'request body must be valid JSON'}), 400

    try:
        features = _to_feature_matrix(data)
        prediction_lr = model_logre.predict(features)
    except (ValueError, TypeError) as exc:
        # Bad input is the client's fault: answer 400 instead of an unhandled 500.
        return jsonify({'error': str(exc)}), 400

    return jsonify({'prediction': prediction_lr.tolist()})


if __name__ == '__main__':
    # debug=True would enable Werkzeug's interactive debugger, which lets anyone who
    # can reach the server run arbitrary Python, and the auto-reloader, which
    # restarts the process (NOTE(review): likely unusable from a notebook kernel).
    app.run(debug=False)