In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive inside the Colab runtime; later cells read and write
# files under '/content/drive'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the card-transaction CSV from the mounted Drive folder into a DataFrame.
dataset = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Dataset/card_transdata.csv')

In [None]:
# Show the loaded frame as the cell output.
dataset

In [None]:
# Work on a copy so the originally loaded `dataset` frame is left untouched.
df = dataset.copy()

In [None]:
# Show the working copy as the cell output.
df

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Descriptive statistics for each column.
df.describe()

In [None]:
# Rows that repeat an earlier row in every column.
is_duplicate = df.duplicated()
df.loc[is_duplicate]

In [None]:
# Count how many rows fall under each value of the 'fraud' column
# (computed on the originally loaded `dataset` frame).
class_counts = dataset.groupby('fraud').size()
print(class_counts)

In [None]:
# Absolute pairwise correlations between all columns.
corr = df.corr().abs()
# Boolean upper-triangle mask (diagonal included) so each pair is drawn only once.
mask = np.triu(np.ones(corr.shape, dtype=bool))
sns.heatmap(
    corr,
    mask=mask,
    annot=True,
    fmt=".2f",
    linewidths=.5,
    robust=True,
)

In [None]:

# Draw one 100-bin histogram per column, stacked vertically in a single figure.
cols_num = [
    "distance_from_home",
    "distance_from_last_transaction",
    "ratio_to_median_purchase_price",
    "repeat_retailer",
    "used_chip",
    "used_pin_number",
    "online_order",
    "fraud",
]
fig, axs = plt.subplots(len(cols_num), 1, figsize=(15, 20), constrained_layout=True)
for ax, col in zip(axs, cols_num):
    ax.hist(df[col], bins=100)
    ax.set_title(col)
plt.show()

In [None]:
# Pair plot on a fixed-seed random sample of 30,000 rows, coloured by the 'fraud' column.
df_new = df.sample(random_state=42, n=30000)
sns.pairplot(data=df_new, hue='fraud')
plt.show()

In [None]:

# `pyplot` is the same module as `plt`; the import is kept because the
# density-plot cell also refers to the `pyplot` name.
from matplotlib import pyplot
# One histogram per column of the frame.
df.hist()
pyplot.show()

In [None]:
# One density curve per column, each in its own subplot with an independent x-axis.
df.plot.density(subplots=True, layout=(4, 4), sharex=False)
pyplot.show()

In [None]:
# Name of the target column, followed by the feature columns used as model input.
col_y = "fraud"
cols_X = [
    "distance_from_home", "distance_from_last_transaction",
    "ratio_to_median_purchase_price", "repeat_retailer",
    "used_chip", "used_pin_number", "online_order",
]

In [None]:
# Echo the list of feature column names.
cols_X

In [None]:
# Echo the target column name.
col_y

In [None]:
# Sample 80% of the rows (fixed seed) for training/validation; the remaining
# 20% form the test set.
df_traintm = df.loc[:, cols_X + [col_y]].sample(random_state=42, frac=0.8)
df_test = df.loc[:, cols_X + [col_y]].drop(index=df_traintm.index)

df_traintm.shape, df_test.shape

In [None]:
# Split the 80% portion again: 80% of it for training, the rest for validation.
df_train = df_traintm.loc[:, cols_X + [col_y]].sample(random_state=42, frac=0.8)
df_val = df_traintm.loc[:, cols_X + [col_y]].drop(index=df_train.index)

df_train.shape, df_val.shape

In [None]:
# Pull the feature matrix and the target vector out of each split as NumPy arrays.
X_train, X_val, X_test = (part[cols_X].values for part in (df_train, df_val, df_test))
y_train, y_val, y_test = (part[col_y].values for part in (df_train, df_val, df_test))

print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply the same
# transformation to the validation and test features.
# NOTE: the arrays are overwritten in place, so re-running this cell without
# re-running the previous one scales already-scaled data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)
print(X_train.mean())
print(X_train.std())

In [None]:
from imblearn.over_sampling import SMOTE

# Resample the training split only, targeting the minority class;
# validation and test data are left as they are.
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

In [None]:
# Echo the resampled training features.
X_train_smote

In [None]:
# Echo the resampled training labels.
y_train_smote

In [None]:
from collections import Counter

# Tally the labels after resampling to check the class balance.
smote_label_counts = Counter(y_train_smote)
print(f"SMOTE \t\t\t: {smote_label_counts}")

In [None]:
# Results table: one empty list per column, filled with one entry per evaluated model.
score = {
    column: []
    for column in ("model", "resampling", "recall", "precision", "f1", "accuracy", "auc")
}

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import *

In [None]:
# Train logistic regression on the SMOTE-balanced training set and record
# its validation metrics.
# `multi_class='ovr'` is no longer passed: the parameter is deprecated in
# recent scikit-learn releases, and for a two-class target the default
# setting fits the same single binary model.
log_reg = LogisticRegression(random_state=42, n_jobs=-1)
log_reg.fit(X_train_smote, y_train_smote)

y_val_pred = log_reg.predict(X_val)
# predict_proba returns one column per class; column 1 (the second entry of
# log_reg.classes_) is used as the positive-class score for AUC.
y_val_pred_prob = log_reg.predict_proba(X_val)

# Append one row of validation metrics. Accuracy is stored as a percentage,
# the other metrics as fractions.
score["model"].append("logistict Regression")
score["resampling"].append("Smote oversampling")
score["recall"].append(recall_score(y_val, y_val_pred))
score["precision"].append(precision_score(y_val, y_val_pred))
score["f1"].append(f1_score(y_val, y_val_pred))
score["accuracy"].append(accuracy_score(y_val, y_val_pred)*100)
score["auc"].append(roc_auc_score(y_val, y_val_pred_prob[:, 1]))

pd.DataFrame.from_dict(score)

In [None]:
# Confusion matrix of the validation predictions (rows: true labels, columns: predictions).
cm = confusion_matrix(y_val, y_val_pred)
print('Confusion matrix: ', cm, sep='\n')

In [None]:
# Mean accuracy of the fitted model on the held-out test split.
# Stored under its own name: assigning it to `score` would replace the
# metrics dictionary with a float and break a re-run of the training cell,
# which appends to `score[...]`.
test_accuracy = log_reg.score(X_test, y_test)
print(test_accuracy)

In [None]:
# Colab form fields: one value per model feature for a single transaction.
distance_from_home =57.877  #@param {type:"number"}
distance_from_last_transaction =0.311140  #@param {type:"number"}
ratio_to_median_purchase_price =1.9459  #@param {type:"number"}
repeat_retailer = 1.0 #@param {type:"number"}
used_chip = 1.0  #@param {type:"number"}
used_pin_number =0.0  #@param {type:"number"}
online_order=0.0  #@param {type:"number"}

# Assemble a single-row sample, scale it with the fitted scaler, then classify it.
sample_row = [[
    distance_from_home,
    distance_from_last_transaction,
    ratio_to_median_purchase_price,
    repeat_retailer,
    used_chip,
    used_pin_number,
    online_order,
]]
output = log_reg.predict(sc.transform(sample_row))
print("Fraud = ",output)

print("It is a Fraud" if output==[1] else "It is not Fraud")

In [None]:
import pickle
print("[INFO] Saving model...")
# Write the fitted model to Drive. The `with` block closes (and therefore
# flushes) the file before any later cell reads it back. `pickle.dump`
# returns None, so its result is not assigned to a variable.
with open('/content/drive/My Drive/Colab Notebooks/NSP_Creditcardfrauddetection.pkl', 'wb') as model_file:
    pickle.dump(log_reg, model_file)

In [None]:
# Read the saved model back and predict on the validation features as a
# round-trip check; the `with` block closes the file handle afterwards.
with open('/content/drive/My Drive/Colab Notebooks/NSP_Creditcardfrauddetection.pkl','rb') as model_file:
    model = pickle.load(model_file)
model.predict(X_val)

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install streamlit==1.1.0

In [None]:
import pickle
# Load the saved model; the `with` block closes the file handle once loading is done.
with open("/content/drive/My Drive/Colab Notebooks/NSP_Creditcardfrauddetection.pkl","rb") as pickle_in:
    model=pickle.load(pickle_in)

In [None]:
%%writefile app.py
import streamlit as st
from PIL import Image
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
st.set_option('deprecation.showfileUploaderEncoding', False)
# Load the pickled model
# NOTE: unpickling can execute arbitrary code; only load model files you created yourself.
with open("/content/drive/My Drive/Colab Notebooks/NSP_Creditcardfrauddetection.pkl","rb") as pickle_in:
    model=pickle.load(pickle_in)
dataset= pd.read_csv('/content/drive/My Drive/Colab Notebooks/Dataset/card_transdata.csv')
# The seven input features, in the order predict_note_authentication passes
# them. The 'fraud' target is deliberately excluded: fitting the scaler on
# eight columns would make sc.transform reject the seven-value input rows.
FEATURE_COLUMNS = [
    "distance_from_home",
    "distance_from_last_transaction",
    "ratio_to_median_purchase_price",
    "repeat_retailer",
    "used_chip",
    "used_pin_number",
    "online_order",
]
X = dataset[FEATURE_COLUMNS].values
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# NOTE(review): this scaler is fitted on every row of the CSV; confirm that
# this matches the scaling applied when the pickled model was trained.
X = sc.fit_transform(X)
def predict_note_authentication(distance_from_home, distance_from_last_transaction, ratio_to_median_purchase_price, repeat_retailer, used_chip, used_pin_number, online_order):
  """Classify a single transaction and return a human-readable verdict.

  The seven values are arranged as one row, scaled with the module-level
  ``sc`` and passed to the module-level ``model``. Both the raw prediction
  and the verdict string are printed; the verdict string is returned.
  """
  feature_row = [[
      distance_from_home,
      distance_from_last_transaction,
      ratio_to_median_purchase_price,
      repeat_retailer,
      used_chip,
      used_pin_number,
      online_order,
  ]]
  output = model.predict(sc.transform(feature_row))
  print("Fraud", output)
  prediction = "It is a Fraud" if output==[1] else "It is not a Fraud"
  print(prediction)
  return prediction
def main():
    """Render the Streamlit page: banner, seven numeric inputs and two buttons.

    "Predict" sends the entered values to predict_note_authentication and
    shows the returned verdict; "About" shows the author credits.
    """
    html_temp = """
   <div class="" style="background-color:blue;" >
   <div class="clearfix">           
   <div class="col-md-12">
   <center><p style="font-size:40px;color:white;margin-top:10px;">Poornima Group Of Institution</p></center> 
   <center><p style="font-size:30px;color:white;margin-top:10px;">Department of Artificial Intelligence and Data Science</p></center> 
   <center><p style="font-size:25px;color:white;margin-top:10px;">ML_Lab Project Deployment</p></center> 
   </div>
   </div>
   </div>
   """
    st.markdown(html_temp,unsafe_allow_html=True)
    st.header("Credit Card Fraud Detection")
    # Only the label is passed: the second positional argument of
    # st.number_input is min_value, so the stray "" that used to follow the
    # label was an invalid (non-numeric) minimum.
    distance_from_home = st.number_input("Distance From Home")
    distance_from_last_transaction = st.number_input("Distance From Last Transaction")
    ratio_to_median_purchase_price = st.number_input("Ratio to Median Purchase Price")
    repeat_retailer = st.number_input("Repeat Retailer")
    used_chip = st.number_input("Used Chip")
    used_pin_number = st.number_input("Used Pin Number")
    online_order = st.number_input("Online Order")

    result=""
    if st.button("Predict"):
      result=predict_note_authentication(distance_from_home, distance_from_last_transaction, ratio_to_median_purchase_price, repeat_retailer, used_chip, used_pin_number, online_order)
      st.success('Model has detected {}'.format(result))
    if st.button("About"):
      st.subheader("Developed by Shakti Sarada Prasad")
      st.subheader("Student , Department of Computer Engineering")

if __name__=='__main__':
  main()
