### IMPORTING LIBRARIES

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
import warnings

In [3]:
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# libraries used below — confirm that is intended.
warnings.simplefilter('ignore')

### READ CSV FILE

In [None]:
# Load the transactions CSV from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='onlinefraud.csv')
df.head(n=5)

In [None]:
df.shape

In [None]:
 df.drop(['isFlaggedFraud'], axis=1, inplace=True)

In [None]:
df.shape

In [None]:
# Print the column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Missing-value count per column (the author observed none in this dataset).
df.isna().sum()

In [None]:
# Correlation is only computed over numeric columns, so restrict to those first.
numeric_df = df.select_dtypes(include='number')
# Pairwise Pearson correlation (the pandas default method).
correlation = numeric_df.corr(method='pearson')
print(correlation)

### DATA VISUALIZATION

In [None]:
sns.countplot(x='type',data=df)

In [None]:
sns.barplot(x='type', y='amount', data=df)

In [None]:
df["isFraud"].value_counts()

In [None]:
# Relabel the binary target with readable class names.
# Series.replace builds a new column holding the strings, whereas writing
# strings into the existing numeric column through .loc is an
# incompatible-dtype assignment that recent pandas versions deprecate and
# later reject.
# Values other than 0/1 are left untouched, so re-running the cell is harmless.
df["isFraud"] = df["isFraud"].replace({1: "Fraud", 0: "Not Fraud"})
df.head()

In [None]:
sns.scatterplot(x='isFraud',y='newbalanceOrig',data=df)

In [None]:
sns.scatterplot(x='amount',y='oldbalanceOrg',data=df)

In [None]:
sns.heatmap(correlation,annot=True)

### DATA PREPROCESSING

In [None]:
df.columns

In [None]:
df.drop(['nameDest','nameOrig'],axis=1,inplace=True)

In [None]:
df.columns

In [None]:
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['type'] = label_encoder.fit_transform(df['type'])
df.head()

In [None]:
df["type"].value_counts()

In [None]:
#dividing the dataset into dependent and independent y and x respectively
x=df.drop(["isFraud"],axis=1)
y=df["isFraud"]

In [None]:
x.head()

In [None]:
y.head()

### TRAIN TEST SPLIT

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# Stratifying on y keeps the class proportions identical in the train and test
# partitions, so neither side ends up with a distorted share of either label.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
#for model building
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import pickle

### MODEL BUILDING

#### 1. LOGISTIC REGRESSION

In [None]:
lr=LogisticRegression()
lr.fit(x_train,y_train)

In [None]:
lr.score(x_test,y_test)

#### 2. DECISION TREE CLASSIFIER

In [None]:
dt=DecisionTreeClassifier()
dt.fit(x_train,y_train)

In [None]:
dt.score(x_test,y_test)

#### 3. RANDOM FOREST CLASSIFIER

In [None]:
rfc=RandomForestClassifier()
rfc.fit(x_train,y_train)

In [None]:
rfc.score(x_test,y_test)

### MODEL ACCURACY

In [None]:
print("Logistic Regression :",lr.score(x_test,y_test) *100)
print("Decision Tree Classifier :",dt.score(x_test,y_test) *100)
print("Random Forest Classifier :",rfc.score(x_test,y_test) *100)

In [None]:
dt.predict(x_test)

### PREDICTION

In [None]:
# Classify one hand-written transaction with the decision tree.
# The row is wrapped in a DataFrame that reuses x's column labels, so each
# value is tied to the column the model was fitted on (and a wrong number of
# values fails immediately at construction).
# NOTE(review): this predicts with `dt`, while the model saved below is `rfc`
# — confirm which one is meant to be used.
features = pd.DataFrame(
    [[696, 1, 85002.52, 85002.52, 0, 169, 50]],
    columns=x.columns,
)
dt.predict(features)

### SAVING THE MODEL

In [None]:
# Persist the fitted random forest. The with-block guarantees the file handle
# is flushed and closed even if serialisation raises.
with open('model.sav', 'wb') as model_file:
    pickle.dump(rfc, model_file)

In [None]:
# Load the saved model back from disk; the with-block closes the file handle.
# NOTE: unpickling can execute arbitrary code embedded in the file — only load
# model files that were produced by a trusted source.
with open('model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)