<a href="https://colab.research.google.com/github/pavi116tra/CodSoft/blob/main/Creditcard_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Columns to read from the transaction CSVs; "is_fraud" is the label column,
# the remaining five are used as model inputs.
use_cols = ["category", "amt", "gender", "city_pop", "job", "is_fraud"]

In [3]:
# Read the train and test splits, keeping only the columns named in use_cols.
train_df, test_df = (
    pd.read_csv(csv_path, usecols=use_cols)
    for csv_path in ("fraudTrain.csv", "fraudTest.csv")
)

In [4]:
# Preview the first rows of the training split to confirm the selected columns loaded.
train_df.head()

Unnamed: 0,category,amt,gender,city_pop,job,is_fraud
0,misc_net,4.97,F,3495.0,"Psychologist, counselling",0.0
1,grocery_pos,107.23,F,149.0,Special educational needs teacher,0.0
2,entertainment,220.11,M,4154.0,Nature conservation officer,0.0
3,gas_transport,45.0,M,1939.0,Patent attorney,0.0
4,misc_pos,41.96,M,99.0,Dance movement psychotherapist,0.0


In [5]:
# Preview the first rows of the test split; it should have the same columns as the training split.
test_df.head()

Unnamed: 0,category,amt,gender,city_pop,job,is_fraud
0,personal_care,2.86,M,333497.0,Mechanical engineer,0.0
1,personal_care,29.84,F,302.0,"Sales professional, IT",0.0
2,health_fitness,41.28,F,34496.0,"Librarian, public",0.0
3,misc_pos,60.05,M,54767.0,Set designer,0.0
4,travel,3.19,M,1126.0,Furniture designer,0.0


In [6]:
from sklearn.preprocessing import OrdinalEncoder

# String-valued columns that must become numeric before modelling.
categorical_cols = ["category", "gender", "job"]

# The category -> integer mapping is learned from the training split only.
# A category that appears only in the test split is encoded as -1 instead of
# raising an error.
encoder = OrdinalEncoder(unknown_value=-1, handle_unknown="use_encoded_value")
encoder.fit(train_df[categorical_cols])

# Apply the same fitted mapping to both splits, overwriting the text columns.
train_df[categorical_cols] = encoder.transform(train_df[categorical_cols])
test_df[categorical_cols] = encoder.transform(test_df[categorical_cols])


In [7]:
# Rows without a label cannot be used for fitting or scoring, so discard them
# from both splits.
train_df, test_df = (
    frame.dropna(subset=["is_fraud"]) for frame in (train_df, test_df)
)

In [8]:
# Separate the predictors (every column except the label) from the label.
X_train, y_train = train_df.drop(columns="is_fraud"), train_df["is_fraud"]
X_test, y_test = test_df.drop(columns="is_fraud"), test_df["is_fraud"]


In [9]:
# The label was read as a float column (0.0 / 1.0); convert it to integer
# class labels for the classifiers.
y_train, y_test = y_train.astype(int), y_test.astype(int)


In [10]:
# Impute missing feature values with per-column medians learned from the
# TRAINING split only. The models are fitted on features prepared with these
# statistics, so the test split must be filled with the same values; using the
# test split's own medians would preprocess the two splits inconsistently and
# let test-set information leak into the pipeline.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

In [11]:
from sklearn.linear_model import LogisticRegression

# Logistic regression baseline. "balanced" class weights re-weight the two
# classes by inverse frequency, and max_iter raises the solver's iteration cap
# to 1000.
model = LogisticRegression(class_weight="balanced", max_iter=1000)

# Left as the cell's final expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)


In [12]:
# Predicted class labels for the test features from the fitted logistic-regression `model`.
y_pred = model.predict(X_test)


In [13]:
from sklearn.metrics import accuracy_score

# Share of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9425633726036524


In [14]:
from sklearn.metrics import confusion_matrix

# Rows are true classes and columns are predicted classes (0 first, then 1).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)


Confusion Matrix:
 [[124083   7453]
 [   133    407]]


In [15]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the logistic-regression predictions.
# The heading and the report are emitted by one print call, separated by a
# newline, which yields the same text as two consecutive prints.
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)


Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.94      0.97    131536
           1       0.05      0.75      0.10       540

    accuracy                           0.94    132076
   macro avg       0.53      0.85      0.53    132076
weighted avg       1.00      0.94      0.97    132076



In [16]:
from sklearn.svm import LinearSVC

# Linear support-vector classifier with balanced class weights and an
# iteration cap of 5000; fit() returns the estimator itself, so construction
# and fitting are chained.
svm_model = LinearSVC(max_iter=5000, class_weight="balanced").fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# Report accuracy, then the confusion matrix and per-class report on
# consecutive lines.
print("Linear SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(
    confusion_matrix(y_test, y_pred_svm),
    classification_report(y_test, y_pred_svm),
    sep="\n",
)



Linear SVM Accuracy: 0.9518837638935158
[[125316   6220]
 [   135    405]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.98    131536
           1       0.06      0.75      0.11       540

    accuracy                           0.95    132076
   macro avg       0.53      0.85      0.54    132076
weighted avg       1.00      0.95      0.97    132076



In [17]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree with balanced class weights; the fixed seed
# keeps the fitted tree repeatable between runs.
dt_model = DecisionTreeClassifier(
    random_state=42,
    class_weight="balanced",
    max_depth=10,
).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Report accuracy, then the confusion matrix and per-class report on
# consecutive lines.
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print(
    confusion_matrix(y_test, y_pred_dt),
    classification_report(y_test, y_pred_dt),
    sep="\n",
)


Decision Tree Accuracy: 0.9567597443895939
[[125878   5658]
 [    53    487]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98    131536
           1       0.08      0.90      0.15       540

    accuracy                           0.96    132076
   macro avg       0.54      0.93      0.56    132076
weighted avg       1.00      0.96      0.97    132076



In [18]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Gaussian naive Bayes with default settings; fit() returns the estimator, so
# construction and fitting are chained.
nb_model = GaussianNB().fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)

# Report accuracy, then the confusion matrix and per-class report on
# consecutive lines.
print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))
print(
    confusion_matrix(y_test, y_pred_nb),
    classification_report(y_test, y_pred_nb),
    sep="\n",
)


Naive Bayes Accuracy: 0.9881886186740967
[[130275   1261]
 [   299    241]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99    131536
           1       0.16      0.45      0.24       540

    accuracy                           0.99    132076
   macro avg       0.58      0.72      0.62    132076
weighted avg       0.99      0.99      0.99    132076



In [19]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Multinomial naive Bayes with default settings.
# NOTE: this cell rebinds `model` and `y_pred`, replacing the logistic
# regression estimator and its predictions that previously used these names.
model = MultinomialNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.5951497622580938

Confusion Matrix:
 [[78200 53336]
 [  135   405]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.59      0.75    131536
           1       0.01      0.75      0.01       540

    accuracy                           0.60    132076
   macro avg       0.50      0.67      0.38    132076
weighted avg       0.99      0.60      0.74    132076



In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Random forest of 100 trees, each limited to depth 8. Forest training is
# randomized, so the seed is pinned (same value the decision tree uses) to make
# the metrics below repeatable from run to run.
# Note: unlike the logistic regression, linear SVM and decision tree, no
# class_weight is passed here.
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=8,
    random_state=42,
)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))
print("\nClassification RePort:\n", classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.9971758684393834

Confusion Matrix:
 [[131380    156]
 [   217    323]]

Classification RePort:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    131536
           1       0.67      0.60      0.63       540

    accuracy                           1.00    132076
   macro avg       0.84      0.80      0.82    132076
weighted avg       1.00      1.00      1.00    132076



In [21]:
import joblib

# Export the random forest explicitly by its own name. The generic `model`
# variable was last rebound to the MultinomialNB classifier (accuracy about
# 0.60 in this run), so dumping `model` would ship that classifier rather than
# the best-scoring one (the random forest, by accuracy and fraud-class F1).
MODEL_PATH = "CreditCardmodel.pkl"
joblib.dump(rf_model, MODEL_PATH)

# Colab-only helper, imported after the dump so the model file is still
# written when this notebook runs outside Colab.
from google.colab import files
files.download(MODEL_PATH)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
%%writefile main.py
# put your full ML code here
print("ML Project")


Writing main.py


In [None]:
# Hand main.py to the user via Colab's download helper; `files` comes from the
# earlier `from google.colab import files` in the model-export cell.
files.download("main.py")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>