<a href="https://colab.research.google.com/github/kdmwangi/CODSOFT/blob/main/Fraud_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Credit Card Fraud Detection

- Build a machine learning model to identify fraudulent credit card
  transactions.
- Preprocess and normalize the transaction data, handle class
  imbalance issues, and split the dataset into training and testing sets.
- Train a classification algorithm, such as logistic regression or random
  forests, to classify transactions as fraudulent or genuine.
- Evaluate the model's performance using metrics like precision, recall,
  and F1-score, and consider techniques like oversampling or
  undersampling for improving results.


# Import Statements

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Read Data

In [2]:
# Load the transactions from creditcard.csv; the path is relative, so the
# file must be present in the kernel's current working directory.
dataset = pd.read_csv('creditcard.csv')


# Data Exploration

In [3]:
# Number of rows and columns in the loaded frame.
dataset.shape

(7973, 31)

In [4]:
# Preview the first rows of the frame (head() with its default row count).
dataset.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


In [5]:
# Preview the last rows of the frame (tail() with its default row count).
# NOTE(review): in the saved output the final row (index 7972) is NaN from
# V21 through Class — the CSV may have been cut off mid-row; confirm the
# file is complete before modelling.
dataset.tail()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
7968,10980,1.284388,-0.013181,0.646174,0.198985,-0.568675,-0.526121,-0.448235,-0.167709,1.773223,...,-0.101868,-0.030298,-0.081412,-0.123281,0.278808,1.064001,-0.090181,0.000481,15.95,0.0
7969,10981,1.190428,-0.122329,0.954945,0.267101,-0.971026,-0.652279,-0.612992,-0.003909,1.633117,...,-0.015001,0.127027,0.012079,0.534409,0.112179,1.004483,-0.100188,-0.004774,14.95,0.0
7970,10981,-0.725175,0.298202,1.824761,-2.58717,0.283605,-0.016617,0.153659,0.045084,-0.197611,...,-0.017097,-0.070535,-0.442861,-0.895837,0.624743,-0.510601,-0.031142,0.025564,12.95,0.0
7971,10981,1.226153,-0.129645,0.735197,0.142752,-0.703245,-0.349641,-0.612641,0.020507,1.648986,...,-0.047936,0.040196,-0.057391,-0.012386,0.187685,1.037786,-0.100081,-0.009869,15.95,0.0
7972,10981,1.145381,-0.059349,0.968088,0.267891,-0.822582,-0.597727,-0.450197,-0.119747,1.338188,...,,,,,,,,,,


In [6]:
# List the column labels of the frame.
dataset.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
dataset.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,7973.0,7973.0,7973.0,7973.0,7973.0,7973.0,7973.0,7973.0,7973.0,7973.0,...,7972.0,7972.0,7972.0,7972.0,7972.0,7972.0,7972.0,7972.0,7972.0,7972.0
mean,4257.151261,-0.29974,0.295226,0.899355,0.215736,-0.025285,0.157286,-0.026445,-0.070525,0.655244,...,-0.053715,-0.165799,-0.035174,0.025977,0.088893,0.020256,0.01615,0.001161,65.41354,0.003136
std,3198.964299,1.498341,1.283914,1.090297,1.447057,1.167218,1.325015,1.063709,1.332568,1.156618,...,0.953498,0.654858,0.488322,0.60176,0.427505,0.517409,0.40357,0.275976,194.911169,0.055915
min,0.0,-23.066842,-25.640527,-12.389545,-4.657545,-32.092129,-7.574798,-12.96867,-23.632502,-3.878658,...,-11.468435,-8.527145,-15.14434,-2.512377,-2.577363,-1.338556,-7.9761,-3.054085,0.0,0.0
25%,1531.0,-1.046362,-0.237359,0.372435,-0.687521,-0.630525,-0.655399,-0.517733,-0.199794,-0.085635,...,-0.271837,-0.581473,-0.182989,-0.340419,-0.161009,-0.36318,-0.063198,-0.019081,4.6175,0.0
50%,3635.0,-0.416341,0.335446,0.948695,0.223379,-0.107337,-0.148669,0.004732,0.016128,0.61317,...,-0.130344,-0.167048,-0.046107,0.089606,0.115418,-0.01526,0.007101,0.018443,15.95,0.0
75%,6662.0,1.122758,0.950582,1.597949,1.131542,0.405082,0.5552,0.527353,0.307111,1.294087,...,0.044823,0.250886,0.086806,0.421015,0.361249,0.329322,0.1447,0.080563,54.91,0.0
max,10981.0,1.685314,8.26175,4.101716,7.380245,11.974269,21.393069,34.303177,3.877662,10.392889,...,22.588989,4.534454,13.876221,3.200201,5.525093,3.517346,4.173387,4.860769,7712.43,1.0


In [8]:
# Per-column non-null counts and dtypes; a quick check for missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7973 entries, 0 to 7972
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    7973 non-null   int64  
 1   V1      7973 non-null   float64
 2   V2      7973 non-null   float64
 3   V3      7973 non-null   float64
 4   V4      7973 non-null   float64
 5   V5      7973 non-null   float64
 6   V6      7973 non-null   float64
 7   V7      7973 non-null   float64
 8   V8      7973 non-null   float64
 9   V9      7973 non-null   float64
 10  V10     7973 non-null   float64
 11  V11     7973 non-null   float64
 12  V12     7973 non-null   float64
 13  V13     7973 non-null   float64
 14  V14     7973 non-null   float64
 15  V15     7972 non-null   float64
 16  V16     7972 non-null   float64
 17  V17     7972 non-null   float64
 18  V18     7972 non-null   float64
 19  V19     7972 non-null   float64
 20  V20     7972 non-null   float64
 21  V21     7972 non-null   float64
 22  

In [9]:
# Data type of each column.
# NOTE(review): Class is float64 in the saved output, presumably because of
# the missing value in the last row — confirm, and cast to an integer label
# once the missing row is handled.
dataset.dtypes

Time        int64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
Class     float64
dtype: object

In [12]:
# Report the largest and the smallest transaction amount, one per line.
print(
    f"{dataset['Amount'].max()} is the highest amount",
    f"{dataset['Amount'].min()} is the least amount",
    sep="\n",
)

7712.43 is the highest amount
0.0 is the least amount


In [14]:
# Inspect the final 20 rows of the frame.
dataset.tail(20)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
7953,10975,-0.195295,0.521268,1.434256,-0.652101,0.106763,0.258666,0.009089,0.167264,1.175229,...,-0.033815,0.041159,-0.26926,-0.849304,-0.136201,1.050313,-0.078328,-0.014294,14.95,0.0
7954,10976,1.189153,-0.060635,0.972446,0.25921,-0.89208,-0.630609,-0.550899,-0.055052,1.497383,...,-0.004098,0.199852,0.000447,0.558983,0.142117,1.001375,-0.094522,-0.002677,15.95,0.0
7955,10977,1.285888,-0.054289,0.413775,-0.066309,-0.176334,0.159908,-0.539002,-0.002637,1.502053,...,-0.087286,-0.003036,-0.176694,-0.810982,0.344627,1.085001,-0.092363,-0.015434,15.95,0.0
7956,10977,1.226333,-0.099813,0.742618,0.137696,-0.663756,-0.334571,-0.585634,-0.001422,1.585999,...,-0.043009,0.073736,-0.061124,0.017182,0.201393,1.035493,-0.097472,-0.008807,15.95,0.0
7957,10977,-1.706795,1.525315,0.857864,-0.036106,-1.210956,-0.222397,-1.025342,1.271377,0.422887,...,0.202229,0.285122,-0.122758,0.029863,-0.221374,1.09367,-0.637358,-0.130442,14.95,0.0
7958,10978,1.314172,-0.393035,0.401164,-0.478993,-0.839719,-0.440242,-0.910447,-0.07773,0.399755,...,0.164113,0.597952,-0.18149,-0.082715,0.470708,-0.124088,0.001663,0.02787,39.0,0.0
7959,10978,1.203169,-0.114123,0.872416,0.217274,-0.850822,-0.528506,-0.600233,-0.004233,1.611785,...,-0.025252,0.107685,-0.017058,0.334611,0.147187,1.016364,-0.099122,-0.006187,15.95,0.0
7960,10978,1.285853,-0.084448,0.401697,-0.06464,-0.207862,0.1585,-0.568156,0.022719,1.564522,...,-0.092221,-0.038169,-0.171197,-0.809587,0.331126,1.08582,-0.095161,-0.016203,15.95,0.0
7961,10979,1.270792,-0.466152,0.365816,-0.525627,-0.683161,0.022055,-1.011399,0.080285,0.447692,...,0.187797,0.68436,-0.147915,-0.403501,0.386381,-0.093228,0.015917,0.023723,39.0,0.0
7962,10979,-2.058928,0.527799,1.576619,-0.119223,0.362195,-0.398901,0.087637,-0.476905,2.126294,...,-0.431854,0.010496,-0.160385,0.244118,-0.784769,0.86854,-0.157976,0.191359,25.95,0.0


# Data Cleaning

In [None]:
dataset.