<a href="https://colab.research.google.com/github/rohit-7620/AI-based-Innovation-and-Product-Development/blob/main/credit_card_fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:


# Step 2: Import libraries
from datasets import load_dataset
import pandas as pd
import time

# Step 3: Load real online dataset from Hugging Face
dataset = load_dataset("jyunyilin/credit-card-fraud-detection")

# Convert to pandas DataFrame
df = dataset["train"].to_pandas()
print("Dataset loaded successfully!")
print("Total transactions:", len(df))
print("Fraudulent transactions:", df["Class"].sum())
print(df.head())




Dataset loaded successfully!
Total transactions: 284807
Fraudulent transactions: 492
   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -

In [6]:
# Step 4: Define fraud detection rule
def is_fraud(tx):
    """
    Mark transaction as fraud if:
    - Amount > 1000 OR
    - Class == 1 (fraud label in dataset)
    """
    return tx["Amount"] > 1000 or tx["Class"] == 1

# Step 5: Prepare list to store detected frauds
fraud_list = []

# Step 6: Simulate real-time streaming
print("\nStarting real-time transaction streaming...\n")
for index, row in df.iterrows():
    transaction = {
        "TransactionID": index,
        "Time": row["Time"],
        "Amount": row["Amount"],
        "Class": row["Class"]
    }

    if is_fraud(transaction):
        print(f"⚠️ Fraudulent Transaction Detected: {transaction}")
        fraud_list.append(transaction)
    else:
        print(f"Transaction OK: {transaction}")

    time.sleep(0.01)  # simulate streaming delay (10ms per transaction)

# Step 7: Save detected frauds to CSV
fraud_df = pd.DataFrame(fraud_list)
fraud_df.to_csv("detected_frauds.csv", index=False)
print("\nAll detected frauds saved to 'detected_frauds.csv'.")

# Step 8: Summary
print("\nSummary:")
print("Total transactions processed:", len(df))
print("Total frauds detected:", len(fraud_df))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Transaction OK: {'TransactionID': 125215, 'Time': np.float64(77571.0), 'Amount': np.float64(129.0), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125216, 'Time': np.float64(77573.0), 'Amount': np.float64(58.99), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125217, 'Time': np.float64(77574.0), 'Amount': np.float64(14.0), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125218, 'Time': np.float64(77574.0), 'Amount': np.float64(6.27), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125219, 'Time': np.float64(77574.0), 'Amount': np.float64(7.61), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125220, 'Time': np.float64(77575.0), 'Amount': np.float64(117.61), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125221, 'Time': np.float64(77576.0), 'Amount': np.float64(14.37), 'Class': np.float64(0.0)}
Transaction OK: {'TransactionID': 125222, 'Time': np.fl

KeyboardInterrupt: 