<a href="https://colab.research.google.com/github/kirwarobert/cnn/blob/main/mpesa_transaction_pattern_analysis_and_fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Goal:
Build a dashboard and ML-powered engine that:

Visualizes transaction patterns of M-Pesa users

Detects anomalies or potential fraud

Allows querying and exploration of customer transaction behavior

# Core Features:
Component	Description
📊 Dashboard	Visualize M-Pesa usage: daily volumes, top users, peak hours
🔍 Transaction Explorer	Search transactions by phone number or customer ID
⚠️ Fraud Detection	ML model flags suspicious transactions (e.g. abnormal amounts, frequency)
⏱️ Real-Time Mode	Allow simulated real-time analysis (optional add-on)

# Technologies
Python + Pandas for processing

Scikit-learn for fraud detection

Streamlit for interactive interface

PyNgrok for Colab deployment



# 1. Install dependencies:
python
Copy
Edit


In [1]:
!pip install pandas scikit-learn streamlit pyngrok matplotlib seaborn --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m73.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.6/207.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m109.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

# 2. Load or simulate M-Pesa data

In [2]:
import pandas as pd
import numpy as np

# Simulate dataset (or upload your own)
df = pd.DataFrame({
    "transaction_id": [f"TX{i:04d}" for i in range(1000)],
    "customer_id": np.random.choice(["CU001", "CU002", "CU003", "CU004"], 1000),
    "msisdn": np.random.choice(["254712345678", "254711223344", "254733221100"], 1000),
    "timestamp": pd.date_range("2024-04-01", periods=1000, freq="H"),
    "amount": np.random.gamma(shape=2.0, scale=1000, size=1000),
    "type": np.random.choice(["send", "withdraw", "deposit", "airtime"], 1000),
    "balance": np.random.randint(100, 10000, 1000),
    "location": np.random.choice(["Nairobi", "Kisumu", "Mombasa", "Eldoret"], 1000)
})
df.to_csv("mpesa_data.csv", index=False)


  "timestamp": pd.date_range("2024-04-01", periods=1000, freq="H"),


# 3. Create Streamlit App

In [3]:
%%writefile mpesa_dashboard.py
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("mpesa_data.csv", parse_dates=["timestamp"])

st.set_page_config(page_title="M-Pesa Dashboard", layout="wide")
st.title("📱 M-Pesa Transaction Dashboard")

st.sidebar.header("Filter Transactions")
location = st.sidebar.selectbox("Location", options=["All"] + df["location"].unique().tolist())
if location != "All":
    df = df[df["location"] == location]

st.subheader("📈 Transaction Overview")
col1, col2 = st.columns(2)

with col1:
    st.metric("Total Transactions", len(df))
    st.metric("Total Volume (KES)", f"{df['amount'].sum():,.0f}")

with col2:
    st.bar_chart(df['type'].value_counts())

# Time Series
st.subheader("⏳ Transactions Over Time")
df["hour"] = df["timestamp"].dt.hour
hourly = df.groupby("hour")["amount"].sum()

fig, ax = plt.subplots()
hourly.plot(kind="line", ax=ax)
ax.set_title("Total Amount by Hour")
st.pyplot(fig)

# Fraud detection (simple outlier flag)
st.subheader("⚠️ Suspicious Transactions")
threshold = df["amount"].mean() + 3 * df["amount"].std()
suspicious = df[df["amount"] > threshold]
st.write(f"Flagged {len(suspicious)} potential fraud transactions:")
st.dataframe(suspicious)


Writing mpesa_dashboard.py


# 4. Run with Ngrok

In [None]:
from pyngrok import ngrok
!streamlit run mpesa_dashboard.py &

# Open public URL
public_url = ngrok.connect(8501)
print("Access dashboard:", public_url)



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.232.63.12:8501[0m
[0m
