<a href="https://colab.research.google.com/github/susannelobo/Alexis/blob/main/AI-basedThreatAnalyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade pip
!pip install numpy==1.26.4 pandas==2.2.3 matplotlib==3.9.2 plotly==5.24.1 scikit-learn==1.5.2
!pip install streamlit==1.25.0 pyngrok==7.0.0 pdfplumber==0.11.4 cryptography==43.0.1 python-dotenv==1.0.1 sentence-transformers==3.1.1 transformers==4.45.2

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting pandas==2.2.3
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting matplotlib==3.9.2
  Downloading matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scikit-learn==1.5.2
  Downloading scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading num

Collecting streamlit==1.25.0
  Downloading streamlit-1.25.0-py2.py3-none-any.whl.metadata (8.1 kB)
Collecting pyngrok==7.0.0
  Downloading pyngrok-7.0.0.tar.gz (718 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.7/718.7 kB[0m [31m12.7 MB/s[0m  [33m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pdfplumber==0.11.4
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
Collecting cryptography==43.0.1
  Downloading cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Collecting python-dotenv==1.0.1
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting sentence-transformers==3.1.1
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting transformers==4.45.2
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
Collecting importlib-metadata<7,>=1.4 (from streamlit==1.25.0)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl.

In [None]:
%%writefile threat_analyzer.py
import pdfplumber
import hashlib
import pandas as pd
import numpy as np
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import torch
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.backends import default_backend
import time, json

# Load NLP Models
# Every checkpoint is named explicitly so results are reproducible and transformers does
# not fall back to (and warn about) an implicit default model.
# NOTE(review): the sentiment/NER checkpoints below are believed to be the library's
# defaults for these tasks in transformers 4.45 - confirm before relying on exact parity.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
# aggregation_strategy="simple" is the supported replacement for the deprecated
# grouped_entities=True flag.
ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# -------- PDF EXTRACT --------
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Anything ``pdfplumber.open`` accepts (the app passes an uploaded file object).

    Returns:
        The concatenated page texts with leading/trailing whitespace stripped.
        Pages for which ``extract_text()`` yields ``None`` contribute an empty
        line instead of raising ``TypeError``.
    """
    with pdfplumber.open(file) as pdf:
        # `or ""` guards against pages where extract_text() returns None.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts).strip()

# -------- SUMMARIZATION --------
def summarize_text(text, max_len=200):
    """Summarize ``text`` piecewise and return the summaries joined by spaces.

    The input is cut into consecutive 1024-character windows; each window is
    passed to the module-level ``summarizer`` with ``max_length=max_len``,
    ``min_length=50`` and sampling disabled.
    """
    window = 1024
    pieces = []
    for start in range(0, len(text), window):
        result = summarizer(
            text[start:start + window],
            max_length=max_len,
            min_length=50,
            do_sample=False,
        )
        pieces.append(result[0]['summary_text'])
    return " ".join(pieces)

# -------- SENTIMENT --------
def sentiment_score(text):
    """Classify the first 512 characters of ``text``.

    Returns:
        A ``(label, score)`` tuple taken from the first result of the
        module-level ``sentiment`` pipeline.
    """
    head = text[:512]
    top_result = sentiment(head)[0]
    label = top_result["label"]
    score = top_result["score"]
    return label, score

# -------- NER --------
def extract_entities(text):
    """Run the module-level ``ner`` pipeline on the first 1000 characters of ``text``.

    Returns:
        A ``pandas.DataFrame`` built directly from the pipeline's output
        (one row per returned entity record).
    """
    return pd.DataFrame(ner(text[:1000]))

# -------- CLUSTERING --------
def cluster_reports(texts, n_clusters=3):
    """Cluster texts by embedding similarity and return one label per text.

    Args:
        texts: Iterable of strings to embed with the module-level ``embedder``.
        n_clusters: Desired number of clusters. It is capped at ``len(texts)``
            because KMeans cannot fit more clusters than it has samples
            (previously one or two reports raised ``ValueError``).

    Returns:
        Array of integer cluster labels, in the same order as ``texts``.
        An empty input yields an empty array.
    """
    texts = list(texts)
    if not texts:
        # Nothing to embed or fit; mirror the shape of kmeans.labels_.
        return np.array([], dtype=int)

    embeddings = embedder.encode(texts)
    effective_clusters = min(n_clusters, len(texts))
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init=10)
    kmeans.fit(embeddings)
    return kmeans.labels_

# -------- BLOCKCHAIN LOGGING --------
def generate_block(data):
    """Wrap ``data`` in a log record with a UTC timestamp and a SHA-256 digest.

    The digest is computed over ``data`` serialized as JSON with sorted keys;
    the timestamp is not part of the hashed payload.

    Returns:
        ``{"timestamp": ..., "hash": ..., "data": data}``
    """
    payload = json.dumps(data, sort_keys=True).encode()
    digest = hashlib.sha256(payload).hexdigest()
    created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    record = {"timestamp": created_at}
    record["hash"] = digest
    record["data"] = data
    return record

def verify_blockchain(blockchain):
    """Check that every block's stored hash matches its data.

    For each block the SHA-256 of ``block["data"]`` (JSON, sorted keys) is
    recomputed and compared with ``block["hash"]``. All blocks are checked,
    including the first one, which the earlier implementation skipped.

    Note that this validates each record independently; the blocks carry no
    reference to their predecessor, so ordering/removal is not detected.

    Args:
        blockchain: Sequence of dicts with ``"hash"`` and ``"data"`` keys.

    Returns:
        ``True`` if all hashes match (or the sequence is empty), else ``False``.
    """
    for block in blockchain:
        payload = json.dumps(block["data"], sort_keys=True).encode()
        if block["hash"] != hashlib.sha256(payload).hexdigest():
            return False
    return True

Writing threat_analyzer.py


In [None]:
%%writefile app.py
import streamlit as st
import os
from threat_analyzer import extract_text_from_pdf, summarize_text, sentiment_score, extract_entities, cluster_reports, generate_block, verify_blockchain
import pandas as pd
import plotly.express as px

# --- App Config ---
st.set_page_config(page_title="Threat Intelligence Analyzer", layout="wide")

# --- Password Gate ---
# The password is taken from the THREAT_ANALYZER_PASSWORD environment variable so it does
# not have to live in source control. The previous built-in value remains only as a
# fallback for existing Colab sessions; set the variable for any real deployment.
PASSWORD = os.environ.get("THREAT_ANALYZER_PASSWORD", "quantumshield")
entered_pw = st.text_input("🔐 Enter Access Password:", type="password")
if entered_pw != PASSWORD:
    st.warning("Enter correct password to access.")
    # Halt this script run so nothing below the gate is rendered.
    st.stop()

st.title("🧠 Threat Intelligence Report Analyzer")
st.caption("Hybrid NLP + ML + Blockchain-based Threat Report Analysis")

uploaded_files = st.file_uploader("📄 Upload Threat Reports (PDFs)", type=["pdf"], accept_multiple_files=True)

if uploaded_files:
    summaries, sentiments, entities_list, blockchain = [], [], [], []

    # Run the full analysis pipeline once per uploaded report.
    for file in uploaded_files:
        with st.spinner(f"Processing {file.name}..."):
            text = extract_text_from_pdf(file)
            summary = summarize_text(text)
            sentiment_label, sentiment_val = sentiment_score(text)
            entities = extract_entities(text)
            block = generate_block({"filename": file.name, "summary": summary, "sentiment": sentiment_label})

            summaries.append(summary)
            sentiments.append((sentiment_label, sentiment_val))
            entities_list.append(entities)
            blockchain.append(block)

    # --- Dashboard Summary ---
    st.subheader("📊 Dashboard Summary")
    df_summary = pd.DataFrame({
        "Filename": [f.name for f in uploaded_files],
        "Sentiment": [s[0] for s in sentiments],
        "Confidence": [s[1] for s in sentiments],
        "Summary": summaries
    })
    st.dataframe(df_summary, use_container_width=True)

    # Sentiment Plot
    fig = px.bar(df_summary, x="Filename", y="Confidence", color="Sentiment", title="Sentiment Confidence per Report")
    st.plotly_chart(fig, use_container_width=True)

    # Cluster Reports
    st.subheader("🧩 Clustering Reports")
    # KMeans needs at least as many samples as clusters, so never request more clusters
    # than there are reports (one or two uploads used to crash here).
    labels = cluster_reports(summaries, n_clusters=min(3, len(summaries)))
    df_summary["Cluster"] = labels
    st.dataframe(df_summary[["Filename", "Cluster", "Sentiment"]])

    # Named Entities - surface the NER results that were computed for each report.
    st.subheader("🏷️ Named Entities")
    for report, entities_df in zip(uploaded_files, entities_list):
        with st.expander(report.name):
            if entities_df.empty:
                st.write("No entities detected.")
            else:
                st.dataframe(entities_df, use_container_width=True)

    # Blockchain Integrity
    st.subheader("🔗 Blockchain Log")
    st.json(blockchain)
    if verify_blockchain(blockchain):
        st.success("✅ Blockchain integrity verified — data is secure.")
    else:
        st.error("❌ Blockchain integrity compromised.")

Writing app.py


In [None]:
# SECURITY: never commit the ngrok authtoken. It is read from the NGROK_AUTHTOKEN
# environment variable, or prompted for without echo when the variable is unset.
# Any token that was previously saved in this notebook should be revoked in the
# ngrok dashboard.
import os
import subprocess
from getpass import getpass

ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
# Argument list (no shell) so the token is never interpreted by a shell.
_authtoken_result = subprocess.run(
    ["ngrok", "authtoken", ngrok_authtoken],
    check=True,
    capture_output=True,
    text=True,
)
print(_authtoken_result.stdout.strip())

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
from pyngrok import ngrok
import threading, os, time

def run_app(port=8501):
    """Run ``streamlit run app.py`` and block until the server process exits.

    Args:
        port: TCP port passed to ``--server.port``. Defaults to 8501, the value
            that was previously hard-coded.
    """
    # Local import: this cell's import line does not bring in subprocess.
    import subprocess

    # An argument list avoids going through a shell to launch the server.
    subprocess.run([
        "streamlit", "run", "app.py",
        "--server.port", str(port),
        "--server.enableCORS=false",
    ])

import socket

STREAMLIT_PORT = 8501       # must match the port run_app starts Streamlit on
STARTUP_TIMEOUT_S = 60      # upper bound on how long to wait for the server

# Streamlit blocks for as long as it is serving, so it runs in a background thread.
thread = threading.Thread(target=run_app)
thread.start()

# Poll until the server accepts connections instead of sleeping a fixed 5 seconds,
# so the public URL is not announced before the app can answer requests.
_deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    try:
        with socket.create_connection(("localhost", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= _deadline:
            # Best effort: keep the old behaviour of opening the tunnel regardless.
            print(f"⚠️ Streamlit is not listening on port {STREAMLIT_PORT} after {STARTUP_TIMEOUT_S}s; opening the tunnel anyway.")
            break
        time.sleep(0.5)

public_url = ngrok.connect(STREAMLIT_PORT, bind_tls=True)
print("🚀 Your Streamlit app is live at:", public_url)



🚀 Your Streamlit app is live at: NgrokTunnel: "https://d94cdcc67540.ngrok-free.app" -> "http://localhost:8501"
