In [None]:
# ================================
# Fake Poster Detection Pipeline with X API (90% Threshold)
# ================================

!pip install tweepy tensorflow pillow pandas

import os, json, hashlib, requests, pandas as pd
from io import BytesIO
from PIL import Image
import numpy as np
import tweepy
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from google.colab import files

# ------------------------
# Step 0: Setup
# ------------------------
from getpass import getpass

# Read the credential with getpass() rather than input(): input() echoes the
# typed value into the cell output, which is then saved with the notebook.
print("🔑 Please provide your X (Twitter) API Bearer Token:")
BEARER_TOKEN = getpass("Enter Bearer Token: ").strip()

print("🎬 Enter the movie name you want to monitor:")
MOVIE_NAME = input("Movie Name: ").strip()

# Init Twitter API exactly once. wait_on_rate_limit=True is kept so the search
# below waits out a rate-limit window instead of failing.
client = tweepy.Client(bearer_token=BEARER_TOKEN, wait_on_rate_limit=True)

# ------------------------
# Step 1: Upload Official Posters
# ------------------------
num_official = int(input("How many official posters do you want to upload? "))
official_posters = []
os.makedirs("official", exist_ok=True)

print(f"📤 Please upload {num_official} official poster(s)...")
# files.upload() returns a {filename: bytes} mapping.
uploaded = files.upload()

if len(uploaded) != num_official:
    print(f"⚠️ Expected {num_official} poster(s) but received {len(uploaded)}.")

# Store SHA-256 hashes in ledger.
# The bytes returned by the upload widget are used directly instead of
# re-opening files by name: Colab may save an upload under a different name
# when a file with that name already exists, so a by-name read could pick up
# an older file.
ledger = {}
for fname, content in uploaded.items():
    poster_path = os.path.join("official", os.path.basename(fname))
    with open(poster_path, "wb") as f:
        f.write(content)
    official_posters.append(poster_path)
    ledger[fname] = hashlib.sha256(content).hexdigest()

with open("ledger.json", "w") as f:
    json.dump(ledger, f, indent=4)

print("✅ Official poster hashes stored in ledger.json")

# ------------------------
# Step 2: Scrape Candidate Posters from X
# ------------------------
print("🔎 Fetching candidate posters from X...")
query = f"{MOVIE_NAME} poster has:images -is:retweet"

tweets = client.search_recent_tweets(query=query, max_results=10,
                                     tweet_fields=["id","created_at","author_id","text"],
                                     expansions=["attachments.media_keys","author_id"],
                                     media_fields=["url"])

includes = tweets.includes or {}

# Map authors (author_id -> username)
author_map = {user.id: user.username for user in includes.get("users", [])}

# Map media (media_key -> media object)
media = {m.media_key: m for m in includes.get("media", [])}

os.makedirs("candidates", exist_ok=True)
downloaded_candidates = []

for tweet in tweets.data or []:
    media_keys = (tweet.data.get("attachments") or {}).get("media_keys", [])
    saved_for_tweet = 0
    for mkey in media_keys:
        # Presumably non-photo media carry no direct url; skip anything
        # that cannot be downloaded.
        url = getattr(media.get(mkey), "url", None)
        if not url:
            continue

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # Decode once purely to confirm the payload is a readable image.
            Image.open(BytesIO(response.content)).convert("RGB")
        except (requests.RequestException, OSError) as exc:
            print(f"⚠️ Skipping {url}: {exc}")
            continue

        # The first image keeps the plain "<tweet id>.jpg" name; further images
        # of the same tweet get a numeric suffix so they do not overwrite it.
        suffix = "" if saved_for_tweet == 0 else f"_{saved_for_tweet}"
        save_path = f"candidates/{tweet.id}{suffix}.jpg"

        # Persist the bytes exactly as served. Re-encoding through PIL would
        # change the file's SHA-256, so the hash comparison against the ledger
        # could never succeed.
        with open(save_path, "wb") as f:
            f.write(response.content)
        saved_for_tweet += 1

        downloaded_candidates.append({
            "tweet_id": tweet.id,
            "username": author_map.get(tweet.author_id, "unknown"),
            "created_at": str(tweet.created_at),
            "text": tweet.text[:100],  # preview
            "file": save_path
        })

print(f"📥 Downloaded {len(downloaded_candidates)} candidate posters.")

# ------------------------
# Step 3: CNN Similarity Function
# ------------------------
# ImageNet-pretrained ResNet50 without the classification head; pooling='avg'
# makes the network emit one pooled feature vector per image.
model = ResNet50(include_top=False, pooling='avg', weights='imagenet')

def get_embedding(img_path):
    """Return the flattened ResNet50 feature vector for the image at ``img_path``.

    The image is loaded at 224x224, turned into a one-element batch, run
    through ``preprocess_input`` and then through ``model``.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    features = model.predict(preprocess_input(batch), verbose=0)
    return features.flatten()

# One embedding per official poster, in the same order as official_posters.
official_embeddings = list(map(get_embedding, official_posters))

# ------------------------
# Step 4: Verification + Reporting
# ------------------------
print("⚡ Classifying candidate posters...")

def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    If either vector has zero norm the similarity is undefined; 0.0 is
    returned in that case instead of NaN so that downstream ``max()`` and
    threshold comparisons stay well-defined.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def _classify_candidate(cand):
    """Build one report row for a downloaded candidate poster.

    The candidate is compared against every official embedding. Below a best
    similarity of 0.90 it is labelled irrelevant; otherwise its file hash is
    looked up in the ledger to decide between official and fake/doctored.
    """
    embedding = get_embedding(cand["file"])
    best_score = max([cosine_similarity(embedding, ref) for ref in official_embeddings])

    if best_score < 0.90:
        label = "Irrelevant ⚠️"
    else:
        with open(cand["file"], "rb") as fh:
            digest = hashlib.sha256(fh.read()).hexdigest()
        label = "Official ✅" if digest in ledger.values() else "Fake/Doctored ❌ (Flagged)"

    username = cand["username"]
    tweet_id = cand["tweet_id"]
    return {
        "Tweet ID": tweet_id,
        "Tweet URL": f"https://x.com/{username}/status/{tweet_id}",
        "Username": username,
        "Posted At": cand["created_at"],
        "Tweet Snippet": cand["text"],
        "Similarity (%)": round(best_score * 100, 2),
        "Classification": label
    }

results = [_classify_candidate(cand) for cand in downloaded_candidates]

# Convert to DataFrame, show it, then persist it as the report
df = pd.DataFrame(results)
print("\n📊 Detection Report:\n")
print(df)

df.to_csv("poster_detection_report.csv", index=False)
print("\n✅ Report saved as poster_detection_report.csv")


🔑 Please provide your X (Twitter) API Bearer Token:
Enter Bearer Token: [REDACTED — credential removed from saved output; rotate this token]
🎬 Enter the movie name you want to monitor:
Movie Name: Saiyaara
How many official posters do you want to upload? 10
📤 Please upload 10 official poster(s)...


Saving test 1.jpeg to test 1 (1).jpeg
Saving test2.jpg to test2 (1).jpg
Saving test3.jpg to test3 (1).jpg
Saving test4.jpeg to test4 (1).jpeg
Saving test5.jpeg to test5 (1).jpeg
Saving test6.jpg to test6 (1).jpg
Saving test7.webp to test7 (1).webp
Saving test8.jpeg to test8 (1).jpeg
Saving test9.jpg to test9 (1).jpg
Saving test10.jpg to test10 (1).jpg
✅ Official poster hashes stored in ledger.json
🔎 Fetching candidate posters from X...
📥 Downloaded 2 candidate posters.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
⚡ Classifying candidate posters...

📊 Detection Report:

              Tweet ID                                          Tweet URL  \
0  1973643569916498302  https://x.com/buzzzookashow/status/19736435699...   
1  1973355872672948511  https://x.com/ocdtimes/status/1973355872672948511   

        U

In [None]:
!ngrok authtoken 33bYPaYgQ8VhnjEMzmD7helvHjg_27GeAa7JV54XGms78V6xY
!pip install streamlit pandas pillow pyngrok


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# ================================
# Step 0: Install Required Packages
# ================================
!pip install tweepy tensorflow pillow pandas streamlit pyngrok tqdm --quiet

# ================================
# Step 1: Imports
# ================================
import os, json, hashlib, requests, pandas as pd, numpy as np
from io import BytesIO
from PIL import Image
from google.colab import files
import streamlit as st
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
import tweepy

# ================================
# Step 2: Setup folders
# ================================
from getpass import getpass

os.makedirs("official_posters", exist_ok=True)
os.makedirs("candidate_posters", exist_ok=True)

# ================================
# Step 3: X API Setup
# ================================
# getpass() is used for the token because input() echoes the typed value into
# the cell output, which is saved together with the notebook.
BEARER_TOKEN = getpass("🔑 Enter your X (Twitter) API Bearer Token: ").strip()
MOVIE_NAME = input("🎬 Enter the movie name to monitor: ").strip()
client = tweepy.Client(bearer_token=BEARER_TOKEN, wait_on_rate_limit=True)

# ================================
# Step 4: Upload Official Posters
# ================================
num_official = int(input("How many official posters do you want to upload? "))
print(f"📤 Please upload {num_official} official poster(s)...")
# files.upload() returns a {filename: bytes} mapping.
uploaded = files.upload()

if len(uploaded) != num_official:
    print(f"⚠️ Expected {num_official} poster(s) but received {len(uploaded)}.")

os.makedirs("official_posters", exist_ok=True)

# Write each poster into official_posters/ from the uploaded bytes and hash
# those same bytes. Moving files by name is avoided on purpose: Colab may save
# an upload under a different name when the name is already taken, so a
# by-name move could pick up an older file.
# The copy Colab saved in the working directory is left untouched.
official_posters = []
ledger = {}
for fname, content in uploaded.items():
    poster_name = os.path.basename(fname)
    poster_path = f"official_posters/{poster_name}"
    with open(poster_path, "wb") as f:
        f.write(content)
    official_posters.append(poster_path)
    # Compute SHA-256 hash, keyed by base file name
    ledger[poster_name] = hashlib.sha256(content).hexdigest()

with open("ledger.json", "w") as f:
    json.dump(ledger, f, indent=4)
print("✅ Official poster hashes stored in ledger.json")

# ================================
# Step 5: Scrape Candidate Posters from X
# ================================
query = f"{MOVIE_NAME} poster has:images -is:retweet"
tweets = client.search_recent_tweets(
    query=query,
    max_results=50,
    tweet_fields=["id","created_at","author_id","text"],
    expansions=["attachments.media_keys","author_id"],
    media_fields=["url"]
)

includes = tweets.includes or {}
author_map = {user.id: user.username for user in includes.get("users", [])}
media_map = {m.media_key: m for m in includes.get("media", [])}

os.makedirs("candidate_posters", exist_ok=True)
downloaded_candidates = []

for tweet in tweets.data or []:
    media_keys = (tweet.data.get("attachments") or {}).get("media_keys", [])
    saved_for_tweet = 0
    for mkey in media_keys:
        # Presumably non-photo media carry no direct url; skip anything
        # that cannot be downloaded.
        url = getattr(media_map.get(mkey), "url", None)
        if not url:
            continue

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # Decode once purely to confirm the payload is a readable image.
            Image.open(BytesIO(response.content)).convert("RGB")
        except (requests.RequestException, OSError) as exc:
            print(f"⚠️ Skipping {url}: {exc}")
            continue

        # The first image keeps the plain "<tweet id>.jpg" name; further images
        # of the same tweet get a numeric suffix so they do not overwrite it.
        suffix = "" if saved_for_tweet == 0 else f"_{saved_for_tweet}"
        save_path = f"candidate_posters/{tweet.id}{suffix}.jpg"

        # Persist the bytes exactly as served. Re-encoding through PIL would
        # change the file's SHA-256, so the hash comparison against the ledger
        # could never succeed.
        with open(save_path, "wb") as f:
            f.write(response.content)
        saved_for_tweet += 1

        downloaded_candidates.append({
            "tweet_id": tweet.id,
            "username": author_map.get(tweet.author_id, "unknown"),
            "created_at": str(tweet.created_at),
            "text": tweet.text[:100],
            "file": save_path
        })
print(f"📥 Downloaded {len(downloaded_candidates)} candidate posters.")

# ================================
# Step 6: CNN Setup (ResNet50)
# ================================
# ImageNet-pretrained ResNet50 without the classification head; pooling='avg'
# makes the network emit one pooled feature vector per image.
model = ResNet50(include_top=False, pooling='avg', weights='imagenet')

def get_embedding(img_path):
    """Return the flattened ResNet50 feature vector for the image at ``img_path``.

    The image is loaded at 224x224, turned into a one-element batch, run
    through ``preprocess_input`` and then through ``model``.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    features = model.predict(preprocess_input(batch), verbose=0)
    return features.flatten()

# One embedding per official poster, in the same order as official_posters.
official_embeddings = list(map(get_embedding, official_posters))

def cosine_similarity(a,b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    If either vector has zero norm the similarity is undefined; 0.0 is
    returned in that case instead of NaN so that downstream ``max()`` and
    threshold comparisons stay well-defined.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

# ================================
# Step 7: Verification + Reporting
# ================================
def _report_row(cand):
    """Build one report row for a downloaded candidate poster.

    Below a best similarity of 0.9 the candidate is labelled irrelevant.
    Otherwise its file hash is compared with the ledger entry of the most
    similar official poster to decide between official and fake/doctored.
    """
    embedding = get_embedding(cand["file"])
    scores = [cosine_similarity(embedding, ref) for ref in official_embeddings]
    top_score = max(scores)

    if top_score < 0.9:
        verdict = "Irrelevant ⚠️"
    else:
        with open(cand["file"], "rb") as fh:
            digest = hashlib.sha256(fh.read()).hexdigest()
        # Ledger keys are base file names, so strip the folder of the closest match.
        closest_name = os.path.basename(official_posters[np.argmax(scores)])
        if digest == ledger.get(closest_name):
            verdict = "Official ✅"
        else:
            verdict = "Fake/Doctored ❌ (Flagged)"

    username = cand["username"]
    tweet_id = cand["tweet_id"]
    return {
        "Tweet ID": tweet_id,
        "Tweet URL": f"https://x.com/{username}/status/{tweet_id}",
        "Username": username,
        "Posted At": cand["created_at"],
        "Tweet Snippet": cand["text"],
        "Similarity (%)": round(top_score*100, 2),
        "Classification": verdict
    }

results = [_report_row(cand) for cand in downloaded_candidates]

# Persist the report for the dashboard cells that read it back.
df = pd.DataFrame(results)
df.to_csv("poster_detection_report.csv", index=False)
print("✅ Report saved as poster_detection_report.csv")


🔑 Enter your X (Twitter) API Bearer Token: [REDACTED — credential removed from saved output; rotate this token]
🎬 Enter the movie name to monitor: Saiyaara
How many official posters do you want to upload? 10
📤 Please upload 10 official poster(s)...


Saving test 1.jpeg to test 1 (2).jpeg
Saving test2.jpg to test2 (2).jpg
Saving test3.jpg to test3 (2).jpg
Saving test4.jpeg to test4 (2).jpeg
Saving test5.jpeg to test5 (2).jpeg
Saving test6.jpg to test6 (2).jpg
Saving test7.webp to test7 (2).webp
Saving test8.jpeg to test8 (2).jpeg
Saving test9.jpg to test9 (2).jpg
Saving test10.jpg to test10 (2).jpg
✅ Official poster hashes stored in ledger.json
📥 Downloaded 2 candidate posters.
✅ Report saved as poster_detection_report.csv


In [None]:
# ================================
# Streamlit App for Poster Verification
# ================================

import streamlit as st
import pandas as pd
from PIL import Image

# ================================
# 1. Load Report
# ================================
# NOTE(review): this is Streamlit script code; executed directly in a notebook
# cell it only emits warnings. It is meant to be launched with `streamlit run`.
st.title("🎬 Movie Poster Verification Dashboard")

st.markdown(
    """
    This dashboard shows the classification of candidate posters fetched from X (Twitter)
    against the official posters using SHA-256 hashing and ResNet50 embeddings.
    """
)

# Load CSV
df = pd.read_csv("poster_detection_report.csv")

# ================================
# 2. Filters
# ================================
st.sidebar.header("Filters")
status_filter = st.sidebar.multiselect(
    "Filter by Classification",
    options=df["Classification"].unique(),
    default=df["Classification"].unique()
)

username_filter = st.sidebar.text_input("Filter by Username (optional)")

filtered_df = df[df["Classification"].isin(status_filter)]
if username_filter:
    # Match the typed text literally (regex=False) so characters such as "."
    # or "(" are not interpreted as a pattern, and cast to str so missing
    # usernames cannot break the .str accessor or the boolean mask.
    username_mask = filtered_df["Username"].astype(str).str.contains(
        username_filter, case=False, regex=False, na=False
    )
    filtered_df = filtered_df[username_mask]

st.write(f"### Showing {len(filtered_df)} Posters")

# ================================
# 3. Display Table
# ================================
st.dataframe(filtered_df[[
    "Tweet ID", "Tweet URL", "Username", "Posted At", "Tweet Snippet", "Similarity (%)", "Classification"
]])

# ================================
# 4. Display Images
# ================================
st.write("### Poster Previews")

for idx, row in filtered_df.iterrows():
    st.markdown(f"**{row['Classification']} — @{row['Username']} — {row['Similarity (%)']}% similar**")
    image_path = f"candidate_posters/{row['Tweet ID']}.jpg"
    try:
        img = Image.open(image_path)
    except (FileNotFoundError, OSError):
        # The report can outlive the downloaded files; keep rendering the rest.
        st.warning(f"Preview not available: {image_path}")
        continue
    # use_column_width is deprecated in favour of use_container_width.
    st.image(img, use_container_width=True)


2025-10-04 16:02:01.792 Session state does not function when running a script without `streamlit run`
2025-10-04 16:02:01.887 The `use_column_width` parameter has been deprecated and will be removed in a future release. Please utilize the `use_container_width` parameter instead.
2025-10-04 16:02:02.031 The `use_column_width` parameter has been deprecated and will be removed in a future release. Please utilize the `use_container_width` parameter instead.


In [None]:
!pip install pyngrok --quiet

from pyngrok import ngrok

!streamlit run app.py &

public_url = ngrok.connect(8501)
print(public_url)




Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.231.129.108:8501[0m
[0m




[34m  Stopping...[0m
NgrokTunnel: "https://superably-nonargumentative-dotty.ngrok-free.dev" -> "http://localhost:8501"


Complete Streamlit app (written to `app.py` by the next cell)

In [None]:
%%writefile app.py
import streamlit as st
import os, json, hashlib, requests, pandas as pd, numpy as np
from io import BytesIO
from PIL import Image
import tweepy
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image

# ================================
# Setup folders
# ================================
os.makedirs("official_posters", exist_ok=True)
os.makedirs("candidate_posters", exist_ok=True)

# ================================
# Streamlit UI
# ================================
st.title("🎬 Movie Poster Verification Dashboard")
st.markdown("""
Upload official posters, provide X API credentials, and verify candidate posters from X.
""")

# Step 1: Inputs
# type="password" masks the token in the browser.
bearer_token = st.text_input("Enter your X (Twitter) API Bearer Token", type="password")
movie_name = st.text_input("Enter the movie name to monitor")

# "webp" is accepted as well so the app takes the same official posters as the
# notebook pipeline, which is not restricted by file extension.
uploaded_files = st.file_uploader(
    "Upload Official Posters", type=["png","jpg","jpeg","webp"], accept_multiple_files=True
)

run_verification = st.button("✅ Run Poster Verification")

# ================================
# Main Logic
# ================================
@st.cache_resource
def load_model():
    """Build the headless ResNet50 feature extractor once per server process.

    Streamlit re-runs this script on every interaction; caching the model
    avoids rebuilding it on each button click.
    """
    return ResNet50(weights='imagenet', include_top=False, pooling='avg')


def get_embedding(img_path):
    """Return the flattened ResNet50 feature vector for the image at ``img_path``."""
    img = image.load_img(img_path, target_size=(224,224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    feat = load_model().predict(x, verbose=0)
    return feat.flatten()


def cosine_similarity(a,b):
    """Return the cosine similarity of ``a`` and ``b``; 0.0 if either has zero norm."""
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom


def save_official_posters(uploaded_files):
    """Store uploaded posters under official_posters/ and write ledger.json.

    Returns ``(paths, ledger)`` where ``ledger`` maps each base file name to
    the SHA-256 hex digest of the uploaded bytes.
    """
    paths = []
    ledger = {}
    for uploaded_file in uploaded_files:
        # basename() keeps the write inside official_posters/ even if the
        # client-supplied name contains path separators.
        name = os.path.basename(uploaded_file.name)
        data = uploaded_file.getvalue()
        path = os.path.join("official_posters", name)
        with open(path, "wb") as f:
            f.write(data)
        paths.append(path)
        ledger[name] = hashlib.sha256(data).hexdigest()

    with open("ledger.json", "w") as f:
        json.dump(ledger, f, indent=4)
    return paths, ledger


def fetch_candidates(client, movie_name):
    """Search recent posts for ``movie_name`` posters and download their images.

    Returns a list of dicts with tweet metadata, the local ``file`` path and
    the ``sha256`` of the bytes exactly as they were served.
    """
    query = f"{movie_name} poster has:images -is:retweet"
    tweets = client.search_recent_tweets(
        query=query,
        max_results=50,
        tweet_fields=["id","created_at","author_id","text"],
        expansions=["attachments.media_keys","author_id"],
        media_fields=["url"]
    )

    includes = tweets.includes or {}
    author_map = {user.id: user.username for user in includes.get("users", [])}
    media_map = {m.media_key: m for m in includes.get("media", [])}

    candidates = []
    for tweet in tweets.data or []:
        media_keys = (tweet.data.get("attachments") or {}).get("media_keys", [])
        saved_for_tweet = 0
        for mkey in media_keys:
            # Presumably non-photo media carry no direct url; skip anything
            # that cannot be downloaded.
            url = getattr(media_map.get(mkey), "url", None)
            if not url:
                continue

            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                # Decode once purely to confirm the payload is a readable image.
                Image.open(BytesIO(response.content)).convert("RGB")
            except (requests.RequestException, OSError) as exc:
                st.warning(f"Skipping {url}: {exc}")
                continue

            # The first image keeps the plain "<tweet id>.jpg" name; further
            # images of the same tweet get a suffix so they do not overwrite it.
            suffix = "" if saved_for_tweet == 0 else f"_{saved_for_tweet}"
            save_path = f"candidate_posters/{tweet.id}{suffix}.jpg"

            # Persist and hash the bytes exactly as served. Re-encoding through
            # PIL would change the SHA-256, so a byte-identical official poster
            # could never be recognised.
            with open(save_path, "wb") as f:
                f.write(response.content)
            saved_for_tweet += 1

            candidates.append({
                "tweet_id": tweet.id,
                "username": author_map.get(tweet.author_id, "unknown"),
                "created_at": str(tweet.created_at),
                "text": tweet.text[:100],
                "file": save_path,
                "sha256": hashlib.sha256(response.content).hexdigest()
            })
    return candidates


def classify_candidates(candidates, official_embeddings, ledger):
    """Return one report row per candidate, in the same order as ``candidates``."""
    official_hashes = set(ledger.values())
    rows = []
    for cand in candidates:
        cand_embed = get_embedding(cand["file"])
        similarities = [cosine_similarity(cand_embed, off) for off in official_embeddings]
        max_sim = max(similarities)

        if max_sim < 0.9:
            status = "Irrelevant ⚠️"
        elif cand["sha256"] in official_hashes:
            status = "Official ✅"
        else:
            status = "Fake/Doctored ❌ (Flagged)"

        rows.append({
            "Tweet ID": cand["tweet_id"],
            "Tweet URL": f"https://x.com/{cand['username']}/status/{cand['tweet_id']}",
            "Username": cand["username"],
            "Posted At": cand["created_at"],
            "Tweet Snippet": cand["text"],
            "Similarity (%)": round(max_sim*100,2),
            "Classification": status
        })
    return rows


if run_verification:
    if not bearer_token or not movie_name or not uploaded_files:
        st.error("Please provide all inputs and upload at least one official poster.")
    else:
        st.info("Running verification... This may take a few minutes.")

        # Save official posters and compute hashes
        official_posters, ledger = save_official_posters(uploaded_files)

        # X API setup + scrape candidate posters
        client = tweepy.Client(bearer_token=bearer_token, wait_on_rate_limit=True)
        try:
            downloaded_candidates = fetch_candidates(client, movie_name)
        except tweepy.TweepyException as exc:
            # Show API failures as a message instead of a raw traceback.
            st.error(f"X API request failed: {exc}")
            st.stop()

        # CNN embeddings + verification
        official_embeddings = [get_embedding(p) for p in official_posters]
        results = classify_candidates(downloaded_candidates, official_embeddings, ledger)

        df = pd.DataFrame(results)
        df.to_csv("poster_detection_report.csv", index=False)

        st.success(f"✅ Verification complete! {len(df)} posters analyzed.")

        # Download button
        st.download_button(
            label="📥 Download Report as CSV",
            data=df.to_csv(index=False),
            file_name="poster_detection_report.csv",
            mime="text/csv"
        )

        # Display posters. Rows and candidates share the same order, so each
        # preview uses the exact file that was classified.
        st.write("### Poster Previews")
        for row, cand in zip(results, downloaded_candidates):
            st.markdown(f"**{row['Classification']} — @{row['Username']} — {row['Similarity (%)']}% similar**")
            img = Image.open(cand["file"])
            # use_column_width is deprecated in favour of use_container_width.
            st.image(img, use_container_width=True)


Overwriting app.py


In [None]:
# Install dependencies
!pip install streamlit pyngrok tensorflow pillow pandas tweepy --quiet

# Start Streamlit + ngrok
import os, time
from pyngrok import ngrok

import socket

PORT = 8501
APP_FILE = "app.py"
STARTUP_TIMEOUT_S = 60  # upper bound on how long to wait for the server

# Start Streamlit in background
get_ipython().system_raw(f"streamlit run {APP_FILE} --server.port {PORT} --server.headless true &")

# Wait for server to be ready: poll the port instead of sleeping a fixed time,
# so the cell neither proceeds too early nor waits longer than needed.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    try:
        with socket.create_connection(("localhost", PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            raise RuntimeError(
                f"Streamlit did not start listening on port {PORT} within {STARTUP_TIMEOUT_S}s"
            )
        time.sleep(0.5)

# Open ngrok tunnel. ngrok.connect() returns a tunnel object; print its
# public_url attribute so the message shows the URL rather than the object repr.
public_url = ngrok.connect(PORT)
print(f"🚀 Streamlit app is live at: {public_url.public_url}")


🚀 Streamlit app is live at: NgrokTunnel: "https://superably-nonargumentative-dotty.ngrok-free.dev" -> "http://localhost:8501"
