<a href="https://colab.research.google.com/github/pponmathi10/sample/blob/main/Ponmathi_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import os

# Sample fake news dataset
def load_dataset():
    """Return the built-in toy corpus as a DataFrame.

    The frame has two columns: "text" (the headline) and "label"
    (1 = real news, 0 = fake news), with real and fake examples alternating.
    """
    samples = [
        ("Government announces new education policy", 1),
        ("Aliens landed in India yesterday", 0),
        ("Stock market shows steady growth", 1),
        ("Drinking bleach cures all diseases", 0),
        ("Scientists discover new planet", 1),
        ("Earth is flat according to experts", 0),
    ]
    # Unzip the pairs back into the two parallel columns.
    headlines, labels = zip(*samples)
    return pd.DataFrame({"text": list(headlines), "label": list(labels)})

def train_model(data):
    """Train the fake-news classifier, evaluate it, and persist it to disk.

    The rows are split into a training part and a 20% hold-out part *before*
    any vectorization, so the reported accuracy is not inflated by vocabulary
    or IDF statistics learned from the hold-out rows. After evaluation, a
    fresh vectorizer and classifier are fitted on all rows and written to
    "vectorizer.pkl" and "fake_news_model.pkl", so every labelled example
    (including rows that landed in the hold-out part) contributes to the
    persisted model.

    Args:
        data: DataFrame with a "text" column of documents and a "label"
            column of class labels (1 = real, 0 = fake).

    Returns:
        The accuracy score of the evaluation model on the hold-out split.
        Note that this scores the model fitted on the training part only,
        not the final model that is persisted.
    """
    X = data["text"]
    y = data["label"]

    try:
        # Stratify so both classes are represented in the training part;
        # a single-class training set makes LogisticRegression.fit raise.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError:
        # Stratification is not possible for this data (e.g. a class with a
        # single row); fall back to the plain random split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

    # Evaluation: fit the vectorizer on training text only, then merely
    # transform the hold-out text with it.
    eval_vectorizer = TfidfVectorizer(stop_words="english")
    eval_model = LogisticRegression()
    eval_model.fit(eval_vectorizer.fit_transform(X_train), y_train)
    acc = accuracy_score(
        y_test, eval_model.predict(eval_vectorizer.transform(X_test))
    )

    # Final artifacts: refit on every row so no labelled example is wasted.
    vectorizer = TfidfVectorizer(stop_words="english")
    model = LogisticRegression()
    model.fit(vectorizer.fit_transform(X), y)

    joblib.dump(model, "fake_news_model.pkl")
    joblib.dump(vectorizer, "vectorizer.pkl")

    return acc

def load_model():
    """Load the persisted classifier and vectorizer, training them if needed.

    If either "fake_news_model.pkl" or "vectorizer.pkl" is missing, the model
    is trained on the built-in dataset from load_dataset(), which writes both
    files. Checking both files avoids a crash in joblib.load when only one of
    the two artifacts is present.

    Returns:
        A (model, vectorizer) tuple loaded from the two pickle files.
    """
    artifact_paths = ("fake_news_model.pkl", "vectorizer.pkl")
    if not all(os.path.exists(path) for path in artifact_paths):
        data = load_dataset()
        train_model(data)

    # NOTE: joblib.load unpickles its input; only load artifacts that this
    # application wrote itself.
    model = joblib.load("fake_news_model.pkl")
    vectorizer = joblib.load("vectorizer.pkl")
    return model, vectorizer


In [None]:
import streamlit as st
import pandas as pd
from model import load_model, load_dataset, train_model

# File in which human corrections accumulate between retraining runs.
FEEDBACK_PATH = "feedback.csv"
# Predictions with confidence below this value are sent for human review.
CONFIDENCE_THRESHOLD = 0.75


def _read_feedback():
    """Return the saved human feedback, or None if none has been saved yet."""
    try:
        return pd.read_csv(FEEDBACK_PATH)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No feedback file yet (or an empty one) is the normal first-run case.
        return None


st.set_page_config(page_title="HITL Fake News Detection")

st.title("📰 Fake News Detection")
st.subheader("Human-in-the-Loop Machine Learning")

model, vectorizer = load_model()

news_text = st.text_area("Enter News Text")

if news_text:
    X_vec = vectorizer.transform([news_text])
    prediction = model.predict(X_vec)[0]
    # Confidence is the probability of the most likely class.
    confidence = max(model.predict_proba(X_vec)[0])

    label = "REAL ✅" if prediction == 1 else "FAKE ❌"
    st.markdown(f"### 🤖 Prediction: **{label}**")
    st.markdown(f"### 🔍 Confidence: **{confidence*100:.2f}%**")

    # Human-in-the-loop trigger
    if confidence < CONFIDENCE_THRESHOLD:
        st.warning("⚠ Low confidence prediction – Human verification required")

        human = st.radio("Human Fact-Checker Decision:", ["Correct", "Incorrect"])

        if human == "Incorrect":
            # Preselect the label opposite to the model's prediction, since
            # the reviewer has just marked that prediction as wrong.
            corrected = st.radio(
                "Correct Label:",
                ["REAL", "FAKE"],
                index=1 if prediction == 1 else 0,
            )
            corrected_label = 1 if corrected == "REAL" else 0

            # Streamlit reruns this script on every widget change, so the
            # write must be gated behind an explicit click; otherwise a row
            # is appended as soon as "Incorrect" is chosen and again on each
            # subsequent rerun.
            if st.button("Save Feedback"):
                feedback = pd.DataFrame({
                    "text": [news_text],
                    "label": [corrected_label]
                })

                old = _read_feedback()
                if old is not None:
                    feedback = pd.concat([old, feedback], ignore_index=True)

                feedback.to_csv(FEEDBACK_PATH, index=False)
                st.success("Human feedback saved!")

if st.button("🔁 Retrain Model with Human Feedback"):
    base = load_dataset()
    feedback = _read_feedback()
    # Train on the built-in examples plus any human corrections collected.
    if feedback is None:
        full = base
    else:
        full = pd.concat([base, feedback], ignore_index=True)

    acc = train_model(full)
    st.success(f"Model retrained successfully! New Accuracy: {acc*100:.2f}%")


ModuleNotFoundError: No module named 'streamlit'

In [None]:
# Attach the user's Google Drive to the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)