###### !pip install customtkinter pandas numpy scikit-learn mysql-connector-python

In [2]:
#Data Preprocessing (Merging True & Fake datasets)

import pandas as pd

# Load raw datasets: 'True.csv' holds the real articles, 'Fake.csv' the fake ones.
print("Loading datasets...")
true_df = pd.read_csv('True.csv')
fake_df = pd.read_csv('Fake.csv')

# Add labels (0 for Real, 1 for Fake)
true_df['label'] = 0
fake_df['label'] = 1

# Combine both classes into one frame.
df = pd.concat([true_df, fake_df], ignore_index=True)

# A row with neither a headline nor a body has nothing to learn from.
df = df.dropna(subset=['title', 'text'], how='all')

# Shuffle with a fixed seed so the saved CSV is identical between runs.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Create 'full_text' column (Headline + Text).
# Missing values are replaced by empty strings first: NaN + str yields NaN in
# pandas, which would silently discard an article that lacks only one field.
df['full_text'] = df['title'].fillna('') + " " + df['text'].fillna('')

# Keep only what we need to save memory
df = df[['full_text', 'label']]

# Save the cleaned version for the Brain to read
df.to_csv('cleaned_news_data.csv', index=False)
print("✅ Data cleaning complete! 'cleaned_news_data.csv' created.")

Loading datasets...
✅ Data cleaning complete! 'cleaned_news_data.csv' created.


In [4]:
#The Main Application (Logic + UI)

import customtkinter as ctk
import mysql.connector
import pandas as pd
import numpy as np
from tkinter import messagebox
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1. DATABASE MANAGER
class DBManager:
    """Best-effort MySQL logger for prediction results.

    Every operation degrades gracefully: when the database is unreachable or a
    statement fails, the problem is reported on stdout and the caller carries
    on without an exception.

    Attributes:
        conn: Open MySQL connection, or None when connecting failed.
        cursor: Cursor on ``conn``, or None when connecting failed.
    """

    def __init__(self, host="localhost", user="root", password="", database="new_db"):
        """Open the connection and a cursor.

        Args:
            host: MySQL server host name.
            user: MySQL user name.
            password: Password for ``user``.
            database: Schema that contains the ``predictions`` table.
        """
        # Define both attributes up front so the other methods can test for a
        # missing connection instead of relying on an AttributeError.
        self.conn = None
        self.cursor = None
        try:
            self.conn = mysql.connector.connect(
                host=host, user=user, password=password, database=database
            )
            self.cursor = self.conn.cursor()
        except mysql.connector.Error as err:
            print(f"Database Connection Error: {err}")

    def save_to_db(self, article, result, score):
        """Insert one prediction row; failures are printed, never raised.

        Args:
            article: Article text; only the first 500 characters are stored.
            result: Verdict string to store in the ``result`` column.
            score: Confidence value; converted with ``float()`` before insert.
        """
        if self.conn is None or self.cursor is None:
            print("Could not save to database.")
            return

        sql = "INSERT INTO predictions (article_text, result, confidence) VALUES (%s, %s, %s)"
        try:
            self.cursor.execute(sql, (article[:500], result, float(score)))
            self.conn.commit()
        except Exception as err:
            # Deliberate best-effort boundary: logging must never break the
            # caller. `Exception` (unlike a bare `except`) still lets
            # KeyboardInterrupt and SystemExit through.
            print(f"Could not save to database: {err}")

    def fetch_last_five(self):
        """Return the five newest rows as (result, confidence, timestamp) tuples.

        Returns:
            The rows from ``cursor.fetchall()``, or an empty list when there is
            no connection or the query fails.
        """
        if self.cursor is None:
            return []

        try:
            self.cursor.execute("SELECT result, confidence, timestamp FROM predictions ORDER BY id DESC LIMIT 5")
            return self.cursor.fetchall()
        except Exception as err:
            # Same best-effort boundary as save_to_db, but no longer silent.
            print(f"Could not read prediction history: {err}")
            return []

# 2. THE ML BRAIN
class FakeNewsBrain:
    """TF-IDF + logistic-regression classifier for news text.

    The model is trained from 'cleaned_news_data.csv' when the object is
    created. Label 1 is reported as "Fake"; any other label as "Real".

    Attributes:
        vectorizer: TfidfVectorizer (English stop words, max_df=0.7).
        model: LogisticRegression fitted on the TF-IDF features.
        is_trained: True only after a successful fit.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
        self.model = LogisticRegression()
        # Lets callers tell a usable model from one whose training data was
        # missing, instead of discovering it through a failure in predict().
        self.is_trained = False
        self.train_and_evaluate()

    def train_and_evaluate(self):
        """Fit on 80% of the cleaned data and print accuracy on the other 20%.

        When the CSV is missing, a hint is printed and the model stays
        untrained (``is_trained`` remains False).
        """
        # Only the file read can raise FileNotFoundError, so only it is guarded.
        try:
            df = pd.read_csv('cleaned_news_data.csv')
        except FileNotFoundError:
            print("❌ Error: 'cleaned_news_data.csv' not found. Run Cell 1 first!")
            return

        df = df.dropna()
        X_train, X_test, y_train, y_test = train_test_split(
            df['full_text'], df['label'], test_size=0.2, random_state=42
        )
        X_train_tfidf = self.vectorizer.fit_transform(X_train)
        self.model.fit(X_train_tfidf, y_train)
        self.is_trained = True

        # The held-out texts are transformed only; the vocabulary comes from
        # the training split.
        acc = accuracy_score(y_test, self.model.predict(self.vectorizer.transform(X_test)))
        print(f"✅ Training Complete! Accuracy: {round(acc*100, 2)}%")

    def _require_trained(self):
        """Raise a readable error when the model has not been fitted."""
        if not self.is_trained:
            raise ValueError(
                "Model is not trained: 'cleaned_news_data.csv' was not found. "
                "Run the preprocessing cell, then call train_and_evaluate()."
            )

    def predict(self, text):
        """Classify one article.

        Args:
            text: Raw article text.

        Returns:
            Tuple ``(verdict, confidence)``: verdict is "Fake" or "Real",
            confidence is the highest class probability as a percentage
            rounded to two decimals.

        Raises:
            ValueError: If the model has not been trained.
        """
        self._require_trained()
        transformed_text = self.vectorizer.transform([text])
        prediction = self.model.predict(transformed_text)[0]
        probabilities = self.model.predict_proba(transformed_text)
        confidence = round(float(max(probabilities[0])) * 100, 2)
        verdict = "Fake" if prediction == 1 else "Real"
        return verdict, confidence

    def get_explaining_keywords(self, text, top_n=5):
        """Return the article's words with the largest model coefficients.

        Only words that are both in the article and in the fitted vocabulary
        are considered; they are ranked by absolute coefficient, largest first.

        Args:
            text: Raw article text.
            top_n: Maximum number of words to return.

        Returns:
            List of at most ``top_n`` words; empty when no word of the article
            is in the vocabulary.

        Raises:
            ValueError: If the model has not been trained.
        """
        self._require_trained()
        feature_names = self.vectorizer.get_feature_names_out()
        transformed_text = self.vectorizer.transform([text])
        coefficients = self.model.coef_[0]
        # nonzero()[1] holds the column (vocabulary) indices present in the text.
        word_indices = transformed_text.nonzero()[1]
        ranked = sorted(
            ((feature_names[i], coefficients[i]) for i in word_indices),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        return [word for word, _ in ranked[:top_n]]

# 3. THE INTERFACE
class NewsApp(ctk.CTk):
    """Main window: article input, verdict display, explanation and DB history."""

    def __init__(self):
        super().__init__()
        self.title("AI News Guard Pro v2.0")
        self.geometry("600x750")

        # Text of the most recent successful analysis. The explanation dialog
        # uses this instead of the live textbox so it always describes the
        # verdict currently on screen, even if the user has edited the text.
        self._last_text = ""

        # Init components
        self.db = DBManager()
        self.brain = FakeNewsBrain()
        self.setup_ui()
        self.refresh_history()

    def setup_ui(self):
        """Create and pack all widgets, top to bottom."""
        ctk.CTkLabel(self, text="🛡️ AI News Guard", font=("Arial", 28, "bold")).pack(pady=20)
        self.textbox = ctk.CTkTextbox(self, width=500, height=200)
        self.textbox.pack(pady=10)

        self.btn = ctk.CTkButton(self, text="Analyze News", command=self.process_news, fg_color="#1f538d")
        self.btn.pack(pady=10)

        # Stays disabled until there is a verdict to explain.
        self.explain_btn = ctk.CTkButton(self, text="Why is this Fake/Real?", command=self.show_explanation, fg_color="gray", state="disabled")
        self.explain_btn.pack(pady=5)

        self.result_label = ctk.CTkLabel(self, text="Status: Waiting for input...", font=("Arial", 16))
        self.result_label.pack(pady=10)

        self.history_frame = ctk.CTkScrollableFrame(self, width=500, height=180, label_text="Recent Database Logs")
        self.history_frame.pack(pady=20)

    def process_news(self):
        """Classify the textbox content, show the verdict and log it."""
        text = self.textbox.get("1.0", "end-1c")
        if not text.strip():
            return

        try:
            verdict, score = self.brain.predict(text)
        except (ValueError, AttributeError) as err:
            # scikit-learn's NotFittedError derives from both of these, so an
            # untrained model (e.g. missing training CSV) lands here and is
            # shown to the user instead of escaping the button callback.
            messagebox.showerror("Analysis Failed", f"Could not analyze the article:\n\n{err}")
            return

        self._last_text = text
        color = "#ff4d4d" if verdict == "Fake" else "#2ecc71"
        self.result_label.configure(text=f"Result: {verdict} ({score}%)", text_color=color)
        self.explain_btn.configure(state="normal")
        self.db.save_to_db(text, verdict, score)
        self.refresh_history()

    def show_explanation(self):
        """Show the top keywords for the most recently analyzed article."""
        keywords = self.brain.get_explaining_keywords(self._last_text)
        if keywords:
            msg = "The AI identified these top 'Impact Words' for its decision:\n\n" + "\n".join(f"• {w}" for w in keywords)
        else:
            # Happens when no word of the article is in the model vocabulary.
            msg = "No 'Impact Words' could be identified for this article."
        messagebox.showinfo("Explainable AI Logic", msg)

    def refresh_history(self):
        """Rebuild the history panel from the latest database rows."""
        for widget in self.history_frame.winfo_children():
            widget.destroy()

        for row in self.db.fetch_last_five():
            # Rows are (result, confidence, timestamp); fall back to a
            # placeholder time when the timestamp is missing.
            time_val = row[2].strftime("%H:%M") if row[2] else "00:00"
            ctk.CTkLabel(self.history_frame, text=f"[{time_val}] {row[0]} ({row[1]}%)").pack(anchor="w", padx=10)

# Start the App
# Runs only when this code is the entry point (a script run or a notebook
# cell), not when it is imported as a module.
if __name__ == "__main__":
    # Constructing NewsApp connects to the database and trains the model
    # before the window is shown.
    app = NewsApp()
    # Hands control to the GUI event loop.
    app.mainloop()

✅ Training Complete! Accuracy: 98.78%
