KNN MODEL

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Toy dataset: 15 short news strings, each paired with a boolean truth label.
data = {
    "News": [
        "government plan ban social media app india",
        "india china reach new trade agreement",
        "new evidence suggest world flat scientist baffle",
        "india introduce new education reform improve learning",
        "india richest man pledge donate billion charity",
        "scientist discover cure cancer india",
        "mumbai experience heavy rainfall week",
        "government provide free internet citizen india",
        "prime minister modi announce new farm bill parliament",
        "nasa reveal india first colonize mars",
        "india celebrate diwali grand firework display",
        "indian economy booming expert say",
        "new app let indian order food directly local restaurant",
        "india cut tie neighbor country trade issue",
        "yoga become global health trend say indian expert"
    ],
    "True/False": [False, True, False, True, True, False, True, False, True, False, True, True, True, False, True]
}

# Create DataFrame
df = pd.DataFrame(data)

# Convert labels to numeric (True -> 1, False -> 0)
df["True/False"] = df["True/False"].astype(int)
y = df["True/False"]

# Train-test split on the RAW text, before any vectorization, so the held-out
# documents cannot influence the TF-IDF vocabulary or IDF weights (fitting the
# vectorizer on the full corpus first leaks test information into training).
news_train, news_test, y_train, y_test = train_test_split(
    df["News"], y, test_size=0.2, random_state=42
)

# TF-IDF Vectorization: learn vocabulary/IDF from the training text only, then
# apply the already-fitted vectorizer to the test text.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(news_train)
X_test = vectorizer.transform(news_test)

# Feature matrix for the whole corpus, kept under its original name `X` for
# any other cell that reads it. It is produced with the train-fitted
# vectorizer and is NOT used for fitting or scoring below.
X = vectorizer.transform(df["News"])

# KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predictions
y_pred = knn.predict(X_test)

# Accuracy (the test split holds only 20% of 15 rows, so this number is a
# smoke check rather than a reliable estimate)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 100.00%


In [2]:
# Helper for trying the trained model on a single piece of news text

def predict_news(news_text, *, model=None, text_vectorizer=None):
    """Classify one piece of news text as "True" or "False".

    Parameters
    ----------
    news_text : str
        The text to classify. It is passed to the vectorizer as a
        single-document list.
    model : object, optional
        Fitted classifier exposing ``predict``. Defaults to the notebook-level
        ``knn``. Pass a reloaded model here to use the function outside the
        training session.
    text_vectorizer : object, optional
        Fitted vectorizer exposing ``transform``. Defaults to the
        notebook-level ``vectorizer``. It must be the same vectorizer the
        model was trained with.

    Returns
    -------
    str
        ``"True"`` when the predicted label equals 1, otherwise ``"False"``.

    Warns
    -----
    UserWarning
        When the transformed input has no non-zero features, i.e. none of its
        tokens are known to the vectorizer. A label is still returned, but it
        is not informed by the text.
    """
    import warnings

    # Fall back to the notebook globals only when no override is supplied, so
    # existing calls `predict_news(text)` behave exactly as before.
    active_vectorizer = vectorizer if text_vectorizer is None else text_vectorizer
    active_model = knn if model is None else model

    news_tfidf = active_vectorizer.transform([news_text])  # Transform input text

    # An all-zero feature vector carries no information about the text, so the
    # classifier's answer for it is effectively arbitrary; surface that.
    if news_tfidf.nnz == 0:
        warnings.warn(
            "Input text shares no terms with the vectorizer vocabulary; "
            "the prediction is not informed by the text.",
            UserWarning,
            stacklevel=2,
        )

    prediction = active_model.predict(news_tfidf)  # Predict using the trained model
    return "True" if prediction[0] == 1 else "False"

# Try the helper on one hand-written input and show the label it returns
test_news = " china is india"
predicted_label = predict_news(test_news)
print(f"Prediction: {predicted_label}")


Prediction: False


In [6]:
# The classifier only accepts TF-IDF features, so the fitted vectorizer has to
# be persisted too; without it the saved model cannot be used on new text.
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

# Kept as the cell's final expression so the displayed output is unchanged.
# NOTE: these files are pickles; only load them from a trusted source.
joblib.dump(knn, 'knn_model.pkl')


['knn_model.pkl']