In [38]:
# Colab-only step: bring the dataset file into the runtime through the
# google.colab upload helper.
from google.colab import files
# NOTE(review): the recorded output for this cell reads
# "Saving train.csv to train (1).csv", i.e. the file was stored under a
# different name than the one selected. Code that later opens "train.csv" by
# path may therefore not be reading this upload -- confirm.
uploaded = files.upload()

Saving train.csv to train (1).csv


In [60]:
import io

import pandas as pd

DATA_FILENAME = "train.csv"

# Load the dataset from the bytes handed back by the upload cell when they are
# available. Colab stores a re-uploaded file under a new name (the upload
# cell's output shows "train.csv" saved as "train (1).csv"), so opening
# "train.csv" by path can silently read an older copy. files.upload() returns
# a mapping of original filename -> file contents, which is always the fresh
# data.
uploaded_files = globals().get("uploaded") or {}
if DATA_FILENAME in uploaded_files:
    df = pd.read_csv(io.BytesIO(uploaded_files[DATA_FILENAME]))
else:
    # No upload in this session (e.g. running outside Colab): fall back to the
    # file on disk, exactly as before.
    df = pd.read_csv(DATA_FILENAME)

print("First 5 rows of the dataset:")
print(df.head())

First 5 rows of the dataset:
                                           Statement  Label
0  Says the Annies List political group supports ...  False
1  When did the decline of coal start? It started...   True
2  Hillary Clinton agrees with John McCain "by vo...   True
3  Health care reform legislation is likely to ma...  False
4  The economic turnaround started at the end of ...   True


In [61]:
# Encode the Label column as integers: 1 for true statements, 0 for false.
#
# The '1'/'0' spellings are accepted alongside 'true'/'false' so that this cell
# is safe to re-run: after a first run the column already holds the integers
# 1/0, which stringify to '1'/'0'. Without these entries a second run would map
# every label to NaN and drop the whole dataset.
LABEL_CODES = {'true': 1, 'false': 0, '1': 1, '0': 0}

# Normalise whatever is in the column (bools, strings with stray whitespace or
# mixed case, already-encoded ints) to lowercase text before the lookup.
label_text = df['Label'].astype(str).str.strip().str.lower()
label_codes = label_text.map(LABEL_CODES)

# Rows whose label is not recognised are discarded; the remaining rows get the
# integer code. Building a new frame (instead of assigning into a filtered one)
# keeps the result independent of the frame it was derived from.
has_known_label = label_codes.notna()
df = df.loc[has_known_label].assign(
    Label=label_codes[has_known_label].astype(int)
)

print("Cleaned label values:", df['Label'].unique())
print("Total records after cleaning:", len(df))


Cleaned label values: [0 1]
Total records after cleaning: 10240


In [62]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split -- and therefore every metric reported further down
# the notebook -- is identical on each Restart & Run All.
RANDOM_SEED = 42

# Features are the raw statement text; the target is the 0/1 encoded label.
X = df['Statement']
y = df['Label']

# Hold out 20% of the statements for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

print("Data split successfully.")
print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))


Data split successfully.
Training samples: 8192
Testing samples: 2048


In [64]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the TF-IDF vocabulary on the training statements only, then project both
# splits into that same feature space (the test split is transformed, never
# fitted).
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Report the training matrix dimensions as (rows, columns).
n_statements, n_terms = X_train_vec.shape
print("Text vectorized Shape:", (n_statements, n_terms))



Text vectorized Shape: (8192, 11053)


In [65]:
from sklearn.linear_model import LogisticRegression

# Train a default-configured logistic regression on the TF-IDF features.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train_vec, y_train)

print("Model training complete.")


Model training complete.


In [66]:
from sklearn.metrics import classification_report

# Score the held-out statements and summarise per-class precision / recall / F1.
y_pred = model.predict(X_test_vec)
report_text = classification_report(y_test, y_pred)

# Heading and report each on their own line, as two separate prints would give.
print("Classification Report:", report_text, sep="\n")


Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.46      0.52       881
           1       0.65      0.76      0.70      1167

    accuracy                           0.63      2048
   macro avg       0.62      0.61      0.61      2048
weighted avg       0.63      0.63      0.62      2048



In [88]:
# Interactive check: classify a single statement typed by the user with the
# fitted vectorizer and model from the cells above.
news = input("Enter a news statement: ")
news_vec = vectorizer.transform([news])

if news_vec.nnz == 0:
    # Empty input, or no token of the statement is in the fitted vocabulary:
    # the feature vector is all zeros, so the model's answer would come from
    # its intercept alone and say nothing about the statement itself.
    prediction = None
    proba = None
    print("\n Prediction: unavailable (no words in the statement were seen during training)")
else:
    prediction = model.predict(news_vec)[0]
    # Look up the probability column for label 1 ("true") from the model
    # instead of assuming it is the second column.
    true_column = list(model.classes_).index(1)
    proba = model.predict_proba(news_vec)[0][true_column]

    print("\n Prediction:", "True" if prediction else "False")
    print("Probability of Truth:", round(proba * 100, 2), "%")


Enter a news statement: The earth is round

 Prediction: True
Probability of Truth: 53.91 %
