In [16]:
import pandas as pd
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
import pickle


In [4]:


def wordopt(text):
    """Normalize a raw article string before vectorization.

    Steps, in order:
      1. lowercase the text;
      2. remove ``$$...$$`` spans;
      3. remove URLs (``http://``, ``https://`` or ``www.`` prefixes);
      4. remove HTML-style ``<...>`` tags;
      5. replace every remaining non-word character with a space;
      6. remove leftover punctuation (only ``_`` survives step 5);
      7. remove every token that contains a digit.

    URL and tag removal must run before the non-word substitution: that
    substitution turns ``:``, ``/``, ``.``, ``<`` and ``>`` into spaces, after
    which the URL and tag patterns can no longer match anything.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lowercased string. Whitespace is not collapsed, so
        removed spans may leave consecutive spaces behind.
    """
    text = text.lower()
    # NOTE(review): strips spans delimited by a literal "$$" on both sides;
    # confirm this is the intended delimiter for the dataset.
    text = re.sub(r'\$\$.*?\$\$', '', text)
    # Strip URLs and tags while their punctuation is still intact.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Everything that is not a letter, digit or underscore becomes a space.
    text = re.sub(r"\W", " ", text)
    # After the step above only "_" can still match this punctuation class.
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    # Drop whole tokens containing at least one digit (e.g. "abc123", "2017").
    text = re.sub(r'\w*\d\w*', '', text)
    return text


In [5]:
# Read the two article dumps from the working directory, fake first, then true.
data_fake, data_true = (
    pd.read_csv(csv_name) for csv_name in ('Fake.csv', 'True.csv')
)

In [6]:
# Attach the target label as a constant column: 0 for the fake frame, 1 for the true frame.
for frame, label in ((data_fake, 0), (data_true, 1)):
    frame["class"] = label

In [7]:
# Stack the fake and true frames into one dataset, then shuffle all rows so the
# two classes are interleaved and renumber the index from 0.
# random_state pins the shuffle so a fresh Restart & Run All yields the same row
# order; 0 matches the seed already used for the GB and RF models.
data = pd.concat([data_fake, data_true], axis=0)
data = data.sample(frac=1, random_state=0).reset_index(drop=True)

In [8]:
# Run every article through wordopt and overwrite the 'text' column with the result.
# The column is replaced in place, so re-running this cell cleans already-cleaned text again.
data['text'] = data['text'].map(wordopt)

In [9]:
# Features are the 'text' column; the target is the 0/1 'class' column.
x = data['text']
y = data['class']
# Hold out 25% of the rows for testing, stratified on y so both splits keep the
# same class ratio. random_state pins the split so results are reproducible
# across runs; 0 matches the seed already used for the GB and RF models.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, stratify=y, random_state=0
)


In [10]:
# TF-IDF features using TfidfVectorizer with its default settings.
# The vectorizer is fitted on the training text only; the test text is then
# transformed with that already-fitted vectorizer, so both matrices share the
# same feature columns and nothing is learned from x_test.
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [14]:
# Logistic regression with scikit-learn defaults, trained on the TF-IDF training matrix.
# fit() returns the estimator itself, so LR is bound to the fitted model; the bare
# name on the last line keeps the estimator repr as the cell output.
LR = LogisticRegression().fit(xv_train, y_train)
LR

In [17]:
# Decision tree trained on the TF-IDF training matrix.
# random_state is fixed because scikit-learn's tree builder permutes candidate
# features at each split, so an unseeded tree can differ between runs; 0 matches
# the seed already used for the GB and RF models.
DT = DecisionTreeClassifier(random_state=0)
DT.fit(xv_train, y_train)

In [18]:
# Gradient boosting classifier (seeded with 0) trained on the TF-IDF training matrix.
# fit() returns the estimator itself; the bare name keeps its repr as the cell output.
GB = GradientBoostingClassifier(random_state=0).fit(xv_train, y_train)
GB

In [19]:
# Random forest classifier (seeded with 0) trained on the TF-IDF training matrix.
# fit() returns the estimator itself; the bare name keeps its repr as the cell output.
RF = RandomForestClassifier(random_state=0).fit(xv_train, y_train)
RF

In [20]:
# Persist the fitted TF-IDF vectorizer to vectorizer.pkl in the working directory.
with open('vectorizer.pkl', mode='wb') as vectorizer_file:
    pickle.dump(vectorization, vectorizer_file)

In [21]:
# Persist the fitted logistic regression model.
with open('logistic_regression_model.pkl', mode='wb') as lr_file:
    pickle.dump(LR, lr_file)

In [22]:
# Persist the fitted decision tree model.
with open('decision_tree_model.pkl', mode='wb') as dt_file:
    pickle.dump(DT, dt_file)

In [23]:
# Persist the fitted gradient boosting model.
with open('gradient_boosting_model.pkl', mode='wb') as gb_file:
    pickle.dump(GB, gb_file)

In [24]:
# Persist the fitted random forest model.
with open('random_forest_model.pkl', mode='wb') as rf_file:
    pickle.dump(RF, rf_file)

In [25]:
# Prints a fixed confirmation string; it does not itself check that the pickle files were written.
print("Models and vectorizer saved successfully.")

Models and vectorizer saved successfully.
