### Importing Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer as Stem
from nltk.stem import WordNetLemmatizer as Lem

### Importing DataSets

In [None]:
# Read Fake.csv and True.csv from the Kaggle input directory into two frames.
fake, true = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/fake-and-real-news-dataset/Fake.csv',
        '../input/fake-and-real-news-dataset/True.csv',
    )
)

In [None]:
# Report (rows, columns) of both frames on one line.
print(*(frame.shape for frame in (fake, true)))

### Adding a label to classify the datasets

In [None]:
# One 0.0 entry per row of `fake`; used as its class label.
zero = np.zeros(len(fake))

In [None]:
# Attach the all-zero vector as the `label` column of `fake` (class 0).
fake['label'] = zero

In [None]:
# Preview the first five rows, now including the `label` column.
fake.head(5)

In [None]:
# One 1.0 entry per row of `true`, stored as its `label` column (class 1).
one = np.ones(len(true))
true["label"] = one

In [None]:
# Preview the first five rows, now including the `label` column.
true.head(5)

### Merging the datasets and shuffling them

In [None]:
# Stack the labelled fake rows on top of the labelled true rows.
dataset = pd.concat(objs=[fake, true], axis=0)

In [None]:
# Shuffle every row (frac=1) with a fixed seed so repeated runs see the same
# row order and therefore comparable metrics.
# reset_index() keeps the previous row labels in an `index` column, which a
# later cell removes.
dataset = dataset.sample(frac=1, random_state=42).reset_index()

In [None]:
# Preview the first five shuffled rows.
dataset.head(5)

In [None]:
# Discard the `index` column left behind by reset_index().
dataset = dataset.drop(columns=['index'])

### Checking if any nulls are present

In [None]:
# Count missing values per column.
dataset.isna().sum()

In [None]:
# Preview the first five rows after removing the `index` column.
dataset.head(5)

### Dividing the dataset into X and y (target)

In [None]:
# Features are every column except the last; the target is the last column
# (the `label` column appended to both source frames).
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
# Preview the first five feature rows.
X.head(5)

In [None]:
# Preview the first five target values.
y.head(5)

### Dropping the date as it is not necessary

In [None]:
# Remove the `date` column from the features.
# X was sliced out of `dataset` with iloc, so an in-place drop on it can
# trigger pandas' SettingWithCopyWarning; rebinding X to the returned frame
# yields the same columns without mutating the slice.
X = X.drop(columns=['date'])

### Splitting the dataset into train and test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing. The fixed seed makes the split
# repeatable for a given row order of X / y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# List the feature columns that remain in the test frame.
X_test.columns

In [None]:
import re

### Cleaning the train titles: removing punctuation and stop words, then applying the Porter stemmer

In [None]:
# Build the cleaned training corpus: one stemmed, stop-word-free string per
# title in X_train, in row order.
ps = Stem()
# Evaluate the stop-word list once and keep it as a set. Calling
# stopwords.words('english') for every token repeats that work and makes each
# membership test a linear scan over a list.
stop_words = set(stopwords.words('english'))
train_corpus = []
for title in X_train['title']:
    # Replace every non-letter with a space, lowercase, split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', title).lower().split()
    # Drop stop words before stemming, matching the original filter order.
    stems = [ps.stem(token) for token in tokens if token not in stop_words]
    train_corpus.append(' '.join(stems))

### Implementing Multinomial Naive Bayes

In [None]:
from sklearn.naive_bayes import MultinomialNB as MNB

### Applying Count Vectorizer(Bag of Words) in train

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer over unigrams, bigrams and trigrams, capped at
# 5000 features.
cv = CountVectorizer(
    ngram_range=(1, 3),
    max_features=5000,
)

In [None]:
# Fit the vectorizer on the cleaned training titles and vectorize them.
# NOTE: this rebinds X_train from the feature DataFrame to the vectorizer's
# output, so the original columns are no longer reachable through this name.
X_train = cv.fit_transform(train_corpus)

In [None]:
# Baseline Multinomial Naive Bayes with default settings, trained on the
# vectorized training titles.
model = MNB()
model.fit(X=X_train, y=y_train)

### Cleaning the test titles: removing punctuation and stop words, then applying the Porter stemmer

In [None]:
# Build the cleaned test corpus: one stemmed, stop-word-free string per
# title in X_test, in row order.
ps = Stem()
# Evaluate the stop-word list once and keep it as a set. Calling
# stopwords.words('english') for every token repeats that work and makes each
# membership test a linear scan over a list.
stop_words = set(stopwords.words('english'))
test_corpus = []
for title in X_test['title']:
    # Replace every non-letter with a space, lowercase, split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', title).lower().split()
    # Drop stop words before stemming, matching the original filter order.
    stems = [ps.stem(token) for token in tokens if token not in stop_words]
    test_corpus.append(' '.join(stems))

### Applying Count Vectorizer(Bag of Words) in test

In [None]:
# Vectorize the cleaned test titles with the vectorizer already fitted on the
# training corpus (transform only, no refit).
# NOTE: this rebinds X_test from the feature DataFrame to the vectorizer's
# output.
X_test = cv.transform(test_corpus)

In [None]:
# Predict labels for the vectorized test titles with the fitted model.
pred = model.predict(X_test)

### Performance Metrics

In [None]:
from sklearn.metrics import confusion_matrix as cm, classification_report as cr, accuracy_score as acc
# Print the confusion matrix, the classification report and the accuracy,
# in that order, for the current predictions.
for metric in (cm, cr, acc):
    print(metric(y_test, pred))

### Tuning the alpha hyperparameter of Multinomial Naive Bayes

In [None]:
# Sweep the smoothing parameter alpha over 0.0, 0.1, ..., 0.9 and keep the
# model with the highest accuracy (rounded to 3 decimals).
#
# The original comparison was inverted: it stored a model in `classifier`
# only when that model scored *worse* than the running best, and printed the
# previous best accuracy next to the new alpha. Here `accuracy` and
# `classifier` always refer to the same, best-so-far model.
#
# NOTE(review): the grid includes alpha=0 (no smoothing); scikit-learn may
# warn about or clip this value depending on version - confirm it is intended.
# NOTE(review): selection is done against X_test / y_test, so the reported
# accuracy is optimistic; a separate validation split would be cleaner.
accuracy = 0
classifier = None
for alpha in np.arange(start=0, stop=1, step=0.1):
    model = MNB(alpha=alpha)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    temp_accuracy = round(acc(y_test, pred), 3)
    if temp_accuracy > accuracy:
        # New best: record the score together with the model that produced it.
        accuracy = temp_accuracy
        classifier = model
        print('accuracy', accuracy, 'model with alpha value', alpha)
        print('\n')

### Applying Passive Aggressive Classifier with n_jobs=50

In [None]:
from sklearn.linear_model import PassiveAggressiveClassifier
# Replace the Naive Bayes model with a Passive Aggressive classifier.
# NOTE(review): per the scikit-learn docs n_jobs sets the number of CPUs used
# for multi-class (one-vs-all) fitting; with the two labels used here it is
# presumably a no-op - confirm whether a value of 50 is needed.
model = PassiveAggressiveClassifier(n_jobs=50)

In [None]:
# Train the Passive Aggressive classifier on the vectorized training titles.
model.fit(X=X_train, y=y_train)

In [None]:
# Score the Passive Aggressive classifier on the test split.
pred = model.predict(X_test)
from sklearn.metrics import confusion_matrix as cm, classification_report as cr, accuracy_score as acc
# Confusion matrix, classification report, then accuracy.
for metric in (cm, cr, acc):
    print(metric(y_test, pred))

### Checking different values of n_jobs

In [None]:
# Refit the Passive Aggressive classifier for n_jobs = 50, 100, ..., 950 and
# keep the model with the highest accuracy (rounded to 3 decimals).
#
# Fixes relative to the original loop:
#   * the comparison was inverted, so `classifier` was set to a model that
#     scored worse than the running best and the printed accuracy did not
#     belong to the printed n_jobs value;
#   * the grid started at n_jobs=0, which is not a meaningful worker count,
#     so the sweep now starts at 50 and uses plain Python ints.
#
# NOTE(review): per the scikit-learn docs n_jobs only controls how many CPUs
# are used for multi-class fitting, so accuracy differences between
# iterations presumably come from the estimator's random shuffling rather
# than from n_jobs. Sweeping the regularization parameter C would be a more
# meaningful search - confirm the intent.
accuracy = 0
classifier = None
for jobs in range(50, 1000, 50):
    model = PassiveAggressiveClassifier(n_jobs=jobs)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    temp_accuracy = round(acc(y_test, pred), 3)
    if temp_accuracy > accuracy:
        # New best: record the score together with the model that produced it.
        accuracy = temp_accuracy
        classifier = model
        print('accuracy', accuracy, 'model with n_jobs value', jobs)
        print('\n')
    else:
        print('continue')