## Fake News Prediction

### Step 1: Importing the Necessary Libraries

In [27]:
# Import modules
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

In [28]:
# Read the news articles CSV from the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer="news.csv")

In [29]:
# Display the loaded DataFrame (the notebook shows a truncated preview of long frames).
data

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


### Step 2: Split the Input and Output Variables

In [31]:
# Combine headline and body into one text feature per article.
# fillna("") keeps a row usable when either field is missing: without it the
# concatenation yields NaN, which TfidfVectorizer rejects as an invalid document.
Independent = data['title'].fillna("") + " " + data['text'].fillna("")

In [32]:
# Preview the combined title-and-text Series.
Independent

0       You Can Smell Hillary’s Fear Daniel Greenfield...
1       Watch The Exact Moment Paul Ryan Committed Pol...
2       Kerry to go to Paris in gesture of sympathy U....
3       Bernie supporters on Twitter erupt in anger ag...
4       The Battle of New York: Why This Primary Matte...
                              ...                        
6330    State Department says it can't find emails fro...
6331    The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
6332    Anti-Trump Protesters Are Tools of the Oligarc...
6333    In Ethiopia, Obama seeks progress on peace, se...
6334    Jeb Bush Is Suddenly Attacking Trump. Here's W...
Length: 6335, dtype: object

In [33]:
# Target labels, kept as a one-column DataFrame (note the list selector).
Dependent = data.loc[:, ['label']]

In [34]:
# Preview the label column.
Dependent

Unnamed: 0,label
0,FAKE
1,FAKE
2,REAL
3,FAKE
4,REAL
...,...
6330,REAL
6331,FAKE
6332,FAKE
6333,REAL


### Step 3: Split into Train and Test Sets

In [36]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    Independent,
    Dependent,
    test_size=0.2,
    random_state=42,
)

In [37]:
# Preview the training texts.
X_train

1142    Alabama Sen. Sessions Backs Trump’s Immigratio...
2654    As of 6:00 AM NOVEMBER 6th, Trump is leading i...
5395    Time: Investigating Hillary is an Attack on Al...
1170    Women Should Vote With Their Husbands Taki's M...
4371    Pakistan police detain dozens of Imran Khan's ...
                              ...                        
3772    The inane spectacle of the GOP debate: Cruz th...
5191    Clinton, FBIGate and the true depth of the Oba...
5226    Fearing Election Day Trouble, Some US Schools ...
5390    Obama gets away with some whoppers on guns at ...
860     CETA: Canada Has Challenged The EU’s Chemical ...
Length: 5068, dtype: object

In [38]:
# Preview the training labels.
y_train

Unnamed: 0,label
1142,REAL
2654,FAKE
5395,FAKE
1170,FAKE
4371,FAKE
...,...
3772,REAL
5191,FAKE
5226,FAKE
5390,REAL


In [39]:
# Preview the test texts.
X_test

1357    American Dream, Revisited Will Trump pull a Br...
2080    Clintons Are Under Multiple FBI Investigations...
2718    The FBI Can’t Actually Investigate a Candidate...
812     Confirmed: Public overwhelmingly (10-to-1) say...
4886    Nanny In Jail After Force Feeding Baby To Deat...
                              ...                        
1512    Tennessee Children with Brittle Bones Suffer i...
57      Closed primaries, 'warped' democracy? Politica...
6092    A liberal plan to defeat ISIS: Here’s how we a...
3403    Nobody Won the VP Debate, Least of All the Ame...
292     Wingnuts have a death-grip on Congress: Why Pa...
Length: 1267, dtype: object

In [40]:
# Preview the test labels.
y_test

Unnamed: 0,label
1357,FAKE
2080,FAKE
2718,FAKE
812,FAKE
4886,FAKE
...,...
1512,FAKE
57,REAL
6092,REAL
3403,REAL


### Step 4: Initializing TF-IDF Vectorizer

In [41]:
# Build the TF-IDF vectorizer: English stop words are removed and terms that
# occur in more than 70% of documents are ignored (max_df=0.7).
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Learn the vocabulary and IDF weights from the training texts only, then
# reuse that fitted vocabulary to encode the test texts.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

### Step 5: Choice of Algorithm and Training

In [43]:
# Initialize and train a linear-kernel SVM classifier.
svm_model = SVC(kernel='linear')
# y_train is a one-column DataFrame; flatten it to a 1-D array so scikit-learn
# does not emit a DataConversionWarning ("y = column_or_1d(y, warn=True)").
svm_model.fit(X_train_tfidf, y_train.values.ravel())

  y = column_or_1d(y, warn=True)


In [44]:
# Predict labels for the held-out texts and show the confusion matrix
# (rows are true labels, columns are predicted labels).
y_pred = svm_model.predict(X_test_tfidf)
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[593,  35],
       [ 49, 590]], dtype=int64)

### Step 6: Generate the Classification Report

In [46]:
# Build the per-class precision/recall/F1 summary as a text report.
Clf_report = classification_report(y_true=y_test, y_pred=y_pred)

In [47]:
# The report is a multi-line string; print it so the newlines render as a
# table instead of showing the escaped repr.
print(Clf_report)

'              precision    recall  f1-score   support\n\n        FAKE       0.92      0.94      0.93       628\n        REAL       0.94      0.92      0.93       639\n\n    accuracy                           0.93      1267\n   macro avg       0.93      0.93      0.93      1267\nweighted avg       0.93      0.93      0.93      1267\n'

### Step 7: Save the Model and Vectorizer

In [19]:
# Output path for the pickled SVM model.
filename = "Fake_News_Prediction.pkl"

In [20]:
# Serialize the trained SVM; the context manager closes the file handle even
# if pickling fails (the bare open() call left it to the garbage collector).
with open(filename, 'wb') as model_file:
    pickle.dump(svm_model, model_file)

In [21]:
# Serialize the fitted vectorizer next to the model so the same vocabulary can
# be reused at prediction time; the context manager closes the file handle.
with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)