In [1]:
import itertools

import joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Load the labelled news dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/news.csv')

# A bare `df.shape` in the middle of a cell is evaluated and then discarded
# (only the cell's last expression is rendered), so print it explicitly.
print(df.shape)
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [3]:
# Target column: one class string per article (the preview below shows FAKE / REAL).
labels = df['label']
labels.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [4]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    labels,
    random_state=7,
    test_size=0.2,
)

In [5]:
# TF-IDF features: drop English stop words and any term that occurs in more
# than 70% of the training documents.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Learn the vocabulary/IDF weights on the training split only, then reuse the
# fitted vectorizer (transform, not fit) for the held-out split.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

In [6]:
# PassiveAggressiveClassifier shuffles the training data between epochs by
# default, so without a fixed random_state the fitted weights (and the accuracy
# printed below) vary from run to run. Pin the seed to the value used for the
# train/test split so Restart & Run All reproduces the same score.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Evaluate on the held-out split.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 92.74%


In [7]:
# Error breakdown on the held-out split. Passing `labels` fixes the row/column
# order to FAKE then REAL (rows: true class, columns: predicted class).
confusion_matrix(
    y_true=y_test,
    y_pred=y_pred,
    labels=['FAKE', 'REAL'],
)

array([[589,  49],
       [ 43, 586]])

In [9]:
# Persist the trained classifier for reuse outside this notebook.
# (`joblib` is imported with the other dependencies in the top import cell.)
joblib.dump(pac, 'model.pkl')

# The classifier only accepts features produced by the TF-IDF vectorizer that
# was fitted on the training split, so the pickled model alone cannot score raw
# text. Save the fitted vectorizer alongside it.
joblib.dump(tfidf_vectorizer, 'vectorizer.pkl')
print("Model dumped!")

Model dumped!


In [19]:
# Sample news article (hard-coded, multi-paragraph) used as a single ad-hoc
# input for the prediction step that follows. The string is passed to the
# vectorizer verbatim, including its blank lines and typographic quotes.
text = """German Chancellor Olaf Scholz said Friday that it is “wise to be prepared for a long war” in Ukraine, adding that Kyiv's allies will remain together for the duration.

“I think it is wise to be prepared for a long war and it is wise to give Putin the message that we are ready to just stay all the time together with Ukraine, and that we will constantly support the country,” Scholz told CNN’s Christiane Amanpour at Germany’s annual Munich Security Conference.
“The really important decision we should take all together is saying that we are willing to do it as long as necessary, and that we will do our best,” the chancellor said.

Scholz, while avoiding committing to an end target date for the war, said the unity among Ukrainian allies has surprised Putin.

“I'm absolutely sure that Putin never expected that there would be that united Europe, and that there would be that united world. He never thought that the transatlantic partnership would work that good,” he said.

Scholz singled out the United States for their continuous and vital support.

"We just do it together with our friends and partners, and especially with the United States,” Scholz said, adding that he really appreciates his government's “strong alliance” with the US.

On arming Ukraine: Amanpour asked Scholz about the deployment of more German-made Leopard 2 tanks on the ground in Ukraine.

Scholz said more would be deployed “very soon,” together with trained soldiers, but he warned that many of Ukraine’s partners aren’t able to deliver the most modern models of the fighting vehicles.

“I learned many are not able to deliver the most modern things ... but in the ones they are delivering we will give the support as well," Scholz said. "And as you know, there is also a big number of older tanks which we will deliver."

Confronted on concerns over dwindling ammunition stockpiles, Scholz stressed the need for a “permanent production of the most important weapons,” including ammunition.

German Defense Minister Boris Pistorius was also present at Friday's meetings, saying the Munich conference is "more important than ever," given the Russian invasion.

“From the beginning, the security conference has always been a place of understanding and dialogue. What is new is that this is now taking place at the same time as a war of aggression is being waged on European soil by Russia against Ukraine," Pistorius said. "That raises the stakes for the conference even higher."

CNN’s Inke Kappeler contributed to this report from Berlin.


"""

# The vectorizer expects an iterable of documents, so wrap the single article
# in a list; `transform` reuses the vocabulary fitted on the training split.
sample_docs = [text]
query = tfidf_vectorizer.transform(sample_docs)

# Predict the class of the sample article and show the resulting array.
prediction = pac.predict(query)
print(prediction)

['REAL']
