In [103]:
# 1. News Reader 
# 2. Text classification 
# RohanMengade

In [139]:
import pandas as pd
import urllib
import os
import requests
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import newspaper

In [141]:
from newspaper import Article

# Source page: Indian Express report on Stuart Broad being dropped from the
# England playing XI. Held in a named constant so it is easy to spot and swap.
ARTICLE_URL = 'https://indianexpress.com/article/sports/cricket/england-vs-west-indies-stuart-broad-angry-6499635/'

article = Article(ARTICLE_URL)
article.download()  # fetch the page
article.parse()     # must run before title / publish_date / top_image / text are read below

In [148]:
# Show the article's headline.
article.title

'England vs West Indies: Stuart Broad ‘frustrated, angry, gutted’ after being dropped from playing XI'

In [149]:
# Show the article's publication timestamp (displayed as a datetime with a UTC offset).
article.publish_date

datetime.datetime(2020, 7, 10, 19, 50, 39, tzinfo=tzoffset(None, 19800))

In [150]:
# Show the URL of the article's lead image.
article.top_image

'https://images.indianexpress.com/2020/07/stuart-broad.jpg?w=759'

In [151]:
# Show the article's body text (long output).
article.text

'England’s Stuart Broad, left, wears a disposable pair of gloves as a precaution against the coronavirus during the third day of the first cricket Test match between England and West Indies, at the Ageas Bowl in Southampton. (AP) England’s Stuart Broad, left, wears a disposable pair of gloves as a precaution against the coronavirus during the third day of the first cricket Test match between England and West Indies, at the Ageas Bowl in Southampton. (AP)\n\nEngland seamer Stuart Broad said that he felt “frustrated, angry and gutted” after being dropped from the opening Test against West Indies.\n\nSpeaking on Sky Sports’ ‘Player Zone’ on Friday, Broad also revealed that he had sought clarifications over his future in the team.\n\nBroad, who is England’s second highest wicket-taker of all time with 485 scalps in the longest format of the game, last missed a home Test eight years ago when he was rested against West Indies in 2012.\n\n“I found out about 6pm the night before the game. Stok

In [None]:
## Noun phrase extraction
# This type of extraction is useful when you want to analyze the “who” in a sentence.

In [152]:
# TextBlob is imported in this cell because it is the first cell that uses it;
# without the import the cell raises NameError on a fresh kernel.
from textblob import TextBlob

# One quoted sentence from the article. It keeps the article's typographic
# apostrophe (’); the recorded output lists "’ re" as a noun phrase, presumably
# because of that character.
blob = TextBlob("Stokesy told me just that they’re going with extra pace in these conditions,” Broad said")

# Named `noun_phrase` rather than `np` so the conventional numpy alias is not shadowed.
for noun_phrase in blob.noun_phrases:
    print(noun_phrase)

stokesy
’ re
extra pace
broad


In [153]:
# Part-of-speech Tagging

# It tells whether a word is a noun, an adjective, a verb, etc.
# This is a more complete version of noun phrase extraction,
# where we want to find all the parts of speech in a sentence.

In [154]:
# Print every token of the sentence next to the part-of-speech tag assigned to it.
for token, pos_tag in blob.tags:
    print(token, pos_tag)

Stokesy NNP
told VBD
me PRP
just RB
that IN
they PRP
’ VBP
re VB
going VBG
with IN
extra JJ
pace NN
in IN
these DT
conditions NNS
” NNP
Broad NNP
said VBD


In [None]:
# Pluralise word

In [155]:
from textblob import Word

# Wrap a plain string in Word, then display its plural form as the cell output.
sample_word = Word('test')
sample_word.pluralize()

'tests'

## Text classification using TextBlob

In [156]:
# Bind the article's body text to news_data.
# NOTE(review): the training/testing sentences further down are written out as
# literals rather than derived from this variable.
news_data = article.text

In [157]:
# Display the text stored in news_data (long output).
news_data

'England’s Stuart Broad, left, wears a disposable pair of gloves as a precaution against the coronavirus during the third day of the first cricket Test match between England and West Indies, at the Ageas Bowl in Southampton. (AP) England’s Stuart Broad, left, wears a disposable pair of gloves as a precaution against the coronavirus during the third day of the first cricket Test match between England and West Indies, at the Ageas Bowl in Southampton. (AP)\n\nEngland seamer Stuart Broad said that he felt “frustrated, angry and gutted” after being dropped from the opening Test against West Indies.\n\nSpeaking on Sky Sports’ ‘Player Zone’ on Friday, Broad also revealed that he had sought clarifications over his future in the team.\n\nBroad, who is England’s second highest wicket-taker of all time with 485 scalps in the longest format of the game, last missed a home Test eight years ago when he was rested against West Indies in 2012.\n\n“I found out about 6pm the night before the game. Stok

In [158]:
# Hand-labelled sentences from the article used to train the classifier.
# The literal below is written label-first so the label is visible at the start
# of each line; the comprehension swaps every pair back into the
# (sentence, label) order, so the resulting list is a list of 2-tuples.
# Only one of the five examples carries the 'neg' label.
# NOTE(review): the last sentence mentions "frustrated", "gutted" and "angry"
# yet is labelled 'pos' -- confirm this is intended.
news_training_selected = [
    (sentence, label)
    for label, sentence in [
        ('neg', 'England seamer Stuart Broad said that he felt “frustrated, angry and gutted” after being dropped from the opening Test against West Indies.'),
        ('pos', 'Broad, who is England’s second highest wicket-taker of all time with 485 scalps in the longest format of the game'),
        ('pos', 'I found out about 6pm the night before the game.'),
        ('pos', 'I felt like I deserved a spot in the team, like everyone else.'),
        ('pos', 'So it is hard to take but also I’m quite pleased I feel frustrated and feel gutted and angry because if I didn’t I’d have a different decision to make.'),
    ]
]

In [159]:
# Hand-labelled sentences from the same article, held back for evaluating the
# classifier. Written label-first for readability; the comprehension restores
# the (sentence, label) order and keeps the original sequence of examples.
news_testing = [
    (sentence, label)
    for label, sentence in [
        ('pos', 'I spoke to Ed Smith last night and he said he’s involved in picking the 13 and this was picked purely for this pitch.'),
        ('neg', 'To say I’m disappointed would be an understatement: you get disappointed if you drop your phone and break your screen'),
        ('pos', 'I don’t think I’ve got anything to prove – England know what I can do, the selectors know what I can do – and when I get that opportunity again you can bet I’ll be on the money.'),
        ('pos', 'I wanted clarifications on the future going forward and I was given pretty positive feedback going forward.'),
        ('pos', 'Stokesy told me just that they’re going with extra pace in these conditions,” Broad said.'),
    ]
]

In [160]:
from textblob import classifiers

# Fit a Naive Bayes classifier on the hand-labelled (sentence, label) pairs.
classifier = classifiers.NaiveBayesClassifier(train_set=news_training_selected)

In [162]:
# Score the classifier on the test sentences once and keep the value in a
# variable instead of recomputing it.
test_accuracy = classifier.accuracy(news_testing)
print(test_accuracy)

# Print the ten most informative features together with their neg : pos ratios.
classifier.show_informative_features(10)

0.8
Most Informative Features
       contains(England) = True              neg : pos    =      2.5 : 1.0
        contains(gutted) = True              neg : pos    =      2.5 : 1.0
         contains(angry) = True              neg : pos    =      2.5 : 1.0
    contains(frustrated) = True              neg : pos    =      2.5 : 1.0
          contains(felt) = True              neg : pos    =      2.5 : 1.0
             contains(I) = False             neg : pos    =      2.5 : 1.0
         contains(Broad) = True              neg : pos    =      2.5 : 1.0
           contains(and) = True              neg : pos    =      2.5 : 1.0
            contains(in) = False             neg : pos    =      1.5 : 1.0
             contains(’) = False             neg : pos    =      1.5 : 1.0


## Accuracy of this classifier on the testing dataset: 0.8 (4 of the 5 test sentences)

In [None]:
# Here, we can see that if the text contains “gutted”, “angry” or “frustrated”, then
# the classifier leans toward labelling the statement negative (neg : pos = 2.5 : 1).
# With only five training sentences, treat these ratios as illustrative.

In [None]:
 # random text checking

In [163]:
from textblob import TextBlob

# Attach the trained classifier to a new sentence and print the label it predicts.
blob = TextBlob('Sachin Tendulkar is the best batsman', classifier=classifier)
predicted_label = blob.classify()
print(predicted_label)

pos


### Sentiment Analysis 

Sentiment analysis is the process of determining the attitude or emotion of the writer, i.e., whether it is positive, negative, or neutral.

The `sentiment` property of a TextBlob returns two values: polarity and subjectivity.

Polarity is a float in the range [-1, 1], where 1 means a positive statement and -1 means a negative statement.

Subjective sentences generally refer to personal opinion, emotion, or judgment, whereas objective sentences refer to factual information. Subjectivity is also a float, in the range [0, 1], where 0 is very objective and 1 is very subjective.

In [164]:
# Echo the sentence being analysed, then display its Sentiment
# (polarity, subjectivity) value as the cell output.
print(blob)
blob.sentiment

Sachin Tendulkar is the best batsman


Sentiment(polarity=1.0, subjectivity=0.3)