In [1]:
!pip install -q textblob 

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
from textblob import TextBlob

In [4]:
# Load the sentiment-labelled dataset (columns `text` and `sent`) from Drive.
# The absolute path matches the mount point passed to drive.mount(), so the
# read no longer depends on the current working directory being /content.
df = pd.read_csv('/content/drive/MyDrive/bbc_sent_final.csv', encoding='utf-8')
df.head()

Unnamed: 0,text,sent
0,Psychiatrists are being urged to ask children ...,0
1,The number of adults seeking help to cope with...,-1
2,The current system of checking newborns for hi...,-1
3,Calorie-filled Easter eggs are being sold in s...,-1
4,"""She is so desperate to end it all, she curren...",-1


In [8]:
import nltk
# sent_tokenize needs the Punkt sentence-tokenizer data. Recent NLTK releases
# (3.9 and later) load it from the 'punkt_tab' resource instead of 'punkt',
# so fetch both to keep the notebook runnable across NLTK versions.
nltk.download('punkt_tab', quiet=True)
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
def break_text(text):
  """Split `text` into a list of sentences using NLTK's sent_tokenize."""
  sentences = nltk.tokenize.sent_tokenize(text)
  return sentences

In [10]:
# Tokenise every article into its list of sentences.
df['sentences'] = df['text'].apply(break_text)

In [12]:
# Inspect the sentence list produced for the row labelled 0.
df['sentences'][0]

['Psychiatrists are being urged to ask children with mental health issues how long they spend online and what they use social media for.',
 'Questions about technology should be a routine part of assessments, the Royal College of Psychiatrists says.',
 'It is concerned about how time spent online impacts on mood, sleep, diet and behaviour.',
 'The government is expected to announce plans to regulate social media companies soon.',
 "The College's advice comes as evidence grows of a possible link between harmful content or time spent online, and poor mental health.",
 "It is planning to publish a report later this year about its stance on technology use and children's mental health, which will include recommendations for parents, children and doctors.",
 'When assessing children, psychiatrists are being advised to think about:\nThe Royal College of Psychiatrists recommends that children stop using technology at least an hour before going to bed, and avoid using technology at mealtimes.',

In [23]:
def subjectivity(text):
  """Return the subjectivity score TextBlob assigns to `text`."""
  blob = TextBlob(text)
  return blob.sentiment.subjectivity

In [24]:
def polarity(text):
  """Return the polarity score TextBlob assigns to `text`."""
  blob = TextBlob(text)
  return blob.sentiment.polarity

In [28]:
# Quick sanity check of the subjectivity helper on a single short sentence.
s = "I AM HAPPY!"
subjectivity(s)

1.0

In [45]:
def get_sub(text):
  """Average the TextBlob subjectivity of each sentence in `text`.

  Args:
    text: The sentences (strings) belonging to one document.

  Returns:
    The mean per-sentence subjectivity, or 0.0 when `text` contains no
    sentences (the original division raised ZeroDivisionError there).
  """
  scores = [subjectivity(sentence) for sentence in text]
  if not scores:
    # Nothing to average (e.g. blank text tokenised to []); return a
    # neutral 0.0, mirroring how get_pol's sum yields 0 for empty input.
    return 0.0
  return sum(scores) / len(scores)

In [40]:
def get_pol(text):
  """Return the sum (not the mean) of TextBlob polarity over the sentences in `text`."""
  return sum([polarity(sentence) for sentence in text])

In [46]:
# Mean per-sentence subjectivity for each article.
df['subjectivity'] = df['sentences'].apply(get_sub)

In [47]:
# Summed per-sentence TextBlob polarity for each article.
df['polarity'] = df['sentences'].apply(get_pol)

In [53]:
def label(avg_score):
  """Map a score to a class: 1 if positive, 0 if exactly zero, otherwise -1."""
  if avg_score > 0:
    return 1
  # Anything that is neither positive nor equal to zero falls through to -1.
  return 0 if avg_score == 0 else -1

In [54]:
# Convert the TextBlob polarity into a -1 / 0 / 1 class label.
df['label'] = df['polarity'].apply(label)

In [60]:
# Preview the first rows, now including the TextBlob-derived columns.
df.head()

Unnamed: 0,text,sent,sentences,subjectivity,polarity,label
0,Psychiatrists are being urged to ask children ...,0,[Psychiatrists are being urged to ask children...,0.309559,0.041968,1
1,The number of adults seeking help to cope with...,-1,[The number of adults seeking help to cope wit...,0.479917,-1.191439,-1
2,The current system of checking newborns for hi...,-1,[The current system of checking newborns for h...,0.481573,0.819141,1
3,Calorie-filled Easter eggs are being sold in s...,-1,[Calorie-filled Easter eggs are being sold in ...,0.386764,1.110516,1
4,"""She is so desperate to end it all, she curren...",-1,"[""She is so desperate to end it all, she curre...",0.402972,0.674756,1


In [58]:
# Boolean Series: True where the TextBlob label equals the reference `sent`.
diff = df['label'] == df['sent']

In [59]:
# Number of rows where the TextBlob label matches the reference label.
# Summing a boolean Series counts its True values, so no Python loop is needed.
tb_count = int(diff.sum())

tb_count
  

34

# VADER

In [61]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
# Fetch the lexicon resource before constructing the analyzer.
nltk.download('vader_lexicon')
# Single analyzer instance, used by the VADER scoring helper defined below.
sia = SIA()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...




In [67]:
def vader_pol(text):
  """Return the 'compound' entry of the analyzer's polarity scores for `text`."""
  scores = sia.polarity_scores(text)
  return scores['compound']

In [63]:
def get_vader_pol(text):
  """Return the sum of the VADER compound scores over the sentences in `text`."""
  return sum([vader_pol(sentence) for sentence in text])

In [68]:
# Summed per-sentence VADER compound score for each article.
df['vader_pol'] = df['sentences'].apply(get_vader_pol)

In [70]:
# Convert the VADER score into a -1 / 0 / 1 class label.
df['vader_label'] = df['vader_pol'].apply(label)

In [71]:
# Preview the first rows, now including the VADER-derived columns.
df.head()

Unnamed: 0,text,sent,sentences,subjectivity,polarity,label,vader_pol,vader_label
0,Psychiatrists are being urged to ask children ...,0,[Psychiatrists are being urged to ask children...,0.309559,0.041968,1,-2.9821,-1
1,The number of adults seeking help to cope with...,-1,[The number of adults seeking help to cope wit...,0.479917,-1.191439,-1,-1.5966,-1
2,The current system of checking newborns for hi...,-1,[The current system of checking newborns for h...,0.481573,0.819141,1,-6.4828,-1
3,Calorie-filled Easter eggs are being sold in s...,-1,[Calorie-filled Easter eggs are being sold in ...,0.386764,1.110516,1,1.028,1
4,"""She is so desperate to end it all, she curren...",-1,"[""She is so desperate to end it all, she curre...",0.402972,0.674756,1,-1.0905,-1


In [72]:
# Boolean Series: True where the VADER label equals the reference `sent`.
vader_diff = df['vader_label'] == df['sent']

In [73]:
# Number of rows where the VADER label matches the reference label.
# Summing a boolean Series counts its True values, so no Python loop is needed.
v_count = int(vader_diff.sum())

v_count

40