<a href="https://colab.research.google.com/github/raunaksatone5/Sentimental-Analysis-using-NLP/blob/main/Sentimental_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# importimg required libraries

import numpy as np 
import re 
import os 
import pandas as pd
from nltk.tokenize import RegexpTokenizer , sent_tokenize

In [4]:
# loading data
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/company assignment/Blackcoffer assignment/Input.xlsx - Sheet1 (1).csv')

In [5]:
# first look at our data
df.head()

Unnamed: 0,URL_ID,URL
0,37,https://insights.blackcoffer.com/ai-in-healthc...
1,38,https://insights.blackcoffer.com/what-if-the-c...
2,39,https://insights.blackcoffer.com/what-jobs-wil...
3,40,https://insights.blackcoffer.com/will-machine-...
4,41,https://insights.blackcoffer.com/will-ai-repla...


In [6]:
# lets see how our url looks like
df['URL'][1]

'https://insights.blackcoffer.com/what-if-the-creation-is-taking-over-the-creator/'

In [7]:
# lets split the url to get the article title
df['article_title'] = df['URL'].str.split('/').str[3]
df.head()

Unnamed: 0,URL_ID,URL,article_title
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai-in-healthcare-to-improve-patient-outcomes
1,38,https://insights.blackcoffer.com/what-if-the-c...,what-if-the-creation-is-taking-over-the-creator
2,39,https://insights.blackcoffer.com/what-jobs-wil...,what-jobs-will-robots-take-from-humans-in-the-...
3,40,https://insights.blackcoffer.com/will-machine-...,will-machine-replace-the-human-in-the-future-o...
4,41,https://insights.blackcoffer.com/will-ai-repla...,will-ai-replace-us-or-work-with-us


In [8]:
# removing - symbol from sentences in url_id
df['article_title'] = df['article_title'].str.replace('-',' ')

# 1. Sentimental Analysis

### 1.1 cleaning using stopword list

In [9]:
# import stopwords from nltk
from nltk.corpus import stopwords

In [10]:
import nltk
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [11]:
# extracting the stopwords from nltk library
sw = stopwords.words('english')
# displaying the stopwords
sw = list(sw)

In [12]:
word = [i for i in df['article_title']]

In [13]:
word

['ai in healthcare to improve patient outcomes',
 'what if the creation is taking over the creator',
 'what jobs will robots take from humans in the future',
 'will machine replace the human in the future of work',
 'will ai replace us or work with us',
 'man and machines together machines are more diligent than humans blackcoffe',
 'in future or in upcoming years humans and machines are going to work together in every field of work',
 'how neural networks can be applied in various areas in the future',
 'how machine learning will affect your business',
 'deep learning impact on areas of e learning',
 'how to protect future data and its privacy blackcoffer',
 'how machines ai automations and robo human are effective in finance and banking',
 'ai human robotics machine future planet blackcoffer thinking jobs workplace',
 'how ai will change the world blackcoffer',
 'future of work how ai has entered the workplace',
 'ai tool alexa google assistant finance banking tool future',
 'ai heal

In [14]:
# lets define a function to remove stopwords
def stopwords(text):
    '''a function for removing the stopword'''
    # removing the stop words and lowercasing the selected words
    text = [word.lower() for word in text.split() if word.lower() not in sw]
    # joining the list of words with space separator
    return " ".join(text)

In [15]:
# cleaning stop words
df['article_title'] = df['article_title'].apply(stopwords)
df.head()

Unnamed: 0,URL_ID,URL,article_title
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us


### 1.2 Creating a dictionary of Positive and Negative words

In [17]:
import nltk
nltk.downloader.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [18]:
# Getting scores for random statement
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
sia.polarity_scores("Wow, NLTK is really powerful!")


{'neg': 0.0, 'neu': 0.295, 'pos': 0.705, 'compound': 0.8012}

### 1.3 Extracting Derived variables

In [19]:
analyzer = SentimentIntensityAnalyzer()
df['compound_score'] = [analyzer.polarity_scores(x)['compound'] for x in df['article_title']]
df['negative_score'] = [analyzer.polarity_scores(x)['neg'] for x in df['article_title']]
df['neutral_score'] = [analyzer.polarity_scores(x)['neu'] for x in df['article_title']]
df['positive_score'] = [analyzer.polarity_scores(x)['pos'] for x in df['article_title']]

In [20]:
df.head()

Unnamed: 0,URL_ID,URL,article_title,compound_score,negative_score,neutral_score,positive_score
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes,0.4404,0.0,0.58,0.42
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator,0.2732,0.0,0.488,0.512
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future,0.0,0.0,1.0,0.0
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work,0.0,0.0,1.0,0.0
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us,0.0,0.0,1.0,0.0


# 2. Analysis of Readability

### Average Words per Sentence

In [21]:
# lets calculate average sentence length
number_of_sentences = df['article_title'].size

In [22]:
# word count for each sentence in article title
df['word_count'] = df['article_title'].str.len()

In [23]:
# average sentence length
df['avg_sentence_length'] = df['word_count'] / number_of_sentences

# 4. Complex Word Count

In [24]:
def tokenizer(text):
    text = text.lower()
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(text)
    filtered_words = list(filter(lambda token: token not in sw, tokens))
    return filtered_words

In [25]:
# Counting personal pronouns
def complex_word_count(text):
    tokens = tokenizer(text)
    complexWord = 0
    
    for word in tokens:
        vowels=0
        if word.endswith(('es','ed')):
            pass
        else:
            for w in word:
                if(w=='a' or w=='e' or w=='i' or w=='o' or w=='u'):
                    vowels += 1
            if(vowels > 2):
                complexWord += 1
    return complexWord

In [26]:
# complex word count
df['complex_word'] = df['article_title'].apply(complex_word_count)

In [27]:
df['complex_word_percentage'] = df['complex_word']/df['word_count']

In [28]:
df.head()

Unnamed: 0,URL_ID,URL,article_title,compound_score,negative_score,neutral_score,positive_score,word_count,avg_sentence_length,complex_word,complex_word_percentage
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes,0.4404,0.0,0.58,0.42,38,0.333333,3,0.078947
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator,0.2732,0.0,0.488,0.512,23,0.201754,2,0.086957
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future,0.0,0.0,1.0,0.0,30,0.263158,1,0.033333
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work,0.0,0.0,1.0,0.0,33,0.289474,3,0.090909
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us,0.0,0.0,1.0,0.0,21,0.184211,1,0.047619


# 5. Personal Pronouns

In [29]:
# Counting personal pronouns 
def personal_pronouns(text):
    tokens = tokenizer(text)
    personal_pronouns = 0
    for words in tokens:
      if words in ['I','we','my','ours','us']:
        personal_pronouns += 1
    return personal_pronouns
  

In [30]:
df['personal_pronouns'] = df['article_title'].apply(personal_pronouns)

In [31]:
df.head()

Unnamed: 0,URL_ID,URL,article_title,compound_score,negative_score,neutral_score,positive_score,word_count,avg_sentence_length,complex_word,complex_word_percentage,personal_pronouns
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes,0.4404,0.0,0.58,0.42,38,0.333333,3,0.078947,0
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator,0.2732,0.0,0.488,0.512,23,0.201754,2,0.086957,0
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future,0.0,0.0,1.0,0.0,30,0.263158,1,0.033333,0
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work,0.0,0.0,1.0,0.0,33,0.289474,3,0.090909,0
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us,0.0,0.0,1.0,0.0,21,0.184211,1,0.047619,2


# 6. Syllable count per word

In [32]:
# Counting personal pronouns 
def syllable_count(text):
    # tokens = tokenizer(text)
    syllables = 0
    for words in text:
      list1 = list([word for word in text if not word.endswith(("ed", "es"))])
      # for char in words:
      if word in list1:
        syllables += 1
    return syllables


In [33]:
# Counting personal pronouns 
def syllable_count(text):
    syllables = 0
    # list1 = list([word for word in text if word.endswith(("ed", "es"))])
    for i, w in enumerate(text):
      if w.endswith(('ed','es')):
        syllables += 1
    return syllables

In [34]:
df['syllable_count'] = df['article_title'].apply(syllable_count)
df.head(10)

Unnamed: 0,URL_ID,URL,article_title,compound_score,negative_score,neutral_score,positive_score,word_count,avg_sentence_length,complex_word,complex_word_percentage,personal_pronouns,syllable_count
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes,0.4404,0.0,0.58,0.42,38,0.333333,3,0.078947,0,0
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator,0.2732,0.0,0.488,0.512,23,0.201754,2,0.086957,0,0
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future,0.0,0.0,1.0,0.0,30,0.263158,1,0.033333,0,0
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work,0.0,0.0,1.0,0.0,33,0.289474,3,0.090909,0,0
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us,0.0,0.0,1.0,0.0,21,0.184211,1,0.047619,2,0
5,42,https://insights.blackcoffer.com/man-and-machi...,man machines together machines diligent humans...,0.0,0.0,1.0,0.0,57,0.5,3,0.052632,0,0
6,43,https://insights.blackcoffer.com/in-future-or-...,future upcoming years humans machines going wo...,0.0,0.0,1.0,0.0,74,0.649123,3,0.040541,0,0
7,44,https://insights.blackcoffer.com/how-neural-ne...,neural networks applied various areas future,0.0,0.0,1.0,0.0,44,0.385965,4,0.090909,0,0
8,45,https://insights.blackcoffer.com/how-machine-l...,machine learning affect business,0.0,0.0,1.0,0.0,32,0.280702,3,0.09375,0,0
9,46,https://insights.blackcoffer.com/deep-learning...,deep learning impact areas e learning,0.0,0.0,1.0,0.0,37,0.324561,3,0.081081,0,0


In [35]:
def polarity_score (positive_score , negative_score):
  try:
   return (positive_score - negative_score) / ((positive_score + negative_score))
  except:
   return 0

In [36]:
df["polarity_score"] = np.vectorize(polarity_score)(df['positive_score'],df['negative_score'])


In [37]:
def fog_index(averageSentenceLength, percentageComplexWord):
    fogIndex = 0.4 * (averageSentenceLength + percentageComplexWord)
    return fogIndex

In [38]:
df['fog_index'] = fog_index(df['avg_sentence_length'],df['complex_word_percentage'])

In [41]:
df.head()

Unnamed: 0,URL_ID,URL,article_title,compound_score,negative_score,neutral_score,positive_score,word_count,avg_sentence_length,complex_word,complex_word_percentage,personal_pronouns,syllable_count,polarity_score,fog_index
0,37,https://insights.blackcoffer.com/ai-in-healthc...,ai healthcare improve patient outcomes,0.4404,0.0,0.58,0.42,38,0.333333,3,0.078947,0,0,1.0,0.164912
1,38,https://insights.blackcoffer.com/what-if-the-c...,creation taking creator,0.2732,0.0,0.488,0.512,23,0.201754,2,0.086957,0,0,1.0,0.115484
2,39,https://insights.blackcoffer.com/what-jobs-wil...,jobs robots take humans future,0.0,0.0,1.0,0.0,30,0.263158,1,0.033333,0,0,0.0,0.118596
3,40,https://insights.blackcoffer.com/will-machine-...,machine replace human future work,0.0,0.0,1.0,0.0,33,0.289474,3,0.090909,0,0,0.0,0.152153
4,41,https://insights.blackcoffer.com/will-ai-repla...,ai replace us work us,0.0,0.0,1.0,0.0,21,0.184211,1,0.047619,2,0,0.0,0.092732
