## Installing the dependencies

!pip install textblob

!python -m textblob.download_corpora

### Importing the necessary packages

In [1]:
# VADER analyzer, used below to compute the 'Sentiment_Score' column.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Download the 'vader_lexicon' resource for the analyzer imported above;
# nltk only reports "already up-to-date" when it is present locally.
nltk.download('vader_lexicon')

# TextBlob provides the polarity used for the 'Sentiment' labels.
from textblob import TextBlob
import pandas as pd
# Regular expressions for cleaning the article text.
import re

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\GM\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


### Reading the CSV file, dropping the NaN values and resetting the index values

In [2]:
# Load the HIV news article dataset.
# Rows without article text are dropped and the index is reset, so every
# remaining 'Articles' value can be cleaned and scored and the row labels
# stay 0..n-1.
df = (
    pd.read_csv('Final_HIV_without_Nan.csv')
    .dropna(subset=['Articles'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Headlines,Descriptions,Authors,Published_Dates,Publication,Articles,category,Keywords,Summaries,Source_URLs
0,Drug â€˜can greatly reduce risk of HIV infecti...,The largest online news service for Afghanistan,Khaama Press,25-11-2010,The Khaama Press News Agency,A drug used to treat HIV-positive patients may...,{},"['drug', 'reduce', 'infection', 'results', 'us...",A drug used to treat HIV-positive patients may...,https://www.khaama.com/drug-can-greatly-reduce...
1,Afghanistan recorded 1367 HIV/AIDS positive ca...,The largest online news service for Afghanistan,Ahmad Shah Ghanizada,06-07-2013,The Khaama Press News Agency,"At least 1,367 positive cases of Human immunod...",{},"['afghanistan', 'paikan', 'positive', '2011', ...","At least 1,367 positive cases of Human immunod...",https://www.khaama.com/afghanistan-recorded-13...
2,Afghanistan sees 38 percent increase in HIV/AI...,The largest online news service for Afghanistan,Ahmad Shah Ghanizada,01-12-2013,The Khaama Press News Agency,Officials in the ministry of public health of ...,{},"['afghanistan', 'positive', 'sees', 'virus', '...",Officials in the ministry of public health of ...,https://www.khaama.com/afghanistan-sees-38-per...
3,"Over 1,200 people live with HIV in Afghanistan...",The largest online news service for Afghanistan,Sayed Jawad,01-12-2012,The Khaama Press News Agency,"Today marks World AIDS Day, a time for the glo...",{},"['immunodeficiency', '1200', 'aids', 'afghanis...","Today marks World AIDS Day, a time for the glo...",https://www.khaama.com/over-1200-people-live-w...
4,Aishwarya Rai Bachchan named UN HIV/AIDS envoy,The largest online news service for Afghanistan,Sajad,25-09-2012,The Khaama Press News Agency,The United Nations Monday announced the additi...,{},"['infections', 'mother', 'aishwarya', 'rai', '...",The United Nations Monday announced the additi...,https://www.khaama.com/aishwarya-rai-bachchan-...


### Cleaning the extracted articles 

In [3]:
# Matches, tried in this order at each position: an @mention, any single
# character that is not an ASCII letter, digit, space or tab, or a URL of the
# form scheme://... . Written as a raw string so the regex escapes are not
# interpreted as (invalid) Python string escapes.
_NOISE_RE = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)")


def clean_article(text):
    """Return `text` with every `_NOISE_RE` match replaced by a space.

    After the substitution, runs of whitespace are collapsed to a single
    space and leading/trailing whitespace is removed.
    """
    return ' '.join(_NOISE_RE.sub(' ', text).split())


# Overwrite the 'Articles' column with the cleaned text. Iterating over the
# values directly avoids looking rows up by label with a positional counter.
df['Articles'] = [clean_article(text) for text in df['Articles']]

### Sentiment for each cleaned article

In [4]:
def polarity_label(polarity):
    """Map a polarity score to 'positive', 'neutral' or 'negative'.

    Scores above zero are positive, exactly zero is neutral, and anything
    else is negative.
    """
    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'


# Label each article from its TextBlob polarity. Iterating over the column
# values (instead of looking rows up by label with a counter) keeps the
# labels in row order even when the index is not 0..n-1.
sentiment = [
    polarity_label(TextBlob(text).sentiment.polarity)
    for text in df['Articles']
]

df['Sentiment'] = sentiment

In [5]:
# Score each article with VADER and keep the 'compound' value.
# NOTE: the 'Sentiment' label is derived from TextBlob polarity, while this
# score comes from VADER. The two analyzers are independent, so a row can
# carry a 'positive' label together with a negative score.
sia = SentimentIntensityAnalyzer()
df['Sentiment_Score'] = [
    sia.polarity_scores(text)['compound'] for text in df['Articles']
]

### Dropping the unnecessary columns to display the Article and Sentiment columns

In [6]:
# Article metadata that is left out of the overview table.
dropped_columns = [
    'Headlines',
    'Descriptions',
    'Authors',
    'Published_Dates',
    'Publication',
    'Keywords',
    'Summaries',
]

# `data` is a new frame without those columns; `df` itself is unchanged.
data = df.drop(columns=dropped_columns)
data

Unnamed: 0,Articles,category,Source_URLs,Sentiment,Sentiment_Score
0,A drug used to treat HIV positive patients may...,{},https://www.khaama.com/drug-can-greatly-reduce...,positive,0.9765
1,At least 1 367 positive cases of Human immunod...,{},https://www.khaama.com/afghanistan-recorded-13...,positive,0.9153
2,Officials in the ministry of public health of ...,{},https://www.khaama.com/afghanistan-sees-38-per...,positive,0.9674
3,Today marks World AIDS Day a time for the glob...,{},https://www.khaama.com/over-1200-people-live-w...,negative,-0.9468
4,The United Nations Monday announced the additi...,{},https://www.khaama.com/aishwarya-rai-bachchan-...,positive,0.8858
5,Saturday December 03 2011 Head of the Afghan M...,{},https://www.khaama.com/concerns-over-hiv-outsp...,positive,0.9843
6,The Ministry of Public of Health of Afghanista...,{},https://www.khaama.com/hivaids-infection-on-th...,positive,-0.9393
7,The Egypt s military has claimed that it has i...,{},https://www.khaama.com/egypts-army-claims-it-h...,positive,-0.5267
8,The Afghan Public Health Minister Ferozuddin F...,{},https://www.khaama.com/afghan-minister-narrowl...,negative,-0.8826
9,Local security officials in northern Baghlan p...,{},https://www.khaama.com/drugs-and-weapons-seize...,negative,-0.9349


### Calculating the percentage of articles in each polarity class

In [7]:
# Share of articles per 'Sentiment' label.
# The names `ptweets` / `ntweets` are kept as they were, but they hold article
# texts, selected with a boolean mask rather than a label-indexed loop.
total_articles = len(df['Articles'])

# articles labelled positive
ptweets = df.loc[df['Sentiment'] == 'positive', 'Articles'].tolist()
# percentage of positive articles
print("Positive articles percentage: {} %".format(100*len(ptweets)/total_articles))

# articles labelled negative
ntweets = df.loc[df['Sentiment'] == 'negative', 'Articles'].tolist()
# percentage of negative articles
print("Negative articles percentage: {} %".format(100*len(ntweets)/total_articles))

# percentage of neutral articles: everything that is neither positive nor negative
print("Neutral articles percentage: {} %".format(100*(total_articles - len(ntweets) - len(ptweets))/total_articles))

Positive articles percentage: 92.93103448275862 %
Negative articles percentage: 6.551724137931035 %
Neutral articles percentage: 0.5172413793103449 %
