## Installing the dependencies

!pip install textblob

!python -m textblob.download_corpora

### Importing the necessary packages

In [1]:
from textblob import TextBlob
import pandas as pd
import re

### Reading the CSV file, dropping the NaN values, and resetting the index

In [2]:
# Load the articles, discard every row that has a missing value in any column,
# and renumber the surviving rows from 0 so later cells can rely on a gap-free index.
raw_articles = pd.read_csv('Final.csv')
df = raw_articles.dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Headlines,Descriptions,Authors,Published_Dates,Publication,Articles,category,Keywords,Summaries,Source_URLs
0,BOOZE RUINS LIVES!,â€œTHE wake-up call for me should have been th...,Jabu Kumalo,2018-08-15T14:30:02.000Z,DailySun,â€œTHE wake-up call for me should have been th...,{},"['aa', 'booze', 'hit', 'lives', 'decided', 'in...",He told Daily Sun he sometimes couldnâ€™t beli...,https://www.dailysun.co.za/News/National/booze...
1,"ALCOHOLICS, IT'S NEVER TOO LATE!",IF YOU have a drinking problem or you know som...,Jabu Kumalo,2017-10-11T15:30:03.000Z,DailySun,IF YOU have a drinking problem or you know som...,{},"['west', 'alcoholics', 'visit', 'thought', 'la...",IF YOU have a drinking problem or you know som...,https://www.dailysun.co.za/News/National/alcoh...
2,NO BOOZE FOR THESE TWO!,SUNDAY was a joyful day as a husband and wife ...,Jabu Kumalo,2018-10-03T18:00:10.000Z,DailySun,SUNDAY was a joyful day as a husband and wife ...,{},"['anonymous', 'times', 'alcoholics', 'booze', ...",SUNDAY was a joyful day as a husband and wife ...,https://www.dailysun.co.za/News/National/no-bo...
3,A SEASON TO SOBER UP!,MZANSI is known as a nation of boozers.,Sifiso Jimta,2018-09-05T11:30:02.000Z,DailySun,MZANSI is known as a nation of boozers.To give...,{},"['season', 'janet', 'soberspringchallenge', 'i...",The organisation has launched the #SoberSpring...,https://www.dailysun.co.za/News/National/a-sea...
4,NAIR MAY GO FOR MENTAL HEALTH CHECK!,HE called President Cyril Ramaphosa the k-word...,Sun Reporter,2018-10-02T09:34:03.000Z,DailySun,HE called President Cyril Ramaphosa the k-word...,{},"['surgeon', 'district', 'nairs', 'fort', 'napi...",Govender argued that Nair should be treated as...,https://www.dailysun.co.za/News/National/nair-...


### Cleaning the extracted articles 

In [3]:
# Matches, in order of preference at each position: an @mention, any single
# character that is not an ASCII letter, digit, space or tab, or a URL
# (scheme://...). Written as a raw string so that \w, \/ and \S reach the regex
# engine unchanged instead of being treated as (invalid) Python string escapes.
NOISE_PATTERN = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")


def clean_article(text):
    """Return `text` with every NOISE_PATTERN match replaced by a space.

    Runs of whitespace are then collapsed to single spaces and leading/trailing
    whitespace is removed (via split/join).
    """
    return ' '.join(NOISE_PATTERN.sub(' ', text).split())


# Element-wise map keeps the result aligned with df's index, whatever that index is.
df['clean_news'] = df['Articles'].map(clean_article)

### Sentiment for each cleaned article

In [4]:
def _polarity_label(polarity):
    """Translate a polarity score into 'positive', 'neutral' or 'negative'."""
    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'


# Score each cleaned article with TextBlob and keep only the sign of its polarity.
sentiment = [
    _polarity_label(TextBlob(article).sentiment.polarity)
    for article in df['clean_news']
]

df['sentiment'] = sentiment

### Dropping the unnecessary columns

In [5]:
# Columns that are no longer needed once the cleaned text and its sentiment
# label have been derived.
unused_columns = [
    'Headlines', 'Descriptions', 'Authors', 'Published_Dates', 'Articles',
    'Keywords', 'Summaries', 'Publication', 'category', 'Source_URLs',
]
data = df.drop(columns=unused_columns)
data

Unnamed: 0,clean_news,sentiment
0,THE wake up call for me should have been the d...,positive
1,IF YOU have a drinking problem or you know som...,positive
2,SUNDAY was a joyful day as a husband and wife ...,positive
3,MZANSI is known as a nation of boozers To give...,positive
4,HE called President Cyril Ramaphosa the k word...,positive
5,HE STARTED the race in the middle of the night...,positive
6,ORLANDO PIRATES players allegedly escaped from...,negative
7,Rumors Slipping into prostitution Safe spaces ...,positive
8,motivating talk The AA members explained the p...,positive
9,Dear Bachi I am a 34 year old health professio...,positive


### Calculating the percentage of positive, negative, and neutral articles

In [6]:
# Total number of articles that were scored.
total_articles = len(df['clean_news'])

# Cleaned articles labelled positive / negative. The `*tweets` names are kept
# as-is even though the items are news articles.
ptweets = df.loc[df['sentiment'] == 'positive', 'clean_news'].tolist()
ntweets = df.loc[df['sentiment'] == 'negative', 'clean_news'].tolist()

# Share of positive articles.
print("Positive articles percentage: {} %".format(100*len(ptweets)/total_articles))
# Share of negative articles.
print("Negative articles percentage: {} %".format(100*len(ntweets)/total_articles))
# Whatever is neither positive nor negative is counted as neutral.
neutral_count = total_articles - len(ntweets) - len(ptweets)
print("Neutral articles percentage: {} %".format(100*neutral_count/total_articles))

Positive articles percentage: 75.92592592592592 %
Negative articles percentage: 24.074074074074073 %
Neutral articles percentage: 0.0 %
