-
Notifications
You must be signed in to change notification settings - Fork 0
/
Movie Sentiment Analysis.py
214 lines (171 loc) · 8.05 KB
/
Movie Sentiment Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# libraries to include
import re
import tweepy
import matplotlib.pyplot as plt
from tweepy import OAuthHandler
from textblob import TextBlob
import pandas as pd
from youtube import youtube_search
import omdb # to access imdb API
import pandas as pd # for data array handling
from bs4 import BeautifulSoup # for website parsing and scraping (rotten tomatoes)
import requests # for http access
import numpy as np
# generic Twitter client class used for sentiment analysis
class TwitterClient(object):
    """Fetch tweets through tweepy and label each one with a sentiment.

    The sentiment label ('positive', 'neutral' or 'negative') is derived
    from the TextBlob polarity of the cleaned tweet text.
    """

    # Matches, in order: @mentions, any character that is not an ASCII
    # letter/digit/space/tab, and URLs of the form scheme://rest.
    # Compiled once so clean_tweet() does not re-resolve it per tweet.
    _NOISE_PATTERN = re.compile(
        r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"
    )

    def __init__(self):
        """Authenticate against Twitter and build the tweepy API object.

        Credentials are read from the environment variables
        TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN
        and TWITTER_ACCESS_TOKEN_SECRET when they are set.

        On failure ``self.api`` is left as ``None`` and an error is printed;
        no exception is propagated.
        """
        import os  # function-scope import keeps this block self-contained

        # SECURITY NOTE(review): the literal fallbacks below are real-looking
        # credentials committed to source control. They are kept only so the
        # script keeps working without extra setup; they should be revoked
        # and removed, leaving the environment variables as the only source.
        consumer_key = os.environ.get(
            'TWITTER_CONSUMER_KEY', 'nEO5EZpVBNe1NuZ0H0Hpkqae3')
        consumer_secret = os.environ.get(
            'TWITTER_CONSUMER_SECRET',
            '6uxcNCxHZWR9X9fji6uRn8LA3s1JTscf2vg89mkCYbkZY4JUTT')
        access_token = os.environ.get(
            'TWITTER_ACCESS_TOKEN',
            '298403978-dFLlbaYtt62pA8FlrJUonGI1lLvLGWbsMpMRnOS9')
        access_token_secret = os.environ.get(
            'TWITTER_ACCESS_TOKEN_SECRET',
            'Dmoo99m4tV4u6lmxEoZvbSO3dyUsMVKGBwQnxWtzzP9uf')

        # Define both attributes up front so later code can test for None
        # instead of hitting AttributeError after a failed authentication.
        self.auth = None
        self.api = None
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still work.
            self.api = None
            print("Error: Authentication Failed!")

    def clean_tweet(self, tweet):
        """Return ``tweet`` with mentions, links and special characters removed.

        Every match of the noise pattern is replaced by a space, then runs of
        whitespace are collapsed to single spaces and the ends are trimmed.
        """
        return ' '.join(self._NOISE_PATTERN.sub(' ', tweet).split())

    def get_tweet_sentiment(self, tweet):
        """Classify ``tweet`` as 'positive', 'neutral' or 'negative'.

        The label is the sign of the TextBlob polarity of the cleaned text.
        """
        # compute the polarity once instead of once per comparison
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity
        if polarity > 0:
            return 'positive'
        if polarity == 0:
            return 'neutral'
        return 'negative'

    def get_tweets(self, query, count=10):
        """Search Twitter for ``query`` and return the parsed tweets.

        Args:
            query: search string passed to the API as ``q``.
            count: number of tweets requested from the API.

        Returns:
            A list of dicts with the keys 'text' and 'sentiment'. The list is
            empty (never ``None``) when the API is unavailable or the search
            fails, so callers can always iterate over the result.
        """
        tweets = []

        api = getattr(self, 'api', None)
        if api is None:
            print("Error : Twitter API client is not available")
            return tweets

        try:
            # call twitter api to fetch tweets
            fetched_tweets = api.search(q=query, count=count)
        except tweepy.TweepError as e:
            # print error (if any) and fall back to the empty list
            print("Error : " + str(e))
            return tweets

        # Texts already stored. The sentiment depends only on the text, so
        # two parsed tweets are equal exactly when their texts are equal;
        # a set lookup replaces the linear dict-in-list scan.
        seen_texts = set()
        for tweet in fetched_tweets:
            text = tweet.text
            # a tweet that has retweets is stored only once
            if tweet.retweet_count > 0 and text in seen_texts:
                continue
            seen_texts.add(text)
            tweets.append({
                'text': text,
                'sentiment': self.get_tweet_sentiment(text),
            })
        return tweets
def youtube(query="Deadpool 2: The Final Trailer"):
    """Estimate sentiment for ``query`` from YouTube like/dislike/view counts.

    Likes count as positive and dislikes as negative. Views with neither are
    treated as neutral and split 60/40 between positive and negative. A pie
    chart of the two resulting shares is shown.

    Args:
        query: search string handed to ``youtube_search``. Defaults to the
            trailer title the script was originally written for.

    Returns:
        ``[positive_pct, negative_pct, total_pct]`` as floats. ``total_pct``
        is 100.0 when data was found; all three values are 0.0 (and no chart
        is drawn) when the search yields no rows or no views.
    """
    no_data = [0.0, 0.0, 0.0]

    # presumably youtube_search returns one record per video — confirm
    # against the youtube module
    frame = pd.DataFrame(data=youtube_search(query))
    if frame.empty:
        return no_data

    # Coerce into a fresh frame (no write into a slice of `frame`, which
    # triggers pandas' chained-assignment warning). Values that cannot be
    # parsed as numbers count as 0 instead of aborting the whole run.
    count_columns = ['viewCount', 'likeCount', 'dislikeCount']
    counts = frame[count_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

    likes = float(counts['likeCount'].sum())
    dislikes = float(counts['dislikeCount'].sum())
    views = float(counts['viewCount'].sum())
    if views <= 0:
        # nothing to normalise by; avoid a division by zero
        return no_data

    # express everything as a percentage of all views
    pos = 100.0 * likes / views
    neg = 100.0 * dislikes / views
    neut = 100.0 * (views - likes - dislikes) / views

    # neutral views are attributed 60% to positive and 40% to negative
    slices = [pos + 0.6 * neut, neg + 0.4 * neut]
    cols = ['gold', 'red']
    feedback = ['Positive Review', 'Negative Review']
    plt.pie(slices, labels=feedback, colors=cols, startangle=90, shadow=True,
            explode=(0.1, 0.1), autopct='%1.1f%%')
    plt.axis('equal')
    plt.title('Sentiment Pie Chart')
    plt.show()

    return [slices[0], slices[1], pos + neg + neut]
def rotten_tomatoes(title="Deadpool 2"):
    """Scrape Rotten Tomatoes scores for ``title`` and plot them.

    The movie page URL is built from the title (spaces become underscores,
    colons are dropped). Two pie charts are shown: one for the user rating
    and one for the professional score.

    Args:
        title: movie title. Defaults to the movie the script was originally
            written for.

    Returns:
        ``[positive_fraction, negative_fraction]``: the mean of the user and
        professional scores, and of their complements, each scaled to 0..1.

    Raises:
        requests.RequestException: the page could not be fetched.
        ValueError: an expected score element is missing from the page or
            contains no number.
    """
    tomato_base_url = 'https://www.rottentomatoes.com/m/'
    tomato_url = tomato_base_url + title.replace(' ', '_').replace(':', '')

    # A timeout keeps the script from hanging forever on a stalled
    # connection; an HTTP error status is reported as such instead of being
    # parsed as if it were the movie page.
    response = requests.get(tomato_url, timeout=10)
    response.raise_for_status()
    # name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed
    soup = BeautifulSoup(response.text, 'html.parser')

    def _score_from_span(css_class):
        # Return the first integer found in the text of the first matching span.
        element = soup.find('span', {'class': css_class})
        found = re.search(r'\d+', element.get_text()) if element is not None else None
        if found is None:
            raise ValueError(
                "Rotten Tomatoes page {} has no score in span '{}'".format(
                    tomato_url, css_class))
        return int(found.group())

    def _show_pie(sizes, cols):
        # Draw one positive/negative pie chart for a (positive, negative) pair.
        feedback = ['positive reviews', 'negative reviews']
        plt.pie(sizes, explode=(0.1, 0.1), labels=feedback, colors=cols,
                startangle=90, shadow=True)
        plt.axis('equal')
        plt.title('sentiments')
        plt.show()

    RT_user_rating = _score_from_span('superPageFontColor')
    RT_prof_score = _score_from_span('meter-value superPageFontColor')
    print(RT_prof_score)

    # scores are percentages, so the negative share is the complement
    rt_user_neg = 100 - RT_user_rating
    rt_prof_neg = 100 - RT_prof_score

    _show_pie((RT_user_rating, rt_user_neg), ['gold', 'red'])
    _show_pie((RT_prof_score, rt_prof_neg), ['green', 'blue'])

    # two percentages are added, so dividing by 200 yields their mean as a
    # fraction in 0..1
    result = [(RT_user_rating + RT_prof_score) / 200.0,
              (rt_user_neg + rt_prof_neg) / 200.0]
    return result
# Main Driver Function
def main():
    """Combine Twitter, YouTube and Rotten Tomatoes sentiment for 'Deadpool 2'.

    Prints the positive/negative tweet percentages, shows a pie chart for the
    Twitter data, runs the YouTube and Rotten Tomatoes analyses (each shows
    its own charts), and finally shows a pie chart of the combined result.
    """
    # creating object of TwitterClient class
    api = TwitterClient()
    # calling function to get tweets
    # NOTE(review): the original asked for count = 200000000; 100 is used
    # here on the assumption that the search endpoint caps a single request
    # at 100 results — confirm against the Twitter/tweepy documentation.
    # `or []` keeps the code below working if get_tweets returns None.
    tweets = api.get_tweets(query='Deadpool 2', count=100) or []

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']

    # percentages are relative to the tweets that carry an opinion
    opinionated = len(ptweets) + len(ntweets)
    if opinionated:
        print("Positive Tweets Percentage: {} %".format(100 * len(ptweets) / opinionated))
        print("Negative Tweets Percentage: {} %".format(100 * len(ntweets) / opinionated))
    else:
        # avoid dividing by zero when every tweet is neutral or none was found
        print("No positive or negative tweets found")

    cols = ['g', 'r']
    if tweets:
        # neutral tweets are attributed 60% to positive and 40% to negative
        neutral = len(tweets) - opinionated
        positives = 100 * (0.6 * neutral + len(ptweets)) / len(tweets)
        negatives = 100 * (0.4 * neutral + len(ntweets)) / len(tweets)
        # making a Pie Chart for the data
        feedback = ['Positive Review', 'Negative Review']
        plt.pie([positives, negatives], labels=feedback, colors=cols, startangle=90,
                shadow=True, explode=(0.1, 0.1), autopct='%1.1f%%')
        plt.title('Sentiment Pie Chart')
        plt.show()
        twitter_result = [positives, negatives, positives + negatives]
    else:
        # no tweets: contribute nothing to the combined totals
        twitter_result = [0.0, 0.0, 0.0]

    youtube_result = youtube()
    # stored under a new name so the rotten_tomatoes function is not shadowed
    rotten_result = rotten_tomatoes()

    # Add the two sources element by element. `list + list` would only
    # concatenate them and silently ignore the YouTube numbers.
    combined = [t + y for t, y in zip(twitter_result, youtube_result)]
    if combined[2]:
        social = [combined[0] / combined[2], combined[1] / combined[2]]
    else:
        social = [0.0, 0.0]

    # `social` and `rotten_result` are both fractions in 0..1; the final
    # figure is their mean, expressed as a percentage.
    final_percentages = [100 * (s + r) / 2 for s, r in zip(social, rotten_result)]
    if sum(final_percentages) <= 0:
        print("No sentiment data available for the final chart")
        return

    feedback = ['Final Positive Review', 'Final Negative Review']
    plt.pie(final_percentages, labels=feedback, colors=cols, startangle=90,
            shadow=True, explode=(0.1, 0.1), autopct='%1.1f%%')
    plt.title('Sentiment Pie Chart')
    plt.show()


if __name__ == "__main__":
    main()