-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
68 lines (52 loc) · 1.43 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the NLTK resources used below: the stop-word and names corpora, and
# the lexicon required by the VADER-based SentimentIntensityAnalyzer.
nltk.download(["stopwords", "names"])
nltk.download("vader_lexicon")

# English stop words; remove_unwanted() strips these from each tweet.
stopwords = nltk.corpus.stopwords.words("english")

# Words that, together with a '?', mark a tweet as a question.
qwords = [
    "who", "what", "when", "where", "why",
    "how", "is", "can", "does", "do",
]

# Input data; the script reads tweets from its 'text' column.
df = pd.read_csv("AirlineAmended2.csv")

# Analyzer whose 'compound' score drives the sentiment labels.
sia = SentimentIntensityAnalyzer()

# Stop words plus lower-cased first names from the NLTK names corpus.
# NOTE(review): nothing else in this file reads `unwanted` -- confirm whether
# remove_unwanted() was meant to filter on this list instead of `stopwords`.
unwanted = nltk.corpus.stopwords.words("english")
unwanted.extend(map(str.lower, nltk.corpus.names.words()))
def remove_unwanted(text, words=None):
    """Return *text* lower-cased with every unwanted word removed.

    The text is split on single spaces, each token is lower-cased, and every
    token that exactly equals one of *words* is dropped. The remaining tokens
    are re-joined with single spaces, so empty tokens produced by runs of
    spaces are kept as-is.

    Args:
        text: The string to clean.
        words: Iterable of lower-case words to drop. Defaults to the
            module-level ``stopwords`` list; the module-level ``unwanted``
            list (stop words plus names) can be passed instead.

    Returns:
        The cleaned, lower-cased string.
    """
    if words is None:
        words = stopwords
    # A set gives O(1) membership tests, and filtering (rather than calling
    # list.remove once per stop word) drops *every* occurrence, not just the
    # first one.
    blocked = set(words)
    tokens = (token.lower() for token in text.split(' '))
    return ' '.join(token for token in tokens if token not in blocked)
def classify_sentiment(score, threshold=0.5):
    """Map a compound sentiment score to 'positive', 'negative' or 'neutral'.

    Scores strictly above *threshold* are positive, scores strictly below
    ``-threshold`` are negative, and everything else is neutral.

    NOTE(review): VADER's own guidance uses +/-0.05 as the cut-off for the
    compound score; this script uses +/-0.5 -- confirm that is intentional.
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'


def is_question(text, question_words):
    """Return True if *text* has a '?' and contains one of *question_words*.

    Matching is case-insensitive and on whole words: every non-letter
    character is treated as a separator, so "what's" matches 'what' while
    'cancelled' does not match 'can'.
    """
    if '?' not in text:
        return False
    letters_only = ''.join(ch if ch.isalpha() else ' ' for ch in text.lower())
    return not set(question_words).isdisjoint(letters_only.split())


def first_token(text):
    """Return the first whitespace-separated token of *text*, or '' if none."""
    tokens = text.split()
    return tokens[0] if tokens else ''


def main():
    """Score every tweet in ``df`` and write the result to AirlineResult.csv.

    Adds four columns to ``df``:
      * 'Compound'     -- compound score of the stop-word-stripped tweet
      * 'Sentiment'    -- label derived from the compound score
      * 'Airplane'     -- first token of the stop-word-stripped tweet
      * 'Question Tag' -- 'question' for tweets that look like questions,
                          otherwise ''
    """
    # Missing text cells load as NaN (a float); treat them as empty tweets so
    # the string handling below does not fail on them.
    tweets = df['text'].fillna('').astype(str)

    cleaned = [remove_unwanted(tweet) for tweet in tweets]
    scores = [sia.polarity_scores(text)['compound'] for text in cleaned]

    df['Compound'] = scores
    df['Sentiment'] = [classify_sentiment(score) for score in scores]
    # A tweet made up only of stop words cleans down to nothing; first_token
    # returns '' for it instead of raising IndexError.
    df['Airplane'] = [first_token(text) for text in cleaned]
    # Question detection looks at the ORIGINAL tweet: the question words are
    # themselves English stop words, so the cleaned text has already lost them.
    df['Question Tag'] = [
        'question' if is_question(tweet, qwords) else '' for tweet in tweets
    ]
    df.to_csv('AirlineResult.csv')


if __name__ == "__main__":
    main()