-
Notifications
You must be signed in to change notification settings - Fork 0
/
coronavirus_tweets.test.py
155 lines (104 loc) · 5.65 KB
/
coronavirus_tweets.test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# import library
import unittest
import pandas as pd
# Import the module under test
import coronavirus_tweets
# Test case covering the functions of the coronavirus_tweets module
class TestCoronavirusTweets(unittest.TestCase):
    """Unit tests for the functions of the ``coronavirus_tweets`` module.

    Every test works on ``self.csv``, a DataFrame loaded from ``CSV_PATH``.
    The file is parsed once per test case and each test receives its own
    copy, so a test (or the function it exercises) that modifies the frame
    cannot influence the other tests.
    """

    # Location and encoding of the CSV fixture shared by all tests.
    CSV_PATH = 'data/coronavirus_tweets.csv'
    CSV_ENCODING = 'latin-1'

    @classmethod
    def setUpClass(cls):
        """Parse the CSV fixture a single time for the whole test case."""
        cls._pristine_csv = pd.read_csv(cls.CSV_PATH, encoding=cls.CSV_ENCODING)

    def setUp(self):
        """Hand each test a private copy of the parsed fixture."""
        self.csv = self._pristine_csv.copy()

    def _tokenized_tweets(self):
        """Return the fixture after the shared preprocessing pipeline.

        Applies, in order, ``lower_case``, ``remove_non_alphabetic_chars``,
        ``remove_multiple_consecutive_whitespaces`` and ``tokenize`` from the
        module under test.
        """
        frame = coronavirus_tweets.lower_case(self.csv)
        frame = coronavirus_tweets.remove_non_alphabetic_chars(frame)
        frame = coronavirus_tweets.remove_multiple_consecutive_whitespaces(frame)
        return coronavirus_tweets.tokenize(frame)

    def test_read_csv_3(self):
        """``read_csv_3`` yields the same data as the reference ``pd.read_csv`` call."""
        expected = self.csv
        received = coronavirus_tweets.read_csv_3(self.CSV_PATH)
        self.assertEqual(expected.to_json(), received.to_json())

    def test_get_sentiments(self):
        """``get_sentiments`` returns the five expected labels in the expected order."""
        expected = ['Neutral', 'Positive', 'Extremely Negative', 'Negative', 'Extremely Positive']
        received = coronavirus_tweets.get_sentiments(self.csv)
        self.assertEqual(expected, list(received))

    def test_second_most_popular_sentiment(self):
        """``second_most_popular_sentiment`` returns ``'Negative'`` for the fixture."""
        expected = 'Negative'
        received = coronavirus_tweets.second_most_popular_sentiment(self.csv)
        self.assertEqual(expected, received)

    def test_date_most_popular_tweets(self):
        """``date_most_popular_tweets`` returns ``'25-03-2020'`` for the fixture."""
        expected = '25-03-2020'
        received = coronavirus_tweets.date_most_popular_tweets(self.csv)
        self.assertEqual(expected, received)

    def test_lower_case(self):
        """``lower_case`` matches ``str.lower`` applied to ``OriginalTweet``."""
        expected = self.csv.copy()
        expected['OriginalTweet'] = expected['OriginalTweet'].str.lower()
        received = coronavirus_tweets.lower_case(self.csv)
        self.assertEqual(expected.to_json(), received.to_json())

    def test_remove_non_alphabetic_chars(self):
        """Runs of characters that are neither letters nor whitespace become one space."""
        expected = self.csv.copy()
        # Raw string: '\s' is not a valid escape in a normal string literal.
        expected['OriginalTweet'] = expected['OriginalTweet'].str.replace(
            r'[^a-zA-Z\s]+', ' ', regex=True
        )
        received = coronavirus_tweets.remove_non_alphabetic_chars(self.csv)
        self.assertEqual(expected.to_json(), received.to_json())

    def test_remove_multiple_consecutive_whitespaces(self):
        """Each run of whitespace is replaced by a single space."""
        expected = self.csv.copy()
        # Raw string: '\s' is not a valid escape in a normal string literal.
        expected['OriginalTweet'] = expected['OriginalTweet'].str.replace(
            r'\s+', ' ', regex=True
        )
        received = coronavirus_tweets.remove_multiple_consecutive_whitespaces(self.csv)
        self.assertEqual(expected.to_json(), received.to_json())

    def test_tokenize(self):
        """``tokenize`` matches ``str.split`` applied to ``OriginalTweet``."""
        expected = self.csv.copy()
        expected['OriginalTweet'] = expected['OriginalTweet'].str.split()
        received = coronavirus_tweets.tokenize(self.csv)
        self.assertEqual(expected.to_json(), received.to_json())

    def test_count_words_with_repetitions(self):
        """The preprocessed fixture contains 1350959 words counting repeats."""
        expected = 1350959
        received = coronavirus_tweets.count_words_with_repetitions(self._tokenized_tweets())
        self.assertEqual(expected, received)

    def test_count_words_without_repetitions(self):
        """The preprocessed fixture contains 80071 distinct words."""
        expected = 80071
        received = coronavirus_tweets.count_words_without_repetitions(self._tokenized_tweets())
        self.assertEqual(expected, received)

    def test_frequent_words(self):
        """``frequent_words(..., 10)`` returns the expected ten words in order."""
        expected = ['the', 'to', 't', 'co', 'and', 'https', 'covid', 'of', 'a', 'in']
        received = coronavirus_tweets.frequent_words(self._tokenized_tweets(), 10)
        self.assertEqual(expected, received)

    def test_remove_stopwords(self):
        """The first tweet keeps exactly the expected 11 tokens after ``remove_stop_words``."""
        expected_length = 11
        expected_values = ['menyrbie', 'phil', 'gahan', 'chrisitv', 'https', 'ifz', 'fan', 'https', 'ghgfzcc', 'https', 'nlzdxno']
        received = coronavirus_tweets.remove_stop_words(self._tokenized_tweets())
        received_first_row = received['OriginalTweet'].iloc[0]
        self.assertEqual(len(received_first_row), expected_length)
        self.assertEqual(received_first_row, expected_values)

    def test_stemming(self):
        """The first tweet holds the expected 11 stems after ``stemming``."""
        expected_value = ['menyrbi', 'phil', 'gahan', 'chrisitv', 'http', 'ifz', 'fan', 'http', 'ghgfzcc', 'http', 'nlzdxno']
        expected_length = 11
        without_stop_words = coronavirus_tweets.remove_stop_words(self._tokenized_tweets())
        received = coronavirus_tweets.stemming(without_stop_words)
        received_value = received['OriginalTweet'].iloc[0]
        self.assertEqual(expected_value, received_value)
        self.assertEqual(len(received_value), expected_length)

    def test_mnb_predict(self):
        """``mnb_predict`` returns one prediction per row of the fixture."""
        expected = len(self.csv["Sentiment"])
        received = coronavirus_tweets.mnb_predict(self.csv)
        self.assertEqual(expected, len(received))

    def test_mnb_accuracy(self):
        """``mnb_accuracy`` of the predictions against the true labels is 0.997."""
        expected = 0.997
        predict_values = coronavirus_tweets.mnb_predict(self.csv)
        received = coronavirus_tweets.mnb_accuracy(predict_values, self.csv["Sentiment"].values)
        # NOTE(review): exact float comparison -- presumably mnb_accuracy
        # rounds its result to three decimals; confirm against the module.
        self.assertEqual(expected, received)
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()