Skip to content

Commit

Permalink
Merge pull request #35 from rguptauw/string_fix
Browse files (browse the repository at this point in the history)
string fix
  • Loading branch information
vivanvish committed Jun 6, 2018
2 parents 5ea9765 + 479cd8e commit 79dfb3f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
5 changes: 3 additions & 2 deletions pybcoin/SentimentAnalyzer/sentiment_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, config):
self.logger = logging.getLogger('simpleExample')
self.analyzer = SentimentIntensityAnalyzer()
self.path = config['Sentiment']['text_csv_path']
self.write_format = '_sentiment.csv'

def __sentiment_count__(self, grouped_Data):
"""
Expand Down Expand Up @@ -61,6 +62,6 @@ def sentiment_scorer(self, keyword='tweets'):

text_df = text_df.groupby(['Date']).apply(
self.__sentiment_count__).reset_index().drop(['level_1'], axis=1)
hist_sent = pd.read_csv(self.path + keyword + '_sentiment.csv')
hist_sent = pd.read_csv(self.path + keyword + self.write_format)
hist_sent = hist_sent.append(text_df)
hist_sent.to_csv(self.path + keyword + '_sentiment.csv', index=False)
hist_sent.to_csv(self.path + keyword + self.write_format, index=False)
18 changes: 11 additions & 7 deletions pybcoin/test/test_sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def setUp(self):
self.text = ['neo', 'bitcoin', 'bitcoin', 'fork']
self.date = ['22/05/2016']
self.path = './pybcoin/test/data/'
self.tweets_path = 'tweets_sentiment.csv'
self.image_path = './pybcoin/static/date_22-05-2016.png'
self.static_path = './pybcoin/static/'
self.image_name = 'date_22-05-2016.png'

"""
Test function for text sentiments.
Expand All @@ -34,10 +38,10 @@ def test_sentiment_score(self):
"""
self.collector.sentiment_scorer(keyword='tweets')
test_sentiment = pd.read_csv(self.collector.path +
'tweets_sentiment.csv')
self.tweets_path)
flag = test_sentiment.empty
self.assertEqual(flag, False)
os.remove('./pybcoin/static/date_22-05-2016.png')
os.remove(self.image_path)

def test_sentiment_column_names(self):

Expand All @@ -46,17 +50,17 @@ def test_sentiment_column_names(self):
"""
self.collector.sentiment_scorer(keyword='tweets')
test_col_names = pd.read_csv(self.collector.path +
'tweets_sentiment.csv').columns
self.tweets_path).columns
self.assertEqual(sorted(test_col_names),
['Date', 'Negative', 'Positive'])
os.remove('./pybcoin/static/date_22-05-2016.png')
os.remove(self.image_path)

def test_sentiment_wordcloud(self):

"""
Checks that wordcloud is generated.
"""
create_word_cloud(self.text, self.date)
self.assertEqual(os.path.isfile('./pybcoin/static/' +
'date_22-05-2016.png'), True)
os.remove('./pybcoin/static/date_22-05-2016.png')
self.assertEqual(os.path.isfile(self.static_path +
self.image_name), True)
os.remove(self.image_path)
15 changes: 8 additions & 7 deletions pybcoin/utils/pre_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ def remove_urls(text):
: param path
: return text(str)
"""
url_pattern = r'(?:(?:http|https|https://www|https://|http://|):\/\/)' \
'?([-a-zA-Z0-9.]{2,256}\.[a-z]{2,4})\b(?:\/[-a-zA-Z0-9@:' \
'%_\+.~#?&//=]*)?'
text = re.sub(r"http\S+", "", text)
text = re.sub(r'(?:(?:http|https|https://www|https://|http://|):\/\/)'
'?([-a-zA-Z0-9.]{2,256}\.[a-z]{2,4})\b(?:\/[-a-zA-Z0-9@:'
'%_\+.~#?&//=]*)?', "", text, flags=re.MULTILINE)
text = re.sub(url_pattern, "", text, flags=re.MULTILINE)
text = '\n'.join([a for a in text.split("\n") if a.strip()])
return text

Expand Down Expand Up @@ -100,18 +101,18 @@ def create_word_cloud(text, date_generated):
: param text
: return png(matplotlib image)
"""
word_cloud_path = './pybcoin/static/date_'
text = ' '.join(text)
wordcloud = WordCloud(stopwords=STOPWORDS,
background_color='black',
background_color='white',
width=2500,
height=2000
).generate(text)
plt.figure(1, figsize=(13, 13))
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('./pybcoin/static/date_' +
date_generated[0].replace('/', '-') +
'.png', facecolor='k', bbox_inches='tight')
plt.savefig(word_cloud_path + date_generated[0].replace('/', '-') +
'.png', bbox_inches='tight')


def pre_process_data(path):
Expand Down

0 comments on commit 79dfb3f

Please sign in to comment.