<a href="https://colab.research.google.com/github/nakhimchea/sentiment_analysis_ipynb/blob/main/SmartSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# *Import Libraries*

**SmartSA: Analysis of Twitter and Application to Strategy**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

import pandas

**Twitter Scraping**

In [None]:
import snscrape.modules.twitter as twitter

# *Loading RoBERTa model*

**Get Model**

In [None]:
# Checkpoint identifier passed to from_pretrained; the tokenizer cell reuses
# the same name so that model and tokenizer come from one checkpoint.
RoBERTa = 'cardiffnlp/twitter-roberta-base-sentiment'
# Load the sequence-classification model for that checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(RoBERTa)

**Sparse Labels**

In [None]:
# Human-readable class names, looked up by position in the probability vector.
# NOTE(review): assumes the checkpoint orders its classes as
# 0=Negative, 1=Neutral, 2=Positive — confirm against the model card.
labels = ['Negative', 'Neutral', 'Positive']

# *Getting Data from Social Network (Twitter)*

**Search Query**

In [None]:
# Search expression handed to the Twitter scraper: tweets from the account
# "elonmusk", restricted by the since/until date operators.
query = '(from:elonmusk) until:2020-01-01 since:2010-01-01'
# Maximum number of tweets to collect before the scraping loop stops.
limit = 3

**Query Tweets and Preprocessing**

In [None]:
# Collect up to `limit` tweets matching `query`, normalise their text the way
# the sentiment checkpoint expects, and tabulate them.
tweets = []
for tweet in twitter.TwitterSearchScraper(query).get_items():
  # Check the limit first so that no tweet beyond it is printed or
  # preprocessed, and so that `tweetContent` always refers to the last tweet
  # that was actually stored.
  if len(tweets) >= limit:
    break

  print(vars(tweet))

  # The scraper yields Tweet objects, not strings, so the text has to be read
  # from an attribute. Newer snscrape releases name it `rawContent`; older
  # ones name it `content`.
  tweetText = getattr(tweet, 'rawContent', None) or tweet.content

  # preprocessing tweets: mask user mentions and links with fixed placeholders
  tweetWords = []
  for word in tweetText.split(' '):
    if word.startswith('@') and len(word) > 1:
      word = '@user'
    elif word.startswith('http'):
      word = 'http'
    tweetWords.append(word)

  tweetContent = ' '.join(tweetWords)

  tweets.append([tweet.date, tweet.username, tweetContent])

dataframe = pandas.DataFrame(tweets, columns=['Date', 'User', 'Tweet'])
print(dataframe)

**Save Tweets**

In [None]:
# Write the collected tweets table to 'tweets.csv' (a relative path).
dataframe.to_csv('tweets.csv')

# *Sentiment Analysis*

**Tokenize Tweets**

In [None]:
# Load the tokenizer for the same checkpoint identifier used for the model.
tokenizer = AutoTokenizer.from_pretrained(RoBERTa)
# Encode a single tweet only: `tweetContent` is whatever the scraping loop
# assigned on its last processed tweet, not the whole table.
# return_tensors='pt' asks the tokenizer for PyTorch tensors.
encodedTweet = tokenizer(tweetContent, return_tensors='pt')

**Tweet Classifications**

In [None]:
# Run the encoded tweet through the classifier.
roBERTaBottleNeck = model(**encodedTweet)
# The first [0] picks the first output field and the second [0] the first
# (only) sequence in the batch.
# NOTE(review): the first field is presumably the logits tensor — confirm.
# `.numpy()` must be *called*: passing the bound method itself to softmax
# fails because it is not array data.
probabilities = softmax(roBERTaBottleNeck[0][0].detach().numpy())

**Classification Table**

In [None]:
# Pair the analysed tweet with every class whose probability is above 0.4.
# Depending on the distribution this can yield zero, one, or two rows.
target = [
  [tweetContent, labels[position]]
  for position, score in enumerate(probabilities)
  if score > 0.4
]

# Tabulate the selected annotations and display them.
targetTable = pandas.DataFrame(data=target, columns=['Tweet', 'Annotation'])
print(targetTable)