# Sentiment Analysis on Tweets for Stocks

In [24]:
# Libraries
import twint
import nest_asyncio
nest_asyncio.apply()
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from datetime import datetime, timedelta

# Gathering Tweets

## Function for Twint

In [18]:
def getTweets(search_term, date, limit):
    """
    Configures Twint, runs a search and returns a dataframe of the results.

    Parameters:
        search_term: the query handed to Twint's search (e.g. a cashtag).
        date: upper date bound for the search, assigned to Twint's `Until`.
        limit: the number of tweets to retrieve, assigned to Twint's `Limit`.

    Returns:
        A dataframe holding only the tweets whose 'cashtags' entry contains
        exactly one item. An empty dataframe is returned when the search
        left nothing that can be filtered.
    """
    # Configuring Twint for search
    c = twint.Config()

    # What to search for and how much of it
    c.Search = search_term
    c.Limit = limit

    # Tweets until a specified date
    c.Until = date

    # Content filters: no retweets, popular tweets, English only,
    # and excluding tweets with links
    c.Filter_retweets = True
    c.Popular_tweets = True
    c.Lang = 'en'
    c.Links = 'exclude'

    # Lowercasing tweets
    c.Lowercase = True

    # Count of the number of tweets fetched
    c.Count = True

    # Making the results pandas friendly
    c.Pandas = True

    # Stopping print in terminal
    c.Hide_output = True

    # Searching
    twint.run.Search(c)

    # Twint exposes the pandas result as a module-level attribute
    df = twint.storage.panda.Tweets_df

    # A search without hits may leave nothing to filter (no dataframe, no rows,
    # or no 'cashtags' column); hand back an empty dataframe instead of failing
    # on the filter below.
    if df is None or df.empty or 'cashtags' not in df.columns:
        return pd.DataFrame()

    # Filtering out other cashtag mentions: keep tweets with a single cashtag
    has_single_cashtag = df['cashtags'].apply(lambda tags: len(tags) == 1)

    return df[has_single_cashtag]

## Using Twint
Retrieve a fixed number of tweets for each day, stepping backwards one day at a time over a date range.

In [19]:
def tweetByDay(start, stop, df, search_term="$BBIG", limit=20):
    """
    Gathers a set amount of tweets daily between two dates and returns a dataframe.

    The search walks backwards in time: it begins at `start` and moves one day
    earlier after every request until `stop` is reached. No request is made
    for `stop` itself.

    Parameters:
        start: the most recent date, formatted as "YYYY-MM-DD".
        stop: the earliest date (not fetched), formatted as "YYYY-MM-DD".
        df: dataframe the retrieved tweets are added to.
        search_term: query passed to getTweets for every day.
        limit: number of tweets requested from getTweets for every day.

    Returns:
        `df` itself when there is nothing to add, otherwise a new dataframe
        with the daily results appended below the rows of `df` and a fresh
        0..n-1 index.

    Raises:
        ValueError: if a date does not match "YYYY-MM-DD" or if `start` is
            earlier than `stop` (the walk goes backwards and would never
            arrive at `stop`).
    """
    date_format = "%Y-%m-%d"

    # Comparing parsed dates rather than raw strings, so that e.g. "2021-9-1"
    # and "2021-09-01" are recognised as the same day
    current = datetime.strptime(start, date_format)
    end = datetime.strptime(stop, date_format)

    if current < end:
        raise ValueError(
            "start (%s) must not be earlier than stop (%s)" % (start, stop)
        )

    # Looping instead of recursing, so long ranges cannot hit the recursion limit
    fetched = []
    while current > end:
        # The new set of tweets for the day
        fetched.append(getTweets(search_term, current.strftime(date_format), limit))

        # The new start date
        current -= timedelta(days=1)

        print(current.strftime(date_format))

    # Nothing was requested: hand the input back untouched
    if not fetched:
        return df

    # Empty frames carry no rows; leaving them out keeps the concat clean
    frames = [frame for frame in [df, *fetched] if not frame.empty]
    if not frames:
        return df

    # Single concat at the end (DataFrame.append no longer exists in pandas 2.x)
    return pd.concat(frames, ignore_index=True)

In [20]:
# Collect the daily tweets, stepping back from 2021-09-01 until 2021-08-29 is reached
df = tweetByDay(start="2021-09-01", stop="2021-08-29", df=pd.DataFrame())

[+] Finished: Successfully collected 20 Tweets.
2021-08-31
[+] Finished: Successfully collected 20 Tweets.
2021-08-30
[+] Finished: Successfully collected 20 Tweets.
2021-08-29


# Sentiment Analysis

In [34]:
# VADER analyzer used to score the tweets
sia = SentimentIntensityAnalyzer()

# Compound polarity score of every tweet (kept as the last expression of the cell)
df['tweet'].map(lambda text: sia.polarity_scores(text)['compound'])

0     0.6486
1     0.5399
2    -0.5871
3     0.0000
4     0.0000
5     0.0000
6     0.2500
7    -0.2732
8    -0.3818
9     0.4898
10    0.0000
11    0.0000
12   -0.4404
13    0.7712
14    0.5413
15    0.0000
16   -0.0258
17    0.0000
18    0.2732
19   -0.3947
20   -0.4019
21   -0.7184
22    0.0000
23    0.9068
24    0.3400
25    0.3612
26   -0.4767
27   -0.1027
28    0.0000
29    0.3612
30    0.0000
31    0.0000
32    0.2120
33    0.4404
34    0.2023
35    0.1531
36    0.0000
37    0.8625
38    0.7430
Name: tweet, dtype: float64