# Twitter Trading Strategy
Letting tweet sentiment determine buy, sell, or hold decisions.

In [1]:
# Libraries
import twint
import nest_asyncio
nest_asyncio.apply()
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from datetime import datetime, timedelta
import plotly.express as px
import yfinance as yf

# Helper Functions
Functions for retrieving stock prices and tweets.

## Twint Query Function

In [None]:
def getTweets(search_term, until, limit=20):
    """
    Builds a Twint search for `search_term` and hands the results back
    as a dataframe.

    `until` is a date string; it is passed to Twint as the `Until` cutoff
    with " 00:00:00" appended. `limit` is forwarded as Twint's `Limit`.
    The 'date' column of the result is trimmed to the part before the
    first space. An empty dataframe is returned when nothing was found.
    """
    c = twint.Config()

    # Every option applied to the Twint configuration, in one place
    settings = {
        "Limit": limit,               # the limit of tweets to retrieve
        "Search": search_term,        # search term
        "Filter_retweets": True,      # removing retweets
        "Popular_tweets": True,       # popular tweets
        "Lowercase": True,            # lowercasing tweets
        "Lang": 'en',                 # English only
        "Until": until + " 00:00:00", # tweets until a specified date
        "Pandas": True,               # making the results pandas friendly
        "Hide_output": True,          # stopping print in terminal
    }
    for option, value in settings.items():
        setattr(c, option, value)

    # Running the search; Twint stores the results on its pandas storage module
    twint.run.Search(c)
    results = twint.storage.panda.Tweets_df

    # Nothing found: hand back a fresh empty DF
    if not len(results) > 0:
        return pd.DataFrame()

    def date_part(stamp):
        # Keep only the text before the first space (the date portion)
        return stamp.split(" ")[0]

    results['date'] = results['date'].map(date_part)

    return results

## Twint over Time Function

In [None]:
def tweetByDay(start, end, df, search, limit=20):
    """
    Runs the twint query once per day, stepping forward one day at a time
    from `end` until `start` is reached, and returns the total dataframe.

    Note the argument order: `end` is the first date queried and `start`
    is the date at which collection stops (it is not itself queried), so
    `start` must not be earlier than `end`.

    Parameters:
        start:  "YYYY-MM-DD" date string at which the walk stops.
        end:    "YYYY-MM-DD" date string of the first query.
        df:     dataframe of tweets collected so far (may be empty).
        search: search term handed to getTweets.
        limit:  number of tweets requested per query; applied to every day.

    Returns:
        The accumulated dataframe with duplicate tweet ids removed.

    Raises:
        ValueError: if `start` is earlier than `end`, since walking forward
        from `end` could then never reach `start`.
    """
    date_format = "%Y-%m-%d"
    stop = datetime.strptime(start, date_format)
    current = datetime.strptime(end, date_format)

    if current > stop:
        raise ValueError(
            f"start ({start}) must not be earlier than end ({end})"
        )

    # How many extra attempts to make when a day's query comes back empty
    max_retries = 3

    # Iterating instead of recursing so long date ranges cannot exhaust
    # the interpreter's recursion limit
    while current < stop:
        day = current.strftime(date_format)

        # Fetching the tweets for the day
        tweet_df = getTweets(search, day, limit)

        # Running the query a few more times in case twint missed some tweets.
        # Retries stop as soon as tweets are found or the attempts run out,
        # so a day with no tweets cannot loop forever.
        retries = 0
        while len(tweet_df) == 0 and retries < max_retries:
            tweet_df = getTweets(search, day, limit)
            retries += 1

        # Adding the new tweets (DataFrame.append is gone in pandas 2.0)
        df = pd.concat([df, tweet_df], ignore_index=True)

        # Moving on to the next day
        current += timedelta(days=1)

        # Printing scraping status
        print(f"\t{len(df)} Total Tweets collected as of {current.strftime(date_format)}\t")

    # Removing any potential duplicates; a frame with no tweets at all has
    # no "id" column to deduplicate on
    if "id" in df.columns:
        df = df.drop_duplicates(subset="id")
    print(len(df))
    return df

## Sentiment Function

In [None]:
def getSentiment(df, measurement="compound"):
    """
    Given a DF of tweets, analyzes the tweets and returns a new DF
    of the average sentiment score per day for the given measurement.
    Accepted sentiment measurements: ["pos", "neg", "neu", "compound"]

    Parameters:
        df:          dataframe with a 'tweet' text column and a 'date'
                     column of "YYYY-MM-DD" strings. A 'sentiment' column
                     is added to it in place.
        measurement: which VADER polarity score to read for each tweet.

    Returns:
        A dataframe with a datetime 'date' column and a 'sentiment' column
        holding the mean score of that day's tweets. If `df` is empty, an
        empty dataframe with those two columns is returned.
    """
    # Nothing to score: return an empty, correctly typed frame instead of
    # failing on the missing 'tweet' column
    if df.empty:
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "sentiment": pd.Series(dtype="float64"),
        })

    # Sentiment Analyzer
    sia = SentimentIntensityAnalyzer()

    # Getting the sentiment score
    df['sentiment'] = df['tweet'].apply(lambda x: sia.polarity_scores(x)[measurement])

    # Creating a DF with the average sentiment score each day
    sent_df = df.groupby('date')['sentiment'].mean().reset_index()

    # Converting the dates to datetime
    sent_df['date'] = pd.to_datetime(sent_df['date'], format="%Y-%m-%d")

    return sent_df

## Price Data Function

In [None]:
def getStockPrices(ticker, start, end):
    """
    Fetches daily closing prices for `ticker` from yfinance and returns
    their day-over-day percent changes.

    The history request is made with `start=end` and `end=start`, i.e. the
    `end` argument is the beginning of the price window and `start` is its
    close.

    Returns a dataframe with a 'date' column (renamed from 'Date') and a
    'returns' column (renamed from 'Close'); the first row, which has no
    previous close to compare against, is dropped.
    """
    # Daily closes for the requested window
    closes = yf.Ticker(ticker).history(start=end, end=start, interval="1d")[['Close']]

    # Percent change from one close to the next, without the leading NaN
    daily_returns = closes.pct_change(1).dropna()

    # Moving the date index into a column and applying the output names
    daily_returns = daily_returns.reset_index()
    column_names = {"Date": "date", "Close": "returns"}

    return daily_returns.rename(columns=column_names)

## Sentiment and Price Function

In [None]:
def sentimentAndPrice(ticker, start, end, numtweets=20):
    """
    Visually compares daily tweet sentiment with the daily returns of a
    given stock ticker.

    Collects tweets for "$<ticker>" with tweetByDay, averages their compound
    sentiment per day, merges that with the ticker's daily percent returns,
    and shows two plotly figures: a bar chart counting how often the sign of
    the (shifted) sentiment matched the sign of the return, and a grouped bar
    chart of returns and sentiment over time.

    Parameters:
        ticker:    stock ticker symbol; also used as the "$ticker" search term.
        start/end: date strings forwarded unchanged to tweetByDay and
                   getStockPrices.
        numtweets: number of tweets requested per day.

    Returns:
        The result of `fig.show()` for the second figure.
    """
    # Creating a DF that contains daily tweets between two dates
    df = tweetByDay(start, end, pd.DataFrame(), search="$"+ticker, limit=numtweets)

    # Analyzing the sentiment of each tweet
    sent_df = getSentiment(
        df,
        measurement='compound'
    )

    # Getting stock price history
    stock_df = getStockPrices(
        ticker,
        start,
        end
    )

    # Merging the two DF
    comb_df = sent_df.merge(stock_df, how='outer', sort=True)

    # Shifting the sentiment scores 1 day to compensate for lookahead bias
    # (the shift moves values down one row of the date-sorted merged frame)
    comb_df['sentiment'] = comb_df['sentiment'].shift(1)

    # How often sentiment matched return

    # Dropping NAs so they are not compared
    drop_df = comb_df.dropna()

    # Comparing matches: True where sentiment and return have the same sign test result
    match = (drop_df['sentiment'].apply(lambda x: x>0)==drop_df['returns'].apply(lambda x: x>0))

    # Counting instances where they match. The column name is pinned to 0
    # because value_counts() names its result "count" on pandas >= 2.0,
    # which would otherwise break the x=0 lookup in the chart below.
    match = match.value_counts().rename({False: "Didn't predict return",
                                         True: "Successfully predicted return"}).to_frame(name=0)

    # Visualizing matches in sentiment and return
    fig = px.bar(
        match,
        x=0,
        y=match.index,
        color=match.index,
        title="Instances when Sentiment predicts Return",
        labels={"index": "Prediction",
                "0": "Count"}
    )

    fig.show()

    # Visualizing the sentiment and price
    fig = px.bar(
        comb_df,
        x='date',
        y=['returns', 'sentiment'],
        barmode='group',
        title=f"Returns & Sentiment over Time for {ticker}"
    )

    return fig.show()