## Using Alpha Vantage API to Retrieve Sentiment Scores

In [2]:
import requests

def avg_sentiment(symbol,key):
    '''
    Input -> Ticker symbol and Alpha Vantage API key obtained from Alpha Vantage Site
        Query the Alpha Vantage NEWS_SENTIMENT endpoint for the ticker and decode the JSON reply.
        Read the 'overall_sentiment_score' of every article listed under the 'feed' key
        and average those scores.
    Return -> Average sentiment score rounded to 4 decimal places, or 0 when the reply has
              no 'feed' key or the feed is empty.
    Raises -> requests exceptions on connection failure / timeout, and KeyError if an
              article has no 'overall_sentiment_score' entry.
    '''
    response = requests.get(
        'https://www.alphavantage.co/query',
        # Let requests build and URL-encode the query string.
        params={'function': 'NEWS_SENTIMENT', 'tickers': symbol, 'apikey': key},
        # Without a timeout a stalled connection would block the caller forever.
        timeout=30,
    )
    data = response.json()

    # Replies without a feed (presumably error or rate-limit payloads - TODO confirm)
    # and feeds with zero articles both map to the neutral score 0; the latter
    # would otherwise divide by zero below.
    feed = data.get('feed') if isinstance(data, dict) else None
    if not feed:
        return 0

    # float() is defensive: it is a no-op for numeric scores and also accepts
    # scores delivered as numeric strings.
    scores = [float(article['overall_sentiment_score']) for article in feed]
    return round(sum(scores) / len(scores), 4)

## Find All S&P 500 Tickers

In [3]:
import pandas as pd
def get_all_tickers():
    '''
    Input -> None
        Read the tables on the Wikipedia "List of S&P 500 companies" page and keep the first one.
        Write that whole table to 'S&P500-Info.csv' and only its 'Symbol' column
        to 'S&P500-Symbols.csv' (both in the current working directory).
    Return -> the 'Symbol' column of the table (pandas Series of tickers)
    '''
    sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    # read_html returns one DataFrame per table found; the first one is used here.
    constituents = pd.read_html(sp500_url)[0]

    constituents.to_csv('S&P500-Info.csv')
    constituents.to_csv("S&P500-Symbols.csv", columns=['Symbol'])

    return constituents['Symbol']

## Find and Sort Tickers by Sentiment Score

In [4]:
def sort_stocks_by_sentiment(ticks,num_ticks,key=None):
    '''
    Input -> iterable of tickers, how many tickers you want to end up with,
             Alpha Vantage API key (forwarded to avg_sentiment for every ticker)
        Look up the average sentiment score of each ticker with avg_sentiment.
        Put tickers and scores in a dataframe with columns 'Ticker' and 'sentiment_scores',
        sort it by score in descending order and keep the first num_ticks rows.
    Return -> list of the num_ticks tickers with the highest sentiment score
    Raises -> ValueError if no API key is given.
    '''
    # avg_sentiment needs the API key as its second argument; calling it with the
    # ticker alone fails, so the key is required here and checked up front.
    if key is None:
        raise ValueError("An Alpha Vantage API key is required: pass key='<your key>'.")

    # Materialise once so generators and pandas Series are handled alike.
    tickers = list(ticks)
    tick_sent = {
        'Ticker': tickers,
        'sentiment_scores': [avg_sentiment(ticker, key) for ticker in tickers],
    }

    ts_df = pd.DataFrame.from_dict(tick_sent)
    ranked = ts_df.sort_values(by=['sentiment_scores'], ascending=False)
    return ranked[:num_ticks]['Ticker'].tolist()

# Find Best Trading Strategy For Each Ticker

In [5]:
#import relevant libraries
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled "seaborn" style to "seaborn-v0_8" and later
# dropped the old name; use whichever one this installation provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

## Breakout Strategy

In [6]:
def breakout(stock, start, end, tolerance, SMA):
    '''
    Input -> ticker for relevant stock (a single ticker is expected), start date, end date,
             absolute tolerance level before breaking out (added to / subtracted from the long
             average, so it is in price units), sequence holding the window lengths of the
             short and long simple moving averages as (short, long).
        Retrieve stock data from yfinance and convert Close prices column to a dataframe.
        Create a "returns" column with the log returns of the Close price (no strategy applied).
        Create "SMA_S" and "SMA_L" columns with the rolling mean of the Close price over the
        short and long window.

        Create a "position" column: 1 (long) when the short average is above the long average
        plus the tolerance, -1 (short) when it is below the long average minus the tolerance,
        0 (no position) in between.
        Create a "strategy" column: the log return multiplied by the previous row's position.
        Create a "returnsstrategy" column: cumulative strategy return as a growth multiple.
    Return -> Total return of the strategy as a growth multiple, standard deviation of the
              strategy log returns scaled by sqrt(252), and the dataframe described above
    '''
    df = yf.download(stock,start=start,end=end)

    close = df["Close"]
    if isinstance(close, pd.DataFrame):
        # Recent yfinance releases return (field, ticker) MultiIndex columns even for a
        # single ticker, so "Close" is a one-column frame; reduce it to a Series.
        close = close.iloc[:, 0]
    data = close.rename("Close").to_frame()

    data["returns"]=np.log(data["Close"].div(data["Close"].shift(1)))
    data["SMA_S"]=data["Close"].rolling(int(SMA[0])).mean()
    data["SMA_L"]=data["Close"].rolling(int(SMA[1])).mean()
    # Drops the warm-up rows where the rolling windows are not yet full.
    data.dropna(inplace=True)

    #Buy when short interval average increases above upper tolerance on long interval average.
    #Sell when short interval average decreases below lower tolerance on long interval average.
    data["position"]=np.where(data["SMA_S"]>(data["SMA_L"]+tolerance),1,0)
    data["position"]=np.where(data["SMA_S"]<(data["SMA_L"]-tolerance),-1,data["position"])
    #Position is shifted forward 1 period: a position chosen on one row earns the next row's return
    data["strategy"]=data["returns"]*data["position"].shift(1)
    # Drops the first row, which has no previous position.
    data.dropna(inplace=True)

    #Data for plotting Later
    data["returnsstrategy"]=data["strategy"].cumsum().apply(np.exp)

    ret=np.exp(data["strategy"].sum())
    std=data["strategy"].std()*np.sqrt(252)

    return ret, std, data

## Long Bias Strategy

In [7]:
def long_bias(stock, start, end, SMA): #(SMA_S,SMA_L)
    '''
    Input -> ticker for relevant stock (a single ticker is expected), start date, end date,
             sequence holding the window lengths of the short and long simple moving averages
             as (short, long).
        Retrieve stock data from yfinance and convert Close prices column to a dataframe.
        Create a "returns" column with the log returns of the Close price (no strategy applied).
        Create "SMA_S" and "SMA_L" columns with the rolling mean of the Close price over the
        short and long window.

        Create a "position" column: 1 (long) when the short average is higher than the long
        average, 0 (no position) otherwise. This strategy never goes short.
        Create a "strategy" column: the log return multiplied by the previous row's position.
        Create a "returnsstrategy" column: cumulative strategy return as a growth multiple.
    Return -> Total return of the strategy as a growth multiple, standard deviation of the
              strategy log returns scaled by sqrt(252), and the dataframe described above
    '''
    df = yf.download(stock,start=start,end=end)

    close = df["Close"]
    if isinstance(close, pd.DataFrame):
        # Recent yfinance releases return (field, ticker) MultiIndex columns even for a
        # single ticker, so "Close" is a one-column frame; reduce it to a Series.
        close = close.iloc[:, 0]
    data = close.rename("Close").to_frame()

    data["returns"]=np.log(data["Close"].div(data["Close"].shift(1)))
    data["SMA_S"]=data["Close"].rolling(int(SMA[0])).mean()
    data["SMA_L"]=data["Close"].rolling(int(SMA[1])).mean()
    # Drops the warm-up rows where the rolling windows are not yet full.
    data.dropna(inplace=True)

    #Buy when doing well in short interval average.
    #Stay out of the market in all other scenarios.
    data["position"]=np.where(data["SMA_S"]>data["SMA_L"],1,0)
    #Position is shifted forward 1 period: a position chosen on one row earns the next row's return
    data["strategy"]=data["returns"]*data["position"].shift(1)
    # Drops the first row, which has no previous position.
    data.dropna(inplace=True)

    #Data for plotting Later
    data["returnsstrategy"]=data["strategy"].cumsum().apply(np.exp)

    ret=np.exp(data["strategy"].sum())
    std=data["strategy"].std()*np.sqrt(252)

    return ret,std,data

## Momentum Strategy

In [8]:
def momentum(stock, start, end, SMA):
    '''
    Input -> ticker for relevant stock (a single ticker is expected), start date, end date,
             sequence holding the window lengths of the short and long simple moving averages
             as (short, long).
        Retrieve stock data from yfinance and convert Close prices column to a dataframe.
        Create a "returns" column with the log returns of the Close price (no strategy applied).
        Create "SMA_S" and "SMA_L" columns with the rolling mean of the Close price over the
        short and long window.

        Create a "position" column: 1 (long) when the short average is higher than the long
        average, -1 (short) when it is lower, 0 (no position) when both are equal.
        Create a "strategy" column: the log return multiplied by the previous row's position.
        Create a "returnsstrategy" column: cumulative strategy return as a growth multiple.
    Return -> Total return of the strategy as a growth multiple, standard deviation of the
              strategy log returns scaled by sqrt(252), and the dataframe described above
    '''
    df = yf.download(stock,start=start,end=end)

    close = df["Close"]
    if isinstance(close, pd.DataFrame):
        # Recent yfinance releases return (field, ticker) MultiIndex columns even for a
        # single ticker, so "Close" is a one-column frame; reduce it to a Series.
        close = close.iloc[:, 0]
    data = close.rename("Close").to_frame()

    data["returns"]=np.log(data["Close"].div(data["Close"].shift(1)))
    data["SMA_S"]=data["Close"].rolling(int(SMA[0])).mean()
    data["SMA_L"]=data["Close"].rolling(int(SMA[1])).mean()
    # Drops the warm-up rows where the rolling windows are not yet full.
    data.dropna(inplace=True)

    #Buy when doing better in short interval compared to long interval
    #Sell when doing worse in short interval compared to long interval
    #No position when doing the same in both intervals
    data["position"]=np.where(data["SMA_S"]>data["SMA_L"],1,0)
    data["position"]=np.where(data["SMA_S"]<data["SMA_L"],-1,data["position"])
    #Position is shifted forward 1 period: a position chosen on one row earns the next row's return
    data["strategy"]=data["returns"]*data["position"].shift(1)
    # Drops the first row, which has no previous position.
    data.dropna(inplace=True)

    #Data for plotting Later
    data["returnsstrategy"]=data["strategy"].cumsum().apply(np.exp)

    ret=np.exp(data["strategy"].sum())
    std=data["strategy"].std()*np.sqrt(252)

    return ret, std, data

## Plot Graphs For Stock Returns Under Each Strategy

In [10]:
def plot_data(data1, data2, data3):
    '''
    Input -> Three dataframes that each have a "returnsstrategy" column (the third item returned
             by the strategy functions, e.g. breakout(stock, start, end, tolerance, SMA)[2]).
             The colours and legend labels are fixed per argument, so pass them in this order:
                 data1 -> drawn in green, labelled 'Break-Out Strategy'
                 data2 -> drawn in red,   labelled 'Long Bias Strategy'
                 data3 -> drawn in blue,  labelled 'Momentum Strategy'
        Plot colour-coded line curves for the cumulative strategic returns for each strategy
        on one set of axes and show the figure.
    Return -> None
    '''
    # Imported here so the function does not rely on a module-level `mpatches`
    # name, which the visible imports (matplotlib.pyplot only) do not provide.
    import matplotlib.patches as mpatches

    # Draw the three curves on the same axes.
    ax = None
    for frame, colour in ((data1, 'g'), (data2, 'r'), (data3, 'b')):
        ax = frame["returnsstrategy"].plot(ax=ax, figsize=(12,8), color=colour)

    plt.ylabel("Return")
    plt.title("Returns of All Strategies")

    red_patch = mpatches.Patch(color='red', label='Long Bias Strategy')
    green_patch = mpatches.Patch(color='green', label='Break-Out Strategy')
    blue_patch = mpatches.Patch(color='blue', label='Momentum Strategy')
    plt.legend(handles=[red_patch,blue_patch,green_patch])

    plt.show()

## Create Comparison DataFrame

In [None]:
def compare_strat(data1,data2,data3):
    '''
    Input -> Result of each strategy as an indexable (return, risk, ...), e.g. the tuple
             returned by long_bias(stock, start, end, SMA). Only items [0] and [1] are read.
             Order matters because the names are fixed per argument:
                 data1 -> 'Long Bias', data2 -> 'Momentum', data3 -> 'Break Out'
        Create a dataframe with columns for strategy, return and risk.
        Sort it by returns (highest to lowest) and take the strategy in the first row.
        Sort it by risk (lowest to highest) and take the strategy in the first row.
        Ties keep the argument order (a stable sort is used).
    Return -> Strategy with highest return, Strategy with lowest risk
    '''
    data = [['Long Bias',data1[0],data1[1]], ['Momentum',data2[0], data2[1]], ['Break Out',data3[0], data3[1]]]

    # Create the pandas DataFrame
    df = pd.DataFrame(data, columns=['Strategy', 'Returns', 'Risk'])

    # .iloc[0] picks the first row *after* sorting. Plain [0] would look up the
    # index label 0, which is always the 'Long Bias' row regardless of the sort.
    by_return = df.sort_values(by=['Returns'], ascending=False, kind='mergesort')
    by_risk = df.sort_values(by=['Risk'], kind='mergesort')
    best_return = by_return['Strategy'].iloc[0]
    lowest_risk = by_risk['Strategy'].iloc[0]

    return best_return, lowest_risk