# Algorithmic Trading with Sentiment Analysis

There has been ongoing research in the area of sentiment analysis and its application to stock returns.

In [None]:
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline.data.sentdex import sentiment
import blaze
import datetime as dt
from quantopian.pipeline.data.morningstar import operation_ratios
from quantopian.interactive.data.sentdex import sentiment
from datetime import timedelta
from pytz import timezone
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from collections import Counter
import talib
import statsmodels.api as sm
import numpy 
import pandas as pd
import alphalens

# Data for Sentiment Analysis

We can pull sample data from the sample set to see what is available for a specific company such as Apple. This lets us inspect the sentiment signals that we have for individual companies.

In [None]:
# Preview the first rows of the Apple sentiment sample.
# NOTE(review): `AAPL_sentiment` is not defined in any visible cell —
# presumably it was built from the interactive sentdex dataset; confirm.
AAPL_sentiment.head()

In [None]:
# Plot the 'sentiment_signal' column of the Apple sample.
# NOTE(review): relies on `AAPL_sentiment`, which no visible cell defines.
AAPL_sentiment['sentiment_signal'].plot()

# Pre-Process the Data for Sentiments

We first build a pipeline on the sentiment signal, and then run a PCA on the sentiments to determine a best-fit line.

In [None]:
from quantopian.pipeline.data.sentdex import sentiment

In [None]:
def make_pipeline():
    """Build the sentiment pipeline.

    The pipeline is screened to Q1500US members that have a non-null latest
    sentiment signal and exposes three columns:

    * 'Sentiment' - the latest sentiment signal,
    * 'Shorts'    - True where the signal is <= -2,
    * 'Longs'     - True where the signal is >= 4.
    """
    latest_signal = sentiment.sentiment_signal.latest

    # Restrict to the Q1500US universe and drop names without a signal.
    base_universe = Q1500US()
    tradable = base_universe & latest_signal.notnull()

    output_columns = {
        'Sentiment': latest_signal,
        'Shorts': latest_signal <= -2,
        'Longs': latest_signal >= 4,
    }
    return Pipeline(columns=output_columns, screen=tradable)

In [None]:
# Run the sentiment pipeline from 2014-01-01 to 2015-12-31 and show summary
# statistics of the resulting frame.
result = run_pipeline(make_pipeline(), start_date = '2014-01-01' , end_date = '2015-12-31')
result.describe()

In [None]:
import alphalens

# Evaluate the alpha and expected forward returns of the sentiment factor
# over the training window (the original note gives April 1st 2014 to
# December 31st 2015).
#
# The factor is split into 2 quantiles and forward returns are computed over
# 3, 10 and 30 periods.
#
# NOTE: `pricing` is built by the get_pricing cell of this section; that cell
# must be run before this one.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor=result['Sentiment'],
    prices=pricing,
    quantiles=2,
    periods=(3, 10, 30),
)
alphalens.tears.create_full_tear_sheet(factor_data)

In [None]:
# Load the backtest of the sentiment long/short strategy to see how it fares
# against the market from January 1st 2015 to January 2018, then render its
# full tear sheet.
bt = get_backtest('5af254a1d1362c4454073df7')
bt.create_full_tear_sheet()

In [None]:
# Unique values of the second index level of the pipeline output
# (presumably the securities — confirm against the pipeline index layout).
assets = result.index.levels[1].unique()
# Open prices for those assets from 2014-01-01 to 2015-12-31.
pricing = get_pricing(assets, start_date = '2014-01-01' , end_date = '2015-12-31', fields = 'open_price' )
# Display how many assets were found.
len(assets)

In [None]:
# Cross-tabulate the 'Longs' flag against the 'Sentiment' value;
# margins=True adds the row/column totals.
pd.crosstab(result["Longs"],result["Sentiment"],margins=True)

In [None]:
# Histogram of the 'Sentiment' column of the pipeline output.
result['Sentiment'].hist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [None]:
# Fit a one-component PCA on X and project X onto that component.
# NOTE(review): `X` is not defined in any visible cell — it must be built
# from the sentiment data before this cell can run; confirm.
from sklearn.decomposition import PCA
pca = PCA(n_components=1)
pca.fit(X)
X_pca = pca.transform(X)
# Compare the shapes before and after the projection.
print("original shape:   ", X.shape)
print("transformed shape:", X_pca.shape)

In [None]:
# Map the one-component projection back into the original feature space.
X_new = pca.inverse_transform(X_pca)
# NOTE(review): `Y` is not defined in any visible cell and `X_new` is not
# used by the plot — presumably the reconstructed points were meant to be
# drawn as well; confirm.
plt.scatter(X, Y, alpha=0.4)

# Testing alpha for Revenue Growth

In [None]:
def make_pipeline():
    """Build a pipeline that ranks the universe by latest revenue growth.

    The universe is the Q1500US members with a non-null
    ``operation_ratios.revenue_growth`` value. The single output column,
    'testing_factor', is the rank of revenue growth within that universe
    (ties ranked with method 'average').
    """
    revenue_growth = operation_ratios.revenue_growth.latest
    tradable = Q1500US() & revenue_growth.notnull()

    # Rank only within the screened universe so excluded names do not
    # take up rank positions.
    ranked_growth = revenue_growth.rank(mask=tradable, method='average')

    return Pipeline(columns={'testing_factor': ranked_growth}, screen=tradable)

# Run the pipeline defined in this cell from 2015-01-01 to 2016-01-01 and
# preview the first rows.
result = run_pipeline(make_pipeline(), start_date = '2015-01-01' , end_date = '2016-01-01')
result.head()

In [None]:
# Unique values of the second index level of the pipeline output
# (presumably the securities — confirm against the pipeline index layout).
assets = result.index.levels[1].unique()
# Open prices for those assets from 2014-01-01 to 2015-12-31.
# NOTE(review): the pipeline in this section runs 2015-01-01 to 2016-01-01,
# so this price window starts a year early and ends before the last factor
# dates' forward-return horizon — confirm the intended window.
pricing = get_pricing(assets, start_date = '2014-01-01' , end_date = '2015-12-31', fields = 'open_price' )
# Display how many assets were found.
len(assets)

In [None]:
import alphalens

# Combine the 'testing_factor' column with forward returns computed from
# `pricing` (2 quantiles; 3-, 10- and 30-period horizons), then render the
# full alphalens tear sheet.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=result['testing_factor'], 
                                                                   prices=pricing,
                                                                   quantiles=2,
                                                                   periods = (3,10,30))
alphalens.tears.create_full_tear_sheet(factor_data)

# Testing alpha for Operations Margin

In [None]:
def make_pipeline():
    """Build a pipeline that ranks the universe by latest operation margin.

    The universe is the Q1500US members with a non-null
    ``operation_ratios.operation_margin`` value. The single output column,
    'testing_factor', is the rank of operation margin within that universe
    (ties ranked with method 'average').

    This section tests Operations Margin, so the factor must be
    ``operation_margin``; the previous version read ``revenue_growth`` and
    therefore repeated the revenue-growth test.
    """
    testing_factor = operation_ratios.operation_margin.latest
    universe = (Q1500US() & testing_factor.notnull())

    # Rank only within the screened universe so excluded names do not
    # take up rank positions.
    testing_factor = testing_factor.rank(mask=universe, method='average')

    pipe = Pipeline(columns={'testing_factor': testing_factor}, screen=universe)

    return pipe

# Run the pipeline defined in this cell from 2015-01-01 to 2016-01-01 and
# preview the first rows.
result = run_pipeline(make_pipeline(), start_date = '2015-01-01' , end_date = '2016-01-01')
result.head()

In [None]:
# Unique values of the second index level of the pipeline output
# (presumably the securities — confirm against the pipeline index layout).
assets = result.index.levels[1].unique()
# Open prices for those assets from 2014-01-01 to 2015-12-31.
# NOTE(review): the pipeline in this section runs 2015-01-01 to 2016-01-01,
# so this price window starts a year early and ends before the last factor
# dates' forward-return horizon — confirm the intended window.
pricing = get_pricing(assets, start_date = '2014-01-01' , end_date = '2015-12-31', fields = 'open_price' )
# Display how many assets were found.
len(assets)

In [None]:
import alphalens

# Combine the 'testing_factor' column with forward returns computed from
# `pricing` (2 quantiles; 3-, 10- and 30-period horizons), then render the
# full alphalens tear sheet.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=result['testing_factor'], 
                                                                   prices=pricing,
                                                                   quantiles=2,
                                                                   periods = (3,10,30))
alphalens.tears.create_full_tear_sheet(factor_data)

# Testing combination of all three factors

We use all three factors (Sentiment, Operations Margin, and Revenue Growth) in the algorithm in order to test the impact of the model over time against the stock market.

In [None]:
def make_pipeline():
    """Build the combined three-factor pipeline.

    Factors: latest revenue growth, latest operation margin and latest
    sentiment signal. The universe is the Q1500US members for which all
    three are non-null. Each factor is ranked within that universe (ties
    ranked with method 'average') and the three ranks are summed into
    'testing_factor'. The sum is split into 2 quantile buckets: 'shorts'
    flags bucket 0 and 'longs' flags bucket 1.
    """
    revenue_growth = operation_ratios.revenue_growth.latest
    operating_margin = operation_ratios.operation_margin.latest
    sentiment_signal = sentiment.sentiment_signal.latest

    # A security must have data for every factor to be included.
    tradable = (
        Q1500US()
        & revenue_growth.notnull()
        & operating_margin.notnull()
        & sentiment_signal.notnull()
    )

    # Rank each factor inside the screened universe only.
    growth_rank = revenue_growth.rank(mask=tradable, method='average')
    margin_rank = operating_margin.rank(mask=tradable, method='average')
    sentiment_rank = sentiment_signal.rank(mask=tradable, method='average')

    # Equal-weighted combination of the three ranks.
    combined_rank = growth_rank + margin_rank + sentiment_rank
    halves = combined_rank.quantiles(2)

    output_columns = {
        'testing_factor': combined_rank,
        'shorts': halves.eq(0),
        'longs': halves.eq(1),
    }
    return Pipeline(columns=output_columns, screen=tradable)

# Run the combined pipeline from 2013-04-01 to 2018-05-01 and preview the
# first rows.
result = run_pipeline(make_pipeline(), start_date = '2013-04-01' , end_date = '2018-05-01')
result.head()

In [None]:
# Unique values of the second index level of the pipeline output
# (presumably the securities — confirm against the pipeline index layout).
assets = result.index.levels[1].unique()
# Open prices for those assets from 2014-12-31 to 2016-02-01.
# NOTE(review): the combined pipeline runs 2013-04-01 to 2018-05-01, so most
# factor dates have no prices in this window — confirm the intended window
# before trusting the alphalens output.
pricing = get_pricing(assets, start_date = '2014-12-31' , end_date = '2016-02-01', fields = 'open_price' )
# Display how many assets were found.
len(assets)

In [None]:
import alphalens

# Combine the 'testing_factor' column with forward returns computed from
# `pricing` (2 quantiles; 3-, 10- and 30-period horizons), then render the
# full alphalens tear sheet.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=result['testing_factor'], 
                                                                   prices=pricing,
                                                                   quantiles=2,
                                                                   periods = (3,10,30))
alphalens.tears.create_full_tear_sheet(factor_data)


In [None]:
# Load the backtest that runs all the factors with different weights and
# render its full tear sheet.
bt = get_backtest('5af26eb4c56ef9437dde7384')
bt.create_full_tear_sheet()

# Conclusion

As the backtest results show, my algorithm did not beat the market and failed to deliver returns based on the sentiment feedback. The next backtest runs a machine learning model that tracks bear and bull markets.

In [None]:
# Load the backtest that runs all the factors, weighting them differently
# through a bear/bull market model, and render its full tear sheet.
bt = get_backtest('5af27412a56aa7444ec1b896')
bt.create_full_tear_sheet()


The results of the backtest show an 11.3% return from the strategy developed. Although we developed a more efficient strategy for creating weighted factors, it did not help in beating the market, even though the previous backtest showed the long/short strategies that were developed.

There were several limitations, including the number of frictions in the market. Real markets tend to have more frictions, and that would affect our overall return. We also only worked with three factors along with sentiment analysis. In the future, it would be best to test and run more factors, not just sentiment analysis, and to use more neural-network-based strategies to develop the weights for each of the factors. Further research could be carried out for those purposes.