In [None]:
!pip install gs_quant
!pip install iexfinance

In [102]:
import requests
import json
import pprint as pp
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from iexfinance.stocks import Stock, get_collections
import joblib

from datetime import date
from gs_quant.data import Dataset, Fields
from gs_quant.markets.securities import SecurityMaster, AssetIdentifier
from gs_quant.session import GsSession

client_id = r'd34d7b1b2fc74714bc64ee597c97369b'
client_secret = r'e003a07173f2404eb98a22321912488a0d16f959f6b6c3d70f26b06d2f034708'
IEX_TOKEN="sk_8ba1ae12a31e4c8e966516ce16cdf810"

In [14]:
# logging into marquee
scopes = GsSession.Scopes.get_default()
GsSession.use(client_id=client_id, client_secret=client_secret, scopes=scopes)

# retrieve data 
ds = Dataset('USCANFPP_MINI')

# get list of covered GSIDs
gsids = ds.get_coverage()['gsid'].values.tolist()
GIR_data = ds.get_data(date(2017, 1, 15), date(2018, 1, 15))

In [15]:
auth_data = {
    'grant_type'    : 'client_credentials',
    'client_id'     : client_id,
    'client_secret' : client_secret,
    'scope'         : 'read_content read_financial_data read_product_data read_user_profile'
}

# create session instance
session = requests.Session()

# make a POST to retrieve access_token
auth_request = session.post('https://idfs.gs.com/as/token.oauth2', data = auth_data)
access_token_dict = json.loads(auth_request.text)
access_token = access_token_dict['access_token']

# update session headers w/ access token
session.headers.update({'Authorization':'Bearer '+ access_token})

# api request to retrieve data
request_url = "https://api.marquee.gs.com/v1/assets/data/query"

request_query = {
                    "where": {
                        "gsid": gsids
                    },
                    "fields": ["ticker", "name", "gsid"],
                    "limit": 300
               }

request = session.post(url=request_url, json=request_query)
results = json.loads(request.text)

In [16]:
companies = json_normalize(results['results'])
companies.head()

Unnamed: 0,gsid,name,ticker
0,86196,Ultimate Software Group Inc,ULTI
1,901237,Juniper Networks Inc,JNPR
2,84275,United Natural Foods Inc,UNFI
3,223416,Zynga Inc - CL A,ZNGA
4,176665,Expedia Group Inc,EXPE


In [22]:
new_gir_data = GIR_data.merge(companies, how='inner', on='gsid')
stock_scores = new_gir_data.groupby(by='ticker') \
.agg({'name': 'first', 'integratedScore': 'mean', 'growthScore': 'mean', 'financialReturnsScore': 'mean', 'multipleScore': 'mean'}) \
.sort_values(by='integratedScore', ascending=False).reset_index()

stock_scores.head()

Unnamed: 0,ticker,name,integratedScore,growthScore,financialReturnsScore,multipleScore
0,GT,Goodyear Tire & Rubber Co,0.76015,0.60685,0.718733,0.0451
1,AVGO,Broadcom Inc,0.7317,0.677167,0.8253,0.307433
2,FLEX,Flex LTD,0.688167,0.6225,0.741867,0.300283
3,ADS,Alliance Data Systems Corp,0.684555,0.779227,0.939433,0.6648
4,ROST,Ross Stores Inc,0.6533,0.558533,0.975267,0.573933


In [23]:
data = []
columns = ['ticker', 'day200MovingAvg', 'peRatio', 'ttmEPS', 'week52change', 'maxChangePercent']

for ticker in stock_scores['ticker']:
    try:
        key_data = Stock(ticker, token=IEX_TOKEN).get_key_stats()
        data.append([ticker, key_data['day200MovingAvg'], key_data['peRatio'], \
                key_data['ttmEPS'], key_data['week52change'], key_data['maxChangePercent']])
    except:
        continue

stock_data = pd.DataFrame(data, columns=columns)

In [24]:
stock_data.head()

Unnamed: 0,ticker,day200MovingAvg,peRatio,ttmEPS,week52change,maxChangePercent
0,GT,16.16,20.23,0.8186,-0.185841,2.2031
1,AVGO,283.74,-72.27,-4.01,0.3597,16.9122
2,FLEX,10.06,-33.12,-0.3505,0.637518,0.3469
3,ADS,150.77,12.19,8.5938,-0.473499,5.1647
4,ROST,99.86,24.96,4.435,0.144319,23.2741


In [25]:
merged = stock_scores[['name', 'ticker', 'integratedScore']].merge(stock_data, how='inner', on='ticker')
merged

Unnamed: 0,name,ticker,integratedScore,day200MovingAvg,peRatio,ttmEPS,week52change,maxChangePercent
0,Goodyear Tire & Rubber Co,GT,0.760150,16.16,20.23,0.8186,-0.185841,2.203100
1,Broadcom Inc,AVGO,0.731700,283.74,-72.27,-4.0100,0.359700,16.912200
2,Flex LTD,FLEX,0.688167,10.06,-33.12,-0.3505,0.637518,0.346900
3,Alliance Data Systems Corp,ADS,0.684555,150.77,12.19,8.5938,-0.473499,5.164700
4,Ross Stores Inc,ROST,0.653300,99.86,24.96,4.4350,0.144319,23.274100
5,Facebook Inc-Class A,FB,0.650050,179.42,31.62,5.9430,0.292495,3.914700
6,Juniper Networks Inc,JNPR,0.646650,25.99,23.59,1.0694,-0.103412,2.091900
7,Capri Holdings LTD,CPRI,0.640617,37.87,11.87,2.6845,-0.427314,0.316900
8,Apple Inc,AAPL,0.639733,196.92,20.85,11.8267,0.139991,243.138600
9,Intuit Inc,INTU,0.633483,256.67,43.01,5.9908,0.275468,12.920600


In [56]:
def standardize(df):
    mu = np.mean(df, axis=0)
    sd = np.std(df, axis=0)
    return [(df - mu)/sd]

def select_columns(data, *columns):
    return data.loc[:, columns]

def process_data(data, test=False):
    X = (data.pipe(select_columns,        
                  'peRatio',  
                  'ttmEPS',   
                  'week52change', 
                  'maxChangePercent'))
    if test:
        y = None
    else:
        y = data['integratedScore']
        
    return X, y

def mse(X_test, y_test, model):
    mse = 0
    for row, val in zip([X_test], y_test):
        mse += (model.predict(row) - val)[0] ** 2
        
    return mse / len(X_test)

def test_within_threshold(model, X, y, threshold):
    predicted_values = []
    for row in [X]:
        predicted_values.append(model.predict(row))
    
    num_correct = 0
    
    for p, r in zip(predicted_values[0], y.tolist()): 
        if abs(p - r) <= threshold:
            num_correct += 1
            
    min_max_range = (min(predicted_values[0]), max(predicted_values[0]))
            
    return (num_correct / len(predicted_values[0]), min_max_range)

In [228]:
X_raw, y_raw = process_data(merged)
X = X_raw.dropna()
y = y_raw.drop(20)

In [236]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

stock_prospects = MLPRegressor(activation='logistic')
stock_prospects.fit(X_train, y_train)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [237]:
mse(X_test, y_test, stock_prospects)

2.8210719717434134e-05

In [238]:
test_within_threshold(stock_prospects, X, y, 0.15)

(0.8817204301075269, (0.43079208171625216, 0.7343207315858145))

In [239]:
def get_prospect(ticker, model=stock_prospects):
    
    try:
        key_data = Stock(ticker, token=IEX_TOKEN).get_key_stats()
        X = [[key_data['peRatio'], key_data['ttmEPS'], key_data['week52change'], key_data['maxChangePercent']]]
        if None in X[0]:
            print(f'Sorry, we do not have enough data for {ticker}.')
            return
    except:
        print(f'Sorry, {ticker} was not found in our database.')
        return

    return np.round(model.predict(X)[0], 4)

def get_best_prospect(tickers, model=stock_prospects):
    best_stock, max_prospect = None, float('-inf')
    for ticker in tickers:
        try:
            curr_prospect = get_prospect(ticker, model)
            if curr_prospect > max_prospect:
                best_stock, max_prospect = ticker, curr_prospect
        except:
            continue

    print(f'Out of your options, the best one to invest in is {best_stock}, with a predicted integrated score of {max_prospect}.')
    return (best_stock, max_prospect)

In [240]:
get_best_prospect(["MSFT", "ISRG", "FB", "AMZN"])

Out of your options, the best one to invest in is AMZN, with a predicted integrated score of 0.7202.


('AMZN', 0.7202)

In [241]:
joblib.dump(stock_prospects, 'model.joblib')

['model.joblib']

In [242]:
model = joblib.load('model.joblib')

In [244]:
get_prospect('FB', model)

0.4819

In [227]:
get_collections("Industrials", token=IEX_TOKEN, output_format='pandas').head()

KeyError: 'symbol'