In [11]:
import pandas as pd
from sqlalchemy import create_engine

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Connect to the SQLite database
database_name = "data/CompanyData.sqlite"
engine = create_engine(f"sqlite:///{database_name}", echo=False)

In [3]:
# Load the Master file from the database
master_df = pd.read_sql_query("SELECT * FROM MasterData_ML", engine)
master_df.dropna(inplace=True)
master_df.head()

Unnamed: 0,monthend_date,ticker,price,wealth_index,end_wealth_index,total_return,cash,st_debt,lt_debt,equity,...,ebit_ev,name,sector,quantile_total_return,price_1_month_ago,price_3_months_ago,price_12_months_ago,trailing_1_month_return,trailing_3_month_return,trailing_12_month_return
0,2016-04-30,A,40.92,39.693,44.514,12.145718,1931000000.0,80000000.0,1653000000.0,4045000000.0,...,0.041902,Agilent Technologies Inc,Health Care,1,39.85,37.65,41.37,2.685069,8.685259,-1.087745
1,2016-05-31,A,45.89,44.514,43.1432,-3.079481,1931000000.0,80000000.0,1653000000.0,4045000000.0,...,0.037304,Agilent Technologies Inc,Health Care,3,40.92,37.35,41.19,12.14565,22.864793,11.410537
2,2016-06-30,A,44.36,43.1432,46.7904,8.453708,2139000000.0,235000000.0,1654000000.0,4162000000.0,...,0.040637,Agilent Technologies Inc,Health Care,1,45.89,39.85,38.58,-3.33406,11.31744,14.981856
3,2016-07-31,A,48.11,46.7904,45.6913,-2.348986,2139000000.0,235000000.0,1654000000.0,4162000000.0,...,0.037419,Agilent Technologies Inc,Health Care,3,44.36,40.92,40.95,8.453562,17.57087,17.484737
4,2016-08-31,A,46.98,45.6913,45.9121,0.483243,2139000000.0,235000000.0,1654000000.0,4162000000.0,...,0.038333,Agilent Technologies Inc,Health Care,2,48.11,45.89,36.31,-2.348784,2.375245,29.385844


In [34]:
# Figure out the current predictions

first_date_ml = "2017-09-30"
last_date_ml = "2019-08-31"

master_2yrs_df = master_df[first_date_ml <= master_df['monthend_date']]
master_2yrs_df = master_2yrs_df[master_2yrs_df['monthend_date'] <= last_date_ml]

cols = ['earnings_yield','book_yield','revenue_ev','ebit_ev','net_debt_capital', \
       'trailing_1_month_return','trailing_3_month_return','trailing_12_month_return']

X_master = master_2yrs_df[cols]
y_master = master_2yrs_df["quantile_total_return"]

# I intentionally did not "scale" the data, since it ruins the model
X_scaled = X_master.to_numpy()

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_master)
encoded_y = label_encoder.transform(y_master)

# Step 2: Convert encoded labels to one-hot-encoding
y_categorical = to_categorical(encoded_y)

# Create model and add layers
n_nodes = 100
model = Sequential()
model.add(Dense(units=n_nodes, activation='relu', input_dim=8))
model.add(Dense(units=n_nodes, activation='relu'))
model.add(Dense(units=n_nodes, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(
    X_scaled,
    y_categorical,
    epochs=40,
    shuffle=True,
    verbose=0)



<tensorflow.python.keras.callbacks.History at 0x24c20d47160>

In [35]:
encoded_predictions = model.predict_classes(X_scaled)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
print(f"Predicted classes: {prediction_labels}")
print (prediction_labels.shape)

Predicted classes: [2 1 1 ... 2 2 2]
(11542,)


In [49]:
ticker_date_df = master_2yrs_df[["monthend_date", "ticker"]]
ticker_date_df["dl_prediction"] = prediction_labels
ticker_date_df.head()
predictions_df = ticker_date_df[ticker_date_df["monthend_date"]==last_date_m1].reset_index()
predictions_df.drop(["index", "monthend_date"], axis=1, inplace=True)
print (len(predictions_df.index))
predictions_df.head()


484


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,ticker,dl_prediction
0,A,2
1,AAL,1
2,AAP,1
3,AAPL,1
4,ABBV,1


In [78]:
# Get the current values of the criteria for all stocks in the most recent month
criteria_df = master_df[master_df["monthend_date"]==last_date_ml].reset_index()

# Calculate the quintiles for earnings_yield, across all sectors
criteria_df['q_price_earnings'] = pd.qcut(criteria_df['earnings_yield'], 5, labels=False) + 1

# Calculate the quintile for earnings_yield, grouped by sector
criteria_df['qs_price_earnings'] = criteria_df.groupby(['sector'])['earnings_yield'].transform(  \
                                    lambda x: pd.qcut(x, 5, labels=False) + 1)

# Repeat for book_yield
criteria_df['q_price_book'] = pd.qcut(criteria_df['book_yield'], 5, labels=False) + 1
criteria_df['qs_price_book'] = criteria_df.groupby(['sector'])['book_yield'].transform(  \
                                    lambda x: pd.qcut(x, 5, labels=False) + 1)

# Repeat for revenue_ev
criteria_df['q_ev_revenue'] = pd.qcut(criteria_df['revenue_ev'], 5, labels=False) + 1
criteria_df['qs_ev_revenue'] = criteria_df.groupby(['sector'])['revenue_ev'].transform(  \
                                    lambda x: pd.qcut(x, 5, labels=False) + 1)

# Repeat for ebit_ev
criteria_df['q_ev_ebit'] = pd.qcut(criteria_df['ebit_ev'], 5, labels=False) + 1
criteria_df['qs_ev_ebit'] = criteria_df.groupby(['sector'])['ebit_ev'].transform(  \
                                    lambda x: pd.qcut(x, 5, labels=False, duplicates='drop') + 1)

# Reset qs_ebit_ev to zero for the Financials sector
criteria_df['qs_ev_ebit'] = criteria_df.apply(lambda x: 0 if x['sector']=="Financials" else x['qs_ev_ebit'], axis=1)

# Repeat for net_debt_capital
criteria_df['q_net_debt_capital'] = pd.qcut(criteria_df['net_debt_capital'], 5, labels=False) + 1
criteria_df['qs_net_debt_capital'] = criteria_df.groupby(['sector'])['net_debt_capital'].transform(  \
                                    lambda x: pd.qcut(x, 5, labels=False) + 1)

# Repeat for market_cap
# Recall that we need to switch the quintiles, so that the largest values are set to quintile 5, smallest to 1
criteria_df['q_market_cap'] = pd.qcut(criteria_df['market_cap'], 5, labels=False) + 1
criteria_df['qs_market_cap'] = criteria_df.groupby(['sector'])['market_cap'].transform(  \
                                    lambda x: 5 - pd.qcut(x, 5, labels=False))

print (len(criteria_df.index))
criteria_df.drop('index', axis=1, inplace=True)

criteria_df.to_csv ("current_data.csv")

484


In [79]:
# merge the two dataframes
merged_df = criteria_df.join(predictions_df.set_index('ticker'), on='ticker')

# list the columns
print (merged_df.columns)

# Keep selected items
cols = ["ticker", "sector", "dl_prediction", \
        "price_earnings", "price_book", "market_cap", "net_debt_capital", "ev_revenue", "ev_ebit", \
        "q_price_earnings", "q_price_book", "q_market_cap", "q_net_debt_capital", "q_ev_revenue", "q_ev_ebit", \
        "qs_price_earnings", "qs_price_book", "qs_market_cap", "qs_net_debt_capital", "qs_ev_revenue", "qs_ev_ebit"]

keep_df = merged_df[cols]

Index(['monthend_date', 'ticker', 'price', 'wealth_index', 'end_wealth_index',
       'total_return', 'cash', 'st_debt', 'lt_debt', 'equity', 'revenue',
       'ebit', 'net_income', 'basic_eps', 'diluted_eps', 'shares', 'qtr_date',
       'price_earnings', 'earnings_yield', 'book_value', 'price_book',
       'book_yield', 'market_cap', 'net_debt', 'net_debt_capital',
       'enterprise_value', 'ev_revenue', 'revenue_ev', 'ev_ebit', 'ebit_ev',
       'name', 'sector', 'quantile_total_return', 'price_1_month_ago',
       'price_3_months_ago', 'price_12_months_ago', 'trailing_1_month_return',
       'trailing_3_month_return', 'trailing_12_month_return',
       'q_price_earnings', 'qs_price_earnings', 'q_price_book',
       'qs_price_book', 'q_ev_revenue', 'qs_ev_revenue', 'q_ev_ebit',
       'qs_ev_ebit', 'q_net_debt_capital', 'qs_net_debt_capital',
       'q_market_cap', 'qs_market_cap', 'dl_prediction'],
      dtype='object')


In [80]:
keep_df.head()

Unnamed: 0,ticker,sector,dl_prediction,price_earnings,price_book,market_cap,net_debt_capital,ev_revenue,ev_ebit,q_price_earnings,...,q_market_cap,q_net_debt_capital,q_ev_revenue,q_ev_ebit,qs_price_earnings,qs_price_book,qs_market_cap,qs_net_debt_capital,qs_ev_revenue,qs_ev_ebit
0,A,Health Care,2,20.552023,4.428146,22694250000.0,-7.487416,4.450538,23.389789,3,...,3,1,2,3,4.0,3.0,3.0,1.0,3.0,3.0
1,AAL,Industrials,1,7.761062,-542.731684,11940100000.0,100.088176,0.819522,13.258656,5,...,2,5,5,5,5.0,1.0,4.0,5.0,5.0,5.0
2,AAP,Consumer Discretionary,1,22.953411,2.716148,9971798000.0,-0.020923,1.031735,16.155109,3,...,1,1,5,4,2.0,4.0,4.0,1.0,4.0,3.0
3,AAPL,Information Technology,1,17.719864,10.162673,980250800000.0,37.505831,4.007732,16.114412,4,...,5,3,3,4,4.0,2.0,1.0,4.0,3.0,4.0
4,ABBV,Health Care,1,23.395018,-11.206987,95999050000.0,136.416971,3.92616,17.969564,3,...,5,5,3,4,4.0,1.0,1.0,5.0,4.0,4.0


In [83]:
# To write this dataframe to an SQL table with any keys, do the following
# Use the "df.to_sql" to write the dataframe to an SQL table with a temporary name
final_table_name = "CurrentData"

# Now, write the dataframe to the SQL table
keep_df.to_sql('Temp', con=engine, if_exists='replace', index=False)

# Drop any existing NEW table, the one that will contain the data and index
# sql_stmt = "DROP TABLE " + final_table_name
# engine.execute(sql_stmt)

# Create a NEW table to hold all the data, and be sure to include the index definition
create_table_sql = 'CREATE TABLE "' + final_table_name + '" (' + \
    'ticker VARCHAR, ' + \
    'sector VARCHAR, ' + \
    'dl_prediction FLOAT, ' + \
    'price_earnings FLOAT, ' + \
    'price_book FLOAT, ' + \
    'market_cap FLOAT, ' + \
    'net_debt_capital FLOAT, ' + \
    'ev_revenue FLOAT, ' + \
    'ev_ebit FLOAT, ' + \
    'q_price_earnings FLOAT, ' + \
    'q_price_book FLOAT, ' + \
    'q_market_cap FLOAT, ' + \
    'q_net_debt_capital FLOAT, ' + \
    'q_ev_revenue FLOAT, ' + \
    'q_ev_ebit FLOAT, ' + \
    'qs_price_earnings FLOAT, ' + \
    'qs_price_book FLOAT, ' + \
    'qs_market_cap FLOAT, ' + \
    'qs_net_debt_capital FLOAT, ' + \
    'qs_ev_revenue FLOAT, ' + \
    'qs_ev_ebit FLOAT, ' + \
    'PRIMARY KEY (ticker));'

# Create the table.  It will be empty
engine.execute(create_table_sql)

# Now copy the data from the Temp table into the New table
sql_stmt = "INSERT INTO " + final_table_name + " SELECT * FROM Temp"
engine.execute(sql_stmt)

# Delete the Temp table
sql_stmt = "DROP TABLE Temp"
engine.execute(sql_stmt)

<sqlalchemy.engine.result.ResultProxy at 0x24c223aef60>