In [1185]:
# importing necessary modules.
import pandas as pd
import config9 as cfg
from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt

In [None]:
postgres_password = ''
database_name = ''

In [1186]:
# creating a connection to an aws rds postgres cloud database.
rds_connection_string = f'postgres:{postgres_password}@localhost:5432/{database_name}'
engine = create_engine(f'postgres://{rds_connection_string}')

In [1187]:
# checking out the table names in the database.
engine.table_names()

['new_route', 'avg_sandp', 'sandp2', 'sandp', 'russell_2000', 'sandp_russell']

In [1188]:
# checking out the SandP_500 table.
sandp = pd.read_sql_query('SELECT * FROM sandp', con=engine)
sandp.head()

Unnamed: 0,symbol,name,price_to_bookvalue,price_to_book,price_to_sales,price_to_earnings,receivables_turnover,price_to_free_cash_flow,price_to_operating_cash_flow,enterprise_value_multiple,...,asset_growth,book_value_per_share_growth,debt_growth,randd_expense_growth,sganda_expense_growth,percent_return_on_investment,above_below_sandp_return,market_cap,industry,sector
0,A,"Agilent Technologies, Inc.",2.5517,5.17165,1.4813,6.30885,7.0014,18.1807,13.57995,16.746223,...,0.18005,0.15185,0.31885,-0.0674,0.0188,215.407785,0,7462700000.0,Medical Diagnostics & Research,Healthcare
1,AAL,"American Airlines Group, Inc.",-99.0,-99.0,0.11565,-99.0,27.3348,-99.0,2.20925,61.812855,...,-0.0017,-0.06235,0.00975,-99.0,0.01435,492.976589,1,613380900.0,Airlines,Industrials
2,AAP,"Advance Auto Parts, Inc.",2.9566,3.1038,0.706,14.1684,57.0461,7.4185,5.4613,0.499984,...,0.0367,0.1952,-0.5715,-99.0,0.1036,158.290852,0,1912092000.0,Retail - Apparel & Specialty,Consumer Cyclical
3,AAPL,"Apple, Inc.",5.42765,4.62715,3.98235,19.54495,10.0906,17.3279,15.3596,0.493341,...,0.448,0.44245,-99.0,0.2694,0.21645,641.441749,1,91517930000.0,Computer Hardware,Technology
4,ABBV,"AbbVie, Inc.",16.4687,-99.0,3.019,10.4836,-99.0,9.2123,8.7454,0.001609,...,-99.0,-99.0,-99.0,0.4613,0.1408,127.493713,0,66190650000.0,Drug Manufacturers,Healthcare


In [1189]:
# converting industry columns into 1's and 0's.
industry = pd.get_dummies(sandp['industry'])

In [1190]:
# converting sector columns into 1's and 0's.
sector = pd.get_dummies(sandp['sector'])
sector.columns

Index(['Basic Materials', 'Communication Services', 'Consumer Cyclical',
       'Consumer Defensive', 'Energy', 'Financial Services', 'Healthcare',
       'Industrials', 'Real Estate', 'Technology', 'Utilities'],
      dtype='object')

In [1191]:
# bringing everything together.
sandp = pd.concat([sandp, industry, sector], axis=1)

In [1192]:
X = sandp[['net_income_growth','short_term_coverage_ratio','eps_diluted_growth', 'gross_profit_margin', 'price_to_book', 'price_to_sales', 'price_to_free_cash_flow', 'asset_growth', 'Airlines',
'Computer Hardware','Drug Manufacturers', 'Medical Devices','Application Software', 'Semiconductors','Consumer Packaged Goods', 
'Business Services', 'Credit Services','Utilities - Regulated', 'Insurance - Life', 'REITs','Brokers & Exchanges','Biotechnology',
'Asset Management', 'Online Media','Oil & Gas - E&P','Autos', 'Banks', 'Travel & Leisure', 'Entertainment','Agriculture',
'Transportation & Logistics','Oil & Gas - Integrated','Industrial Distribution','Metals & Mining','Oil & Gas - Services',
'Personal Services','Engineering & Construction', 'Oil & Gas - Midstream','Beverages - Non-Alcoholic', 'Truck Manufacturing', 
'Employment Services', 'Forest Products']]
target = sandp['above_below_sandp_return']

In [1193]:
# dropping unnecessary columns.
sandp.drop(['symbol', 'name', 'percent_return_on_investment'], axis=1, inplace=True)

In [1194]:
from sklearn.model_selection import train_test_split
X_train, X_test, target_train, target_test = train_test_split(X, target, random_state=1)

In [1195]:
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler().fit(X_train)

In [1196]:
X_trained_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [1197]:
from tensorflow.keras.utils import to_categorical
# One-hot encoding
target_train_categorical = to_categorical(target_train)
target_test_categorical = to_categorical(target_test)

In [1198]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(units=500, activation="relu", input_dim=42))
model.add(Dense(units=2, activation='softmax'))

In [1199]:
model.summary()

Model: "sequential_67"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_167 (Dense)            (None, 500)               21500     
_________________________________________________________________
dense_168 (Dense)            (None, 2)                 1002      
Total params: 22,502
Trainable params: 22,502
Non-trainable params: 0
_________________________________________________________________


In [1200]:
import keras
import keras_metrics as km
# compiling the model.
model.compile(optimizer="adam",
             loss="categorical_crossentropy",
             metrics=['accuracy'])

In [1201]:
# fitting the model to the training data.
model.fit(
    X_trained_scaled,
    target_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 351 samples
Epoch 1/100
351/351 - 0s - loss: 0.6620 - accuracy: 0.6125
Epoch 2/100
351/351 - 0s - loss: 0.5360 - accuracy: 0.7151
Epoch 3/100
351/351 - 0s - loss: 0.4939 - accuracy: 0.7521
Epoch 4/100
351/351 - 0s - loss: 0.4801 - accuracy: 0.7578
Epoch 5/100
351/351 - 0s - loss: 0.4737 - accuracy: 0.7607
Epoch 6/100
351/351 - 0s - loss: 0.4676 - accuracy: 0.7436
Epoch 7/100
351/351 - 0s - loss: 0.4614 - accuracy: 0.7607
Epoch 8/100
351/351 - 0s - loss: 0.4586 - accuracy: 0.7635
Epoch 9/100
351/351 - 0s - loss: 0.4519 - accuracy: 0.7635
Epoch 10/100
351/351 - 0s - loss: 0.4545 - accuracy: 0.7692
Epoch 11/100
351/351 - 0s - loss: 0.4535 - accuracy: 0.7692
Epoch 12/100
351/351 - 0s - loss: 0.4431 - accuracy: 0.7749
Epoch 13/100
351/351 - 0s - loss: 0.4428 - accuracy: 0.7635
Epoch 14/100
351/351 - 0s - loss: 0.4389 - accuracy: 0.7721
Epoch 15/100
351/351 - 0s - loss: 0.4390 - accuracy: 0.7664
Epoch 16/100
351/351 - 0s - loss: 0.4329 - accuracy: 0.7778
Epoch 17/100
351/351 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x1d763be4940>

In [1203]:
deep_model = Sequential()
deep_model.add(Dense(units=500, activation="relu", input_dim=42))
deep_model.add(Dense(units=500, activation="relu"))
deep_model.add(Dense(units=2, activation='softmax'))

In [1204]:
model.summary()

Model: "sequential_67"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_167 (Dense)            (None, 500)               21500     
_________________________________________________________________
dense_168 (Dense)            (None, 2)                 1002      
Total params: 22,502
Trainable params: 22,502
Non-trainable params: 0
_________________________________________________________________


In [1205]:
deep_model.compile(optimizer="adam",
                   loss="categorical_crossentropy",
                   metrics=['accuracy']
    )

In [1206]:
deep_model.fit(
    X_trained_scaled,
    target_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)   

Train on 351 samples
Epoch 1/100
351/351 - 1s - loss: 0.6182 - accuracy: 0.6895
Epoch 2/100
351/351 - 0s - loss: 0.5384 - accuracy: 0.7208
Epoch 3/100
351/351 - 0s - loss: 0.4896 - accuracy: 0.7350
Epoch 4/100
351/351 - 0s - loss: 0.4809 - accuracy: 0.7436
Epoch 5/100
351/351 - 0s - loss: 0.4622 - accuracy: 0.7550
Epoch 6/100
351/351 - 0s - loss: 0.4582 - accuracy: 0.7578
Epoch 7/100
351/351 - 0s - loss: 0.4497 - accuracy: 0.7635
Epoch 8/100
351/351 - 0s - loss: 0.4353 - accuracy: 0.7778
Epoch 9/100
351/351 - 0s - loss: 0.4294 - accuracy: 0.7806
Epoch 10/100
351/351 - 0s - loss: 0.4265 - accuracy: 0.7664
Epoch 11/100
351/351 - 0s - loss: 0.4251 - accuracy: 0.7664
Epoch 12/100
351/351 - 0s - loss: 0.4178 - accuracy: 0.8063
Epoch 13/100
351/351 - 0s - loss: 0.4063 - accuracy: 0.7749
Epoch 14/100
351/351 - 0s - loss: 0.4114 - accuracy: 0.7863
Epoch 15/100
351/351 - 0s - loss: 0.3916 - accuracy: 0.8034
Epoch 16/100
351/351 - 0s - loss: 0.3971 - accuracy: 0.7835
Epoch 17/100
351/351 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x1d76418cb38>

In [1207]:
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, target_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

117/1 - 0s - loss: 3.7744 - accuracy: 0.7009
Deep Neural Network - Loss: 2.898243834829738, Accuracy: 0.7008547186851501


In [1208]:
encoded_predictions = model.predict_classes(X)



In [1209]:
predictions = encoded_predictions.tolist()

In [1210]:
# checking out the SandP_500 table.
sandp = pd.read_sql_query('SELECT * FROM sandp', con=engine)
sandp.head()

Unnamed: 0,symbol,name,price_to_bookvalue,price_to_book,price_to_sales,price_to_earnings,receivables_turnover,price_to_free_cash_flow,price_to_operating_cash_flow,enterprise_value_multiple,...,asset_growth,book_value_per_share_growth,debt_growth,randd_expense_growth,sganda_expense_growth,percent_return_on_investment,above_below_sandp_return,market_cap,industry,sector
0,A,"Agilent Technologies, Inc.",2.5517,5.17165,1.4813,6.30885,7.0014,18.1807,13.57995,16.746223,...,0.18005,0.15185,0.31885,-0.0674,0.0188,215.407785,0,7462700000.0,Medical Diagnostics & Research,Healthcare
1,AAL,"American Airlines Group, Inc.",-99.0,-99.0,0.11565,-99.0,27.3348,-99.0,2.20925,61.812855,...,-0.0017,-0.06235,0.00975,-99.0,0.01435,492.976589,1,613380900.0,Airlines,Industrials
2,AAP,"Advance Auto Parts, Inc.",2.9566,3.1038,0.706,14.1684,57.0461,7.4185,5.4613,0.499984,...,0.0367,0.1952,-0.5715,-99.0,0.1036,158.290852,0,1912092000.0,Retail - Apparel & Specialty,Consumer Cyclical
3,AAPL,"Apple, Inc.",5.42765,4.62715,3.98235,19.54495,10.0906,17.3279,15.3596,0.493341,...,0.448,0.44245,-99.0,0.2694,0.21645,641.441749,1,91517930000.0,Computer Hardware,Technology
4,ABBV,"AbbVie, Inc.",16.4687,-99.0,3.019,10.4836,-99.0,9.2123,8.7454,0.001609,...,-99.0,-99.0,-99.0,0.4613,0.1408,127.493713,0,66190650000.0,Drug Manufacturers,Healthcare


In [1211]:
sandp = sandp[['name', 'percent_return_on_investment', 'above_below_sandp_return']]

In [1212]:
sandp.head()

Unnamed: 0,name,percent_return_on_investment,above_below_sandp_return
0,"Agilent Technologies, Inc.",215.407785,0
1,"American Airlines Group, Inc.",492.976589,1
2,"Advance Auto Parts, Inc.",158.290852,0
3,"Apple, Inc.",641.441749,1
4,"AbbVie, Inc.",127.493713,0


In [1213]:
sandp['predictions'] = predictions

In [1214]:
sandp.head()

Unnamed: 0,name,percent_return_on_investment,above_below_sandp_return,predictions
0,"Agilent Technologies, Inc.",215.407785,0,1
1,"American Airlines Group, Inc.",492.976589,1,1
2,"Advance Auto Parts, Inc.",158.290852,0,1
3,"Apple, Inc.",641.441749,1,1
4,"AbbVie, Inc.",127.493713,0,1


In [1215]:
sandp['predictions'].value_counts()

1    307
0    161
Name: predictions, dtype: int64

In [1216]:
r = 0
predicted_positives = []
for (x,y) in zip(sandp['predictions'],sandp['percent_return_on_investment']):
    if x == 1:
        predicted_positives.append(y)
        if y > 228:
            r += 1
print(r)

124


In [1217]:
predicted = pd.DataFrame(predicted_positives)

In [1218]:
len(predicted)

307

In [1219]:
r/len(predicted)

0.40390879478827363

In [1220]:
for x in predicted[0]:
    if x < 0:
        print(x)

-8.70733249051834
-58.564988730278
-12.4704025256512
-72.3336853220697
-20.8840486867393
-4.98723404255319
-6.37676456271296
-18.3637728296222
-29.9843178254051
-12.3555266156601


In [1221]:
gain = predicted.sum()

In [1222]:
gain 

0    79139.849558
dtype: float64

In [1223]:
(gain - len(predicted))/len(predicted)

0    256.784526
dtype: float64