In [1]:
# importing necessary modules.
import pandas as pd
import config9 as cfg
from sqlalchemy import create_engine
import numpy as np
import matplotlib.pyplot as plt

In [2]:
postgres_password = cfg.password
database_name = 'optimal_portfolio'

In [3]:
# creating a connection to an aws rds postgres cloud database.
rds_connection_string = f'postgres:{postgres_password}@localhost:5432/{database_name}'
engine = create_engine(f'postgres://{rds_connection_string}')

In [4]:
# checking out the table names in the database.
engine.table_names()

['sandp']

In [5]:
# checking out the SandP_500 table.
sandp = pd.read_sql_query('SELECT * FROM sandp', con=engine)
sandp.head()

Unnamed: 0,symbol,name,price_to_bookvalue,price_to_book,price_to_sales,price_to_earnings,receivables_turnover,price_to_free_cash_flow,price_to_operating_cash_flow,enterprise_value_multiple,...,inventory_growth,asset_growth,book_value_per_share_growth,debt_growth,randd_expense_growth,sganda_expense_growth,percent_return_on_investment,industry,sector,above_below_sandp_return
0,A,"Agilent Technologies, Inc.",2.5517,5.17165,1.4813,6.30885,7.0014,18.1807,13.57995,16.746223,...,0.0758,0.18005,0.15185,0.31885,-0.0674,0.0188,215.407785,Medical Diagnostics & Research,Healthcare,0
1,AAL,"American Airlines Group, Inc.",-99.0,-99.0,0.11565,-99.0,27.3348,-99.0,2.20925,61.812855,...,0.0637,-0.0017,-0.06235,0.00975,-99.0,0.01435,492.976589,Airlines,Industrials,1
2,AAP,"Advance Auto Parts, Inc.",2.9566,3.1038,0.706,14.1684,57.0461,7.4185,5.4613,0.499984,...,0.0054,0.0367,0.1952,-0.5715,-99.0,0.1036,158.290852,Retail - Apparel & Specialty,Consumer Cyclical,0
3,AAPL,"Apple, Inc.",5.42765,4.62715,3.98235,19.54495,10.0906,17.3279,15.3596,0.493341,...,0.6019,0.448,0.44245,-99.0,0.2694,0.21645,641.441749,Computer Hardware,Technology,1
4,ABBV,"AbbVie, Inc.",16.4687,-99.0,3.019,10.4836,-99.0,9.2123,8.7454,0.001609,...,-99.0,-99.0,-99.0,-99.0,0.4613,0.1408,127.493713,Drug Manufacturers,Healthcare,0


In [6]:
# converting industry columns into 1's and 0's.
industry = pd.get_dummies(sandp['industry'])

In [7]:
# converting sector columns into 1's and 0's.
sector = pd.get_dummies(sandp['sector'])
sector.columns

Index(['Basic Materials', 'Communication Services', 'Consumer Cyclical',
       'Consumer Defensive', 'Energy', 'Financial Services', 'Healthcare',
       'Industrials', 'Real Estate', 'Technology', 'Utilities'],
      dtype='object')

In [8]:
# bringing everything together.
sandp = pd.concat([sandp, industry, sector], axis=1)

In [9]:
X = sandp[['net_income_growth','short_term_coverage_ratio','eps_diluted_growth', 'gross_profit_margin', 'price_to_book', 'price_to_sales', 'price_to_free_cash_flow', 'asset_growth', 'Airlines',
'Computer Hardware','Drug Manufacturers', 'Medical Devices','Application Software', 'Semiconductors','Consumer Packaged Goods', 
'Business Services', 'Credit Services','Utilities - Regulated', 'Insurance - Life', 'REITs','Brokers & Exchanges','Biotechnology',
'Asset Management', 'Online Media','Oil & Gas - E&P','Autos', 'Banks', 'Travel & Leisure', 'Entertainment','Agriculture',
'Transportation & Logistics','Oil & Gas - Integrated','Industrial Distribution','Metals & Mining','Oil & Gas - Services',
'Personal Services','Engineering & Construction', 'Oil & Gas - Midstream','Beverages - Non-Alcoholic', 'Truck Manufacturing', 
'Employment Services', 'Forest Products']]
target = sandp['above_below_sandp_return']

In [10]:
# dropping unnecessary columns.
sandp.drop(['symbol', 'name', 'percent_return_on_investment'], axis=1, inplace=True)

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, target_train, target_test = train_test_split(X, target, random_state=1)

In [12]:
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler().fit(X_train)

In [13]:
X_trained_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
from tensorflow.keras.utils import to_categorical
# One-hot encoding
target_train_categorical = to_categorical(target_train)
target_test_categorical = to_categorical(target_test)

In [15]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(units=500, activation="relu", input_dim=42))
model.add(Dense(units=2, activation='softmax'))

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 500)               21500     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1002      
Total params: 22,502
Trainable params: 22,502
Non-trainable params: 0
_________________________________________________________________


In [17]:
import keras
import keras_metrics as km
# compiling the model.
model.compile(optimizer="adam",
             loss="categorical_crossentropy",
             metrics=['accuracy'])

Using TensorFlow backend.


In [18]:
# fitting the model to the training data.
model.fit(
    X_trained_scaled,
    target_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 351 samples
Epoch 1/100
351/351 - 1s - loss: 0.6267 - accuracy: 0.6353
Epoch 2/100
351/351 - 0s - loss: 0.5266 - accuracy: 0.7293
Epoch 3/100
351/351 - 0s - loss: 0.4950 - accuracy: 0.7379
Epoch 4/100
351/351 - 0s - loss: 0.4788 - accuracy: 0.7436
Epoch 5/100
351/351 - 0s - loss: 0.4694 - accuracy: 0.7493
Epoch 6/100
351/351 - 0s - loss: 0.4648 - accuracy: 0.7607
Epoch 7/100
351/351 - 0s - loss: 0.4610 - accuracy: 0.7607
Epoch 8/100
351/351 - 0s - loss: 0.4555 - accuracy: 0.7692
Epoch 9/100
351/351 - 0s - loss: 0.4545 - accuracy: 0.7721
Epoch 10/100
351/351 - 0s - loss: 0.4489 - accuracy: 0.7749
Epoch 11/100
351/351 - 0s - loss: 0.4447 - accuracy: 0.7749
Epoch 12/100
351/351 - 0s - loss: 0.4436 - accuracy: 0.7778
Epoch 13/100
351/351 - 0s - loss: 0.4388 - accuracy: 0.7778
Epoch 14/100
351/351 - 0s - loss: 0.4372 - accuracy: 0.7863
Epoch 15/100
351/351 - 0s - loss: 0.4359 - accuracy: 0.7835
Epoch 16/100
351/351 - 0s - loss: 0.4333 - accuracy: 0.7806
Epoch 17/100
351/351 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x221f4e943c8>

In [19]:
deep_model = Sequential()
deep_model.add(Dense(units=500, activation="relu", input_dim=42))
deep_model.add(Dense(units=500, activation="relu"))
deep_model.add(Dense(units=2, activation='softmax'))

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 500)               21500     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1002      
Total params: 22,502
Trainable params: 22,502
Non-trainable params: 0
_________________________________________________________________


In [21]:
deep_model.compile(optimizer="adam",
                   loss="categorical_crossentropy",
                   metrics=['accuracy']
    )

In [22]:
deep_model.fit(
    X_trained_scaled,
    target_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)   

Train on 351 samples
Epoch 1/100
351/351 - 0s - loss: 0.6229 - accuracy: 0.6695
Epoch 2/100
351/351 - 0s - loss: 0.5094 - accuracy: 0.7322
Epoch 3/100
351/351 - 0s - loss: 0.4821 - accuracy: 0.7493
Epoch 4/100
351/351 - 0s - loss: 0.4607 - accuracy: 0.7635
Epoch 5/100
351/351 - 0s - loss: 0.4519 - accuracy: 0.7664
Epoch 6/100
351/351 - 0s - loss: 0.4472 - accuracy: 0.7664
Epoch 7/100
351/351 - 0s - loss: 0.4395 - accuracy: 0.7749
Epoch 8/100
351/351 - 0s - loss: 0.4337 - accuracy: 0.7721
Epoch 9/100
351/351 - 0s - loss: 0.4345 - accuracy: 0.7692
Epoch 10/100
351/351 - 0s - loss: 0.4420 - accuracy: 0.7721
Epoch 11/100
351/351 - 0s - loss: 0.4097 - accuracy: 0.7949
Epoch 12/100
351/351 - 0s - loss: 0.4135 - accuracy: 0.7749
Epoch 13/100
351/351 - 0s - loss: 0.4083 - accuracy: 0.8034
Epoch 14/100
351/351 - 0s - loss: 0.3976 - accuracy: 0.8006
Epoch 15/100
351/351 - 0s - loss: 0.4001 - accuracy: 0.7977
Epoch 16/100
351/351 - 0s - loss: 0.3966 - accuracy: 0.7920
Epoch 17/100
351/351 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x221f647c518>

In [28]:
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, target_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

117/1 - 0s - loss: 3.5509 - accuracy: 0.7009
Deep Neural Network - Loss: 2.8388405579787035, Accuracy: 0.7008547186851501


In [29]:
encoded_predictions = model.predict_classes(X)



In [30]:
predictions = encoded_predictions.tolist()

In [31]:
# checking out the SandP_500 table.
sandp = pd.read_sql_query('SELECT * FROM sandp', con=engine)
sandp.head()

Unnamed: 0,symbol,name,price_to_bookvalue,price_to_book,price_to_sales,price_to_earnings,receivables_turnover,price_to_free_cash_flow,price_to_operating_cash_flow,enterprise_value_multiple,...,inventory_growth,asset_growth,book_value_per_share_growth,debt_growth,randd_expense_growth,sganda_expense_growth,percent_return_on_investment,industry,sector,above_below_sandp_return
0,A,"Agilent Technologies, Inc.",2.5517,5.17165,1.4813,6.30885,7.0014,18.1807,13.57995,16.746223,...,0.0758,0.18005,0.15185,0.31885,-0.0674,0.0188,215.407785,Medical Diagnostics & Research,Healthcare,0
1,AAL,"American Airlines Group, Inc.",-99.0,-99.0,0.11565,-99.0,27.3348,-99.0,2.20925,61.812855,...,0.0637,-0.0017,-0.06235,0.00975,-99.0,0.01435,492.976589,Airlines,Industrials,1
2,AAP,"Advance Auto Parts, Inc.",2.9566,3.1038,0.706,14.1684,57.0461,7.4185,5.4613,0.499984,...,0.0054,0.0367,0.1952,-0.5715,-99.0,0.1036,158.290852,Retail - Apparel & Specialty,Consumer Cyclical,0
3,AAPL,"Apple, Inc.",5.42765,4.62715,3.98235,19.54495,10.0906,17.3279,15.3596,0.493341,...,0.6019,0.448,0.44245,-99.0,0.2694,0.21645,641.441749,Computer Hardware,Technology,1
4,ABBV,"AbbVie, Inc.",16.4687,-99.0,3.019,10.4836,-99.0,9.2123,8.7454,0.001609,...,-99.0,-99.0,-99.0,-99.0,0.4613,0.1408,127.493713,Drug Manufacturers,Healthcare,0


In [32]:
sandp = sandp[['name', 'percent_return_on_investment', 'above_below_sandp_return']]

In [33]:
sandp.head()

Unnamed: 0,name,percent_return_on_investment,above_below_sandp_return
0,"Agilent Technologies, Inc.",215.407785,0
1,"American Airlines Group, Inc.",492.976589,1
2,"Advance Auto Parts, Inc.",158.290852,0
3,"Apple, Inc.",641.441749,1
4,"AbbVie, Inc.",127.493713,0


In [34]:
sandp['predictions'] = predictions

In [35]:
sandp.head()

Unnamed: 0,name,percent_return_on_investment,above_below_sandp_return,predictions
0,"Agilent Technologies, Inc.",215.407785,0,1
1,"American Airlines Group, Inc.",492.976589,1,0
2,"Advance Auto Parts, Inc.",158.290852,0,1
3,"Apple, Inc.",641.441749,1,1
4,"AbbVie, Inc.",127.493713,0,1


In [36]:
sandp['predictions'].value_counts()

1    308
0    160
Name: predictions, dtype: int64

In [37]:
r = 0
predicted_positives = []
for (x,y) in zip(sandp['predictions'],sandp['percent_return_on_investment']):
    if x == 1:
        predicted_positives.append(y)
        if y > 228:
            r += 1
print(r)

126


In [38]:
predicted = pd.DataFrame(predicted_positives)

In [39]:
len(predicted)

308

In [40]:
r/len(predicted)

0.4090909090909091

In [41]:
for x in predicted[0]:
    if x < 0:
        print(x)

-8.70733249051834
-58.564988730277975
-12.470402525651156
-72.3336853220697
-20.884048686739256
-4.987234042553191
-6.376764562712964
-18.363772829622206
-29.984317825405128
-12.355526615660105


In [42]:
gain = predicted.sum()

In [43]:
gain 

0    79272.802492
dtype: float64

In [44]:
(gain - len(predicted))/len(predicted)

0    256.379229
dtype: float64

In [45]:
import joblib
filename = 'deep_learning_model.sav'
joblib.dump(deep_model, filename)

TypeError: can't pickle _thread._local objects