## Logistic Regression
$$ g\big(x\big) = \frac{1}{1 + e^{-(w.x + b)}} $$

In [122]:
import numpy as np
from modules.eda_functions import *
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [123]:
path = '../data/lags/'

In [124]:
all_data_as_dict = import_all_files_as_dict(path)

In [125]:
all_data_as_dict

{'ABEO':              Close   lag_1  label
 Date                             
 2017-08-22  196.25  196.25      0
 2017-08-23  218.75  196.25      0
 2017-08-24  240.00  218.75      0
 2017-08-25  226.25  240.00      1
 2017-08-28  237.50  226.25      0
 ...            ...     ...    ...
 2022-08-15    4.65    4.43      0
 2022-08-16    4.77    4.65      0
 2022-08-17    4.60    4.77      1
 2022-08-18    4.36    4.60      1
 2022-08-19    4.11    4.36      1
 
 [1258 rows x 3 columns],
 'ABIO':                 Close      lag_1  label
 Date                                   
 2017-08-22  18.900000  20.700001      1
 2017-08-23  19.799999  18.900000      0
 2017-08-24  20.700001  19.799999      0
 2017-08-25  21.240000  20.700001      0
 2017-08-28  19.799999  21.240000      1
 ...               ...        ...    ...
 2022-08-15   2.450000   2.470000      1
 2022-08-16   2.410000   2.450000      1
 2022-08-17   2.380000   2.410000      1
 2022-08-18   2.360000   2.380000      1
 2022-08-

In [126]:
np.random.seed(1)
num_iters = 1000
alpha = 5.0e-5
lambda_ = 0.7

models = {}

for company in all_data_as_dict.keys():
    X, y = all_data_as_dict[company][['Close', 'lag_1']], all_data_as_dict[company]['label']
    train_len = int(len(all_data_as_dict[company]) * 0.7)
    dev_len = int(len(all_data_as_dict[company]) * 0.15)
    X_train, y_train, X_dev, y_dev, X_test, y_test = np.array(X[:train_len]), np.array(y[:train_len]), np.array(X[train_len:(train_len + dev_len)]), np.array(y[train_len:(train_len + dev_len)]), np.array(X[(train_len + dev_len):]), np.array(y[(train_len + dev_len):])
    model = LogisticRegression(random_state=12)
    model.fit(X_train, y_train)
    y_hat = model.predict(X_dev)

    models[company] = {
        'model': model,
        'y_hat': y_hat,
        'X_dev': X_dev,
        'y_dev': y_dev,
        'X_test': X_test,
        'y_test': y_test
    }

In [127]:
costs = {}
for company in all_data_as_dict.keys():
    cost = models[company]['model'].score(models[company]['X_dev'], models[company]['y_hat'])
    costs[company] = cost

In [128]:
costs

{'ABEO': 1.0,
 'ABIO': 1.0,
 'ABUS': 1.0,
 'ACAD': 1.0,
 'ACER': 1.0,
 'ACHN': 1.0,
 'ACHV': 1.0,
 'ACIU': 1.0,
 'ACOR': 1.0,
 'ACRS': 1.0,
 'ACST': 1.0,
 'ADAP': 1.0,
 'ADIL': 1.0,
 'ADMA': 1.0,
 'ADVM': 1.0,
 'ADXS': 1.0,
 'AEZS': 1.0,
 'AFMD': 1.0,
 'AGE': 1.0,
 'AGEN': 1.0,
 'AGIO': 1.0,
 'AGLE': 1.0,
 'AGTC': 1.0,
 'AKBA': 1.0,
 'AKTX': 1.0,
 'ALBO': 1.0,
 'ALDX': 1.0,
 'ALKS': 1.0,
 'ALLK': 1.0,
 'ALLO': 1.0,
 'ALNA': 1.0,
 'ALNY': 1.0,
 'ALPN': 1.0,
 'ALRN': 1.0,
 'ALT': 1.0,
 'AMGN': 1.0,
 'AMPE': 1.0,
 'AMRN': 1.0,
 'ANAB': 1.0,
 'ANIK': 1.0,
 'ANIP': 1.0,
 'APLS': 1.0,
 'APM': 1.0,
 'APTO': 1.0,
 'APTX': 1.0,
 'APVO': 1.0,
 'AQB': 1.0,
 'AQST': 1.0,
 'ARAV': 1.0,
 'ARCT': 1.0,
 'ARDS': 1.0,
 'ARDX': 1.0,
 'ARGX': 1.0,
 'ARRY': 1.0,
 'ARVN': 1.0,
 'ARWR': 1.0,
 'ASLN': 1.0,
 'ASMB': 1.0,
 'ASND': 1.0,
 'ASNS': 1.0,
 'ATHX': 1.0,
 'ATNM': 1.0,
 'ATNX': 1.0,
 'ATRA': 1.0,
 'AUPH': 1.0,
 'AUTL': 1.0,
 'AVDL': 1.0,
 'AVEO': 1.0,
 'AVRO': 1.0,
 'AVXL': 1.0,
 'AXON': 1.0,
 'AXSM': 1

In [129]:
predictions = {}
for company in all_data_as_dict.keys():
    X_test = models[company]['X_test']
    y_hat = models[company]['model'].predict(X_test)
    predictions[company] = y_hat

In [130]:
for company in all_data_as_dict.keys():
    print(f"Confusion Matrix {company}:\n{confusion_matrix(models[company]['y_test'], predictions[company])}")

Confusion Matrix ABEO:
[[106   0]
 [  3  81]]
Confusion Matrix ABIO:
[[47 49]
 [ 0 94]]
Confusion Matrix ABUS:
[[91  3]
 [ 0 96]]
Confusion Matrix ACAD:
[[96  0]
 [ 1 93]]
Confusion Matrix ACER:
[[97  0]
 [ 0 93]]
Confusion Matrix ACHN:
[[32]]
Confusion Matrix ACHV:
[[87  0]
 [30 73]]
Confusion Matrix ACIU:
[[84  0]
 [14 92]]
Confusion Matrix ACOR:
[[88  0]
 [18 84]]
Confusion Matrix ACRS:
[[100   0]
 [  1  89]]
Confusion Matrix ACST:
[[98  0]
 [75 17]]
Confusion Matrix ADAP:
[[ 85   0]
 [  0 105]]
Confusion Matrix ADIL:
[[70  1]
 [ 1 82]]
Confusion Matrix ADMA:
[[66 46]
 [ 0 78]]
Confusion Matrix ADVM:
[[95  0]
 [43 52]]
Confusion Matrix ADXS:
[[ 73  11]
 [  0 106]]
Confusion Matrix AEZS:
[[108   0]
 [  6  76]]
Confusion Matrix AFMD:
[[91  0]
 [ 5 94]]
Confusion Matrix AGE:
[[24 46]
 [ 0 71]]
Confusion Matrix AGEN:
[[90  4]
 [ 0 96]]
Confusion Matrix AGIO:
[[ 83   0]
 [  0 107]]
Confusion Matrix AGLE:
[[81  0]
 [45 64]]
Confusion Matrix AGTC:
[[87  0]
 [27 76]]
Confusion Matrix AKBA:


In [131]:
for company in all_data_as_dict.keys():
    print(f"Report: {company}:\n{classification_report(models[company]['y_test'], predictions[company])}")

Report: ABEO:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       106
           1       1.00      0.96      0.98        84

    accuracy                           0.98       190
   macro avg       0.99      0.98      0.98       190
weighted avg       0.98      0.98      0.98       190

Report: ABIO:
              precision    recall  f1-score   support

           0       1.00      0.49      0.66        96
           1       0.66      1.00      0.79        94

    accuracy                           0.74       190
   macro avg       0.83      0.74      0.73       190
weighted avg       0.83      0.74      0.72       190

Report: ABUS:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        94
           1       0.97      1.00      0.98        96

    accuracy                           0.98       190
   macro avg       0.98      0.98      0.98       190
weighted avg       0.98      0.9