<h3> Objectives</h3>
<ul>
<li>Determine whether continuous (range-scaled) independent variables can predict a discrete outcome: a positive or a negative profit/loss.</li>
</ul>

In [1]:
# standard library
import sys, os, re
sys.path.append(os.path.abspath(os.path.join("../..", "src")))
import math

# pip packages
import numpy as np
import pandas as pd
import pickle as p
import matplotlib as mpl
import matplotlib.pyplot as plt

# ml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# local modules
import d02_processing as preprocessor

# matplotlib visualize in jupyter
%matplotlib inline

In [2]:
# resolve the project data directory relative to the notebook, then load the processed trades
data_dir = os.path.abspath(os.path.join("../..", "data"))
trades_csv = data_dir + "/03_processed/01_df_trade_processed.csv"
data = pd.read_csv(trades_csv)
data.head()

Unnamed: 0,trade_num,ticker,profit_loss,cap,time_duration,positions,day,week,date,volume,entry,exit,Market Cap,Short Float,Shs Float,Market Category
0,0,SURG,-0.38,40.39,110,2,1,45,2021-11-09,10,06:49:00,06:50:50,,,,unknown
1,1,SURG,-2.17,40.8,148,2,1,45,2021-11-09,10,06:42:55,06:43:33,,,,unknown
2,2,ATER,-0.19,7.98,67,2,1,45,2021-11-09,1,06:31:18,06:31:49,388.21M,35.60%,19.57M,small
3,3,PPSI,140.79,1702.95,449,9,0,45,2021-11-08,25,06:37:55,06:43:34,64.15M,0.51%,3.79M,micro
4,4,PPSI,-35.62,603.66,138,4,0,45,2021-11-08,50,06:31:57,06:32:21,64.15M,0.51%,3.79M,micro


In [3]:
# binary target: 1 when the trade made money, 0 otherwise
# (a break-even trade, profit_loss == 0, is labelled 0 because the test is strictly > 0)
is_profitable = data["profit_loss"] > 0
data["sign"] = is_profitable.astype(int)
data.head()


Unnamed: 0,trade_num,ticker,profit_loss,cap,time_duration,positions,day,week,date,volume,entry,exit,Market Cap,Short Float,Shs Float,Market Category,sign
0,0,SURG,-0.38,40.39,110,2,1,45,2021-11-09,10,06:49:00,06:50:50,,,,unknown,0
1,1,SURG,-2.17,40.8,148,2,1,45,2021-11-09,10,06:42:55,06:43:33,,,,unknown,0
2,2,ATER,-0.19,7.98,67,2,1,45,2021-11-09,1,06:31:18,06:31:49,388.21M,35.60%,19.57M,small,0
3,3,PPSI,140.79,1702.95,449,9,0,45,2021-11-08,25,06:37:55,06:43:34,64.15M,0.51%,3.79M,micro,1
4,4,PPSI,-35.62,603.66,138,4,0,45,2021-11-08,50,06:31:57,06:32:21,64.15M,0.51%,3.79M,micro,0


In [4]:
# process special case '-' as missing | convert string to numerical float
# astype(str) renders a real NaN as the text 'nan', so a lone '-' placeholder is
# rewritten to that same text. A plain literal is used because the np.NaN alias
# no longer exists in NumPy >= 2.0.
shs_float_text = data['Shs Float'].astype(str).str.replace(r'^-$', 'nan', regex=True)
# value_to_float is a project helper that is not visible in this notebook; it is
# handed one string per row (e.g. '19.57M' or 'nan').
shs_float = shs_float_text.apply(preprocessor.process.value_to_float)
# Zero-fill only after the numeric conversion: on the string column there is no
# real NaN to match, so filling at that stage would do nothing.
data['Shs Float'] = shs_float.fillna(0)

In [5]:
# training | testing data
# Single feature ('Shs Float') against the binary 'sign' target, 90% train / 10% test.
# random_state pins the shuffle so the split -- and every score computed from it --
# is identical after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    data[['Shs Float']],
    data.sign,
    train_size=0.9,
    test_size=0.10,
    random_state=42,
)

In [6]:
# build model: logistic regression with default settings, fitted on the training split
# (fit returns the estimator itself, so construction and fitting can be chained)
model = LogisticRegression().fit(X_train, y_train)
model

LogisticRegression()

In [7]:
# save the model to disk
# The file is written into the current working directory. The with-block closes
# the handle as soon as the dump finishes, so the bytes are flushed to disk and
# no open file object is left behind in the kernel.
filename = 'logistic_regression_model.sav'
with open(filename, 'wb') as model_file:
    p.dump(model, model_file)

In [8]:
# predict a class label (0 or 1) for every row of the held-out test set
test_predictions = model.predict(X_test)
test_predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [9]:
# overall model score: mean accuracy of the predictions on the test set
# NOTE(review): in the recorded run every prediction was 0, so the score there is
# simply the share of class-0 rows in that test split -- confirm after a re-run.
test_accuracy = model.score(X_test, y_test)
test_accuracy

0.5686274509803921

In [10]:
# view the probabilities behind each decision: one row per test sample,
# one column per class in the order of model.classes_
test_probabilities = model.predict_proba(X_test)
test_probabilities

array([[0.50132961, 0.49867039],
       [0.50394499, 0.49605501],
       [0.50046537, 0.49953463],
       [0.50092911, 0.49907089],
       [0.50013458, 0.49986542],
       [0.50059022, 0.49940978],
       [0.50349424, 0.49650576],
       [0.5       , 0.5       ],
       [0.50043618, 0.49956382],
       [0.50053023, 0.49946977],
       [0.50191172, 0.49808828],
       [0.50726536, 0.49273464],
       [0.50071832, 0.49928168],
       [0.50245329, 0.49754671],
       [0.50024484, 0.49975516],
       [0.5311731 , 0.4688269 ],
       [0.52307355, 0.47692645],
       [0.50119665, 0.49880335],
       [0.50266732, 0.49733268],
       [0.52774104, 0.47225896],
       [0.50343425, 0.49656575],
       [0.5011999 , 0.4988001 ],
       [0.50063886, 0.49936114],
       [0.50059022, 0.49940978],
       [0.50291215, 0.49708785],
       [0.5       , 0.5       ],
       [0.50064535, 0.49935465],
       [0.5008756 , 0.4991244 ],
       [0.50167337, 0.49832663],
       [0.50053023, 0.49946977],
       [0.