In [1]:
# standard library
import os, sys
# Append the absolute form of ../../src (resolved against the current working
# directory) to the module search path before the local imports run.
# NOTE(review): presumably the d01_*..d04_* local packages imported below live
# in that directory — confirm.
sys.path.append(os.path.abspath(os.path.join("../..", "src")))

# pip packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# local packages
import d01_utils as stats
import d02_processing as preprocessor
import d03_models as model
import d04_visualization as vis


<h3>Input Data</h3>

<p>The input data is a CSV file exported from a broker that records stock transactions on the US markets.</p>
<p>In this summary report, I use my personal data from the start of 2021 through the date of writing, June 30, 2021.</p>

In [23]:
# Load the raw broker export from ../../data/01_raw (relative to the current
# working directory). Every path component goes through os.path.join so no
# separator is hard-coded into the string.
orig_csv = pd.read_csv(
    os.path.join(
        os.path.abspath(os.path.join("../..", "data")),
        "01_raw",
        "01_trade_activity.csv",
    )
)
# Show the first rows as the cell output.
orig_csv.head()

Unnamed: 0.1,Unnamed: 0,Exec Time,Spread,Side,Qty,Pos Effect,Symbol,Exp,Strike,Type,Price,Net Price,Order Type
0,,6/30/21 07:39:40,STOCK,SELL,-1,TO CLOSE,CUEN,,,STOCK,8.42,8.42,MKT
1,,6/30/21 07:35:39,STOCK,BUY,1,TO OPEN,CUEN,,,STOCK,8.495,8.5,MKT
2,,6/30/21 06:59:24,STOCK,SELL,-1,TO CLOSE,CUEN,,,STOCK,8.1656,8.17,MKT
3,,6/30/21 06:58:17,STOCK,SELL,-10,TO CLOSE,CUEN,,,STOCK,8.37,8.37,LMT
4,,6/30/21 06:57:59,STOCK,BUY,10,TO OPEN,CUEN,,,STOCK,8.5579,8.558,LMT


<h3>Preprocessing Data</h3>

<p>Removing missing values, empty NaN columns, and unused data columns</p>
<p>Adding feature set (Net Position) to use for processing</p>


In [25]:
# Load the intermediate trade-activity CSV from ../../data/02_intermediate
# (relative to the current working directory). Every path component goes
# through os.path.join so no separator is hard-coded into the string.
features_added = pd.read_csv(
    os.path.join(
        os.path.abspath(os.path.join("../..", "data")),
        "02_intermediate",
        "01_df_trade_activity.csv",
    )
)
# Show the first rows as the cell output.
features_added.head()


Unnamed: 0,Exec Time,Qty,Symbol,Type,Net Price,Net Position
0,2021-06-30 07:39:40,-1,CUEN,STOCK,8.42,-8.42
1,2021-06-30 07:35:39,1,CUEN,STOCK,8.5,8.5
2,2021-06-30 06:59:24,-1,CUEN,STOCK,8.17,-8.17
3,2021-06-30 06:58:17,-10,CUEN,STOCK,8.37,-83.7
4,2021-06-30 06:57:59,10,CUEN,STOCK,8.558,85.58


<h3>Process Data and Output Results</h3>

<p>The activity is divided into individual trades. The following information is computed for each trade:</p>
<ul>
<li>Company Name(ticker)</li>
<li>Cost of Trade</li>
<li>Quantity(number of shares) in the Trade</li>
<li>Duration of the Trade</li>
<li>Net gain or net loss</li>
<li>Number of Entries (+1, +2, +3... shares, etc.)</li>
<li>Number of Exits (-1, -2, -3... shares, etc.)</li>
</ul>


In [27]:
# Load the processed per-trade CSV from ../../data/03_processed (relative to
# the current working directory). Every path component goes through
# os.path.join so no separator is hard-coded into the string.
output = pd.read_csv(
    os.path.join(
        os.path.abspath(os.path.join("../..", "data")),
        "03_processed",
        "01_df_trade_processed.csv",
    )
)
# Show the first rows as the cell output.
output.head()

Unnamed: 0.1,Unnamed: 0,ticker,profit_loss,cap,time_duration,positions,entry_pos,exit_pos,entrys,exits
0,0,CUEN,-0.08,8.5,319,2,1,1,[Timestamp('2021-06-30 07:35:39')],[Timestamp('2021-06-30 07:39:40')]
1,1,CUEN,-1.96,93.83,136,4,2,2,"[Timestamp('2021-06-30 06:57:59'), Timestamp('...","[Timestamp('2021-06-30 06:59:24'), Timestamp('..."
2,2,CUEN,-0.2,43.45,72,2,1,1,[Timestamp('2021-06-30 06:45:28')],[Timestamp('2021-06-30 06:45:44')]
3,3,CUEN,3.6,84.0,55,2,1,1,[Timestamp('2021-06-30 06:40:15')],[Timestamp('2021-06-30 06:40:40')]
4,4,CUEN,0.73,47.18,111,3,2,1,"[Timestamp('2021-06-30 06:39:42'), Timestamp('...",[Timestamp('2021-06-30 06:40:09')]


<h3>Visualize Cumulative Profit and Losses</h3>

<p> X Axis = trade number; the last number on the x axis is my most recent trade</p>
<p> Y Axis = cumulative net profit or loss from trading activity </p>

<img src="trend_plots_01.png"/>

<h3>Histograms</h3>

<img src="hist_01.png"/>

<h3>Linear Regression</h3>

<img src="linear_regression_scatter_plots_01.png"/>

<h3>Bar Charts</h3>

<img src="bar_charts_01.png"/>