In [1]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from statsmodels.tsa.stattools import adfuller

# Step 1: Open the SQLite database.
# NOTE: sqlite3.connect() creates an empty database file when the path does not
# exist, so a wrong working directory surfaces below as a "no such table" error
# rather than as a failure to connect.
conn = sqlite3.connect('1_financial_data.db')

# Step 2: Load each table into its own pandas DataFrame.
# The reads are wrapped in try/finally so the connection is always released,
# even when one of the queries raises (e.g. a table is missing).
try:
    # Rows of the 'equities_data' table
    equities_data = pd.read_sql('SELECT * FROM equities_data', conn)

    # Rows of the 'volume_threshold_options_data' table
    volume_threshold_options_data = pd.read_sql('SELECT * FROM volume_threshold_options_data', conn)

    # Rows of the 'merged_data' table
    merged_data = pd.read_sql('SELECT * FROM merged_data', conn)
finally:
    # Step 3: Close the connection
    conn.close()


# #INPUT DATA FROM 1_clean_download_data.ipynb

# # Define the dates
# start_year = 2020
# start_month = 1
# start_day = 1

# end_year = 2023
# end_month = 12
# end_day = 30

In [2]:
# Sanity check (optional): show the heading followed by the first 10 equities rows.
# A single print with a newline separator emits the same text as two prints.
print("Equities Data:", equities_data.head(10), sep="\n")

Equities Data:
   permno     cusip                 date    bid    ask       vol   shrout  \
0   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
1   10200  75991610  2020-01-03 00:00:00  92.74  92.78  246742.0  52078.0   
2   10200  75991610  2020-01-06 00:00:00  93.89  93.90  380649.0  52078.0   
3   10200  75991610  2020-01-07 00:00:00  95.00  95.08  370624.0  52078.0   
4   10200  75991610  2020-01-08 00:00:00  95.13  95.14  259393.0  52078.0   
5   10200  75991610  2020-01-09 00:00:00  96.41  96.46  340540.0  52078.0   
6   10200  75991610  2020-01-10 00:00:00  97.89  97.90  384846.0  52078.0   
7   10200  75991610  2020-01-13 00:00:00  99.01  99.02  374095.0  52078.0   
8   10200  75991610  2020-01-14 00:00:00  99.37  99.40  346843.0  52078.0   
9   10200  75991610  2020-01-15 00:00:00  99.65  99.66  412594.0  52078.0   

      prc      mktcap  
0  93.780  4883874.84  
1  92.740  4829713.72  
2  93.895  4889863.81  
3  95.000  4947410.00  
4  95.130  495418

In [3]:
# Sanity check: heading (preceded by a blank line) and the first 10 option rows.
print(
    "\nVolume Threshold Options Data:",
    volume_threshold_options_data.head(10),
    sep="\n",
)


Volume Threshold Options Data:
                  date ticker      exdate cp_flag  strike_price  \
0  2020-01-02 00:00:00   ABEO  2020-01-17       C        2500.0   
1  2020-01-02 00:00:00   ABEO  2020-01-17       P        2500.0   
2  2020-01-02 00:00:00   ABEO  2020-01-17       C        5000.0   
3  2020-01-02 00:00:00   ABEO  2020-01-17       P       20000.0   
4  2020-01-02 00:00:00   ABEO  2020-02-21       P        2500.0   
5  2020-01-02 00:00:00   ABEO  2020-02-21       C        2500.0   
6  2020-01-02 00:00:00   ABEO  2020-02-21       C        5000.0   
7  2020-01-02 00:00:00   ABEO  2020-03-20       P        2500.0   
8  2020-01-02 00:00:00   ABEO  2020-03-20       C        2500.0   
9  2020-01-02 00:00:00   ABEO  2020-03-20       C        5000.0   

   strike_price_actual  best_bid  best_offer  volume  open_interest  \
0                 0.25      0.50        1.00    87.0         1661.0   
1                 0.25      0.05        0.15    16.0          760.0   
2                

In [4]:
# Sanity check: heading (preceded by a blank line) and the first 10 merged rows.
print("\nMerged Data:", merged_data.head(10), sep="\n")


Merged Data:
   permno     cusip                 date    bid    ask       vol   shrout  \
0   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
1   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
2   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
3   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
4   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
5   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
6   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
7   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
8   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   
9   10200  75991610  2020-01-02 00:00:00  93.72  93.78  313953.0  52078.0   

     prc      mktcap ticker  ... strike_price_actual best_bid  best_offer  \
0  93.78  4883874.84   RGEN  ...                 9.5     1.40

In [5]:
# Distinct values of the 'date' column, in order of first appearance.
merged_data["date"].unique()

array(['2020-01-02 00:00:00', '2020-01-03 00:00:00',
       '2020-01-06 00:00:00', '2020-01-07 00:00:00',
       '2020-01-08 00:00:00', '2020-01-09 00:00:00',
       '2020-01-10 00:00:00', '2020-01-13 00:00:00',
       '2020-01-14 00:00:00', '2020-01-15 00:00:00',
       '2020-01-16 00:00:00', '2020-01-17 00:00:00',
       '2020-01-21 00:00:00', '2020-01-22 00:00:00',
       '2020-01-23 00:00:00', '2020-01-24 00:00:00',
       '2020-01-27 00:00:00', '2020-01-28 00:00:00',
       '2020-01-29 00:00:00', '2020-01-30 00:00:00',
       '2020-01-31 00:00:00', '2020-02-03 00:00:00',
       '2020-02-04 00:00:00', '2020-02-05 00:00:00',
       '2020-02-06 00:00:00', '2020-02-07 00:00:00',
       '2020-02-10 00:00:00', '2020-02-11 00:00:00',
       '2020-02-12 00:00:00', '2020-02-13 00:00:00',
       '2020-02-14 00:00:00', '2020-02-18 00:00:00',
       '2020-02-19 00:00:00', '2020-02-20 00:00:00',
       '2020-02-21 00:00:00', '2020-02-24 00:00:00',
       '2020-02-25 00:00:00', '2020-02-26 00:0