# Detection from ThetaData (Trade Data)

This notebook uses historical options data from ThetaData to develop the Bulk Volume Classification (BVC) algorithm, which identifies information-based trading activity and infers trade aggressors.

In [2]:
import os
import time
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta, datetime, date
from thetadata import ThetaClient, OptionReqType, OptionRight, StockReqType, DateRange, DataType, TradeCondition, Exchange

ModuleNotFoundError: No module named 'thetadata'

# Detection from ThetaData (Trade Data)

This notebook uses historical options data from ThetaData to develop the Bulk Volume Classification (BVC) algorithm, which identifies information-based trading activity and infers trade aggressors.

In [None]:
import os
import time
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta, datetime, date
from thetadata import ThetaClient, OptionReqType, OptionRight, StockReqType, DateRange, DataType, TradeCondition, Exchange

ModuleNotFoundError: No module named 'thetadata'

### Function to Get Expirations

In [None]:
def get_expirations(ticker) -> pd.DataFrame:
    """Fetch the option expiration dates listed for one underlying root.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables; a missing variable raises
    ``KeyError`` before any connection is attempted.

    Args:
        ticker: Root symbol forwarded as ``root`` to ``client.get_expirations``.

    Returns:
        Whatever ``client.get_expirations`` returns for ``ticker``.
    """
    credentials = {
        'username': os.environ['thetadata_username'],
        'passwd': os.environ['thetadata_password'],
    }
    theta = ThetaClient(jvm_mem=4, timeout=15, **credentials)
    # The request is only issued while the connection context is open.
    with theta.connect():
        expirations = theta.get_expirations(root=ticker)
    return expirations

### Function to Get Strikes

In [None]:
def get_strikes(ticker, expiration_dates) -> dict:
    """Fetch the listed strike prices for each requested expiration.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables. All expirations are queried
    over a single client connection.

    Args:
        ticker: Root symbol forwarded as ``root`` to ``client.get_strikes``.
        expiration_dates: Iterable of expirations; each one is forwarded as
            ``exp`` and used as a key of the result.

    Returns:
        Dict mapping every expiration to its strikes, converted with
        ``pd.to_numeric``.
    """
    theta = ThetaClient(
        username=os.environ['thetadata_username'],
        passwd=os.environ['thetadata_password'],
        jvm_mem=4,
        timeout=15,
    )
    strikes_by_expiration = {}
    with theta.connect():
        # The generator is consumed lazily, so entries are inserted one
        # expiration at a time, in the order given.
        strikes_by_expiration.update(
            (expiration, pd.to_numeric(theta.get_strikes(root=ticker, exp=expiration)))
            for expiration in expiration_dates
        )
    return strikes_by_expiration

### Function to Get Option Trades

In [None]:
def get_option_trades(ticker, exp_date, strikes, start_date, end_date, interval_size=0, opt_type=OptionRight.CALL) -> dict:
    """Request historical trade/quote data for one expiration across several strikes.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables. Every strike is requested
    over the same client connection with ``OptionReqType.TRADE_QUOTE``.

    Args:
        ticker: Root symbol forwarded as ``root``.
        exp_date: Expiration forwarded as ``exp``.
        strikes: Iterable of strikes; each one becomes a key of the result.
        start_date: Start of the ``DateRange`` requested.
        end_date: End of the ``DateRange`` requested.
        interval_size: Forwarded unchanged to ``get_hist_option`` (default 0).
        opt_type: Option right forwarded as ``right`` (default ``OptionRight.CALL``).

    Returns:
        Dict mapping each strike to the response of ``get_hist_option``, or to
        ``np.nan`` when the request for that strike raised an exception.
    """
    client = ThetaClient(username=os.environ['thetadata_username'], passwd=os.environ['thetadata_password'], jvm_mem=12, timeout=30)
    datas = {}
    with client.connect():
        for strike in strikes:
            try:
                datas[strike] = client.get_hist_option(
                    req=OptionReqType.TRADE_QUOTE,
                    root=ticker,
                    exp=exp_date,
                    strike=strike,
                    right=opt_type,
                    date_range=DateRange(start_date, end_date),
                    progress_bar=False,
                    interval_size=interval_size
                )
            except Exception as e:
                # Best-effort download: one failing strike must not abort the
                # remaining requests, so record a placeholder and report
                # which contract failed.
                datas[strike] = np.nan
                print(f"Request failed for {ticker} {exp_date} strike {strike}: {e}")
    return datas

### Function to Process Trade Data

In [None]:
def process_trade_data(trades_df):
    """Turn a raw ThetaData trade/quote frame into a time-indexed, labelled frame.

    The input is expected to carry the ``DataType`` columns DATE, MS_OF_DAY,
    MS_OF_DAY2, SEQUENCE, CONDITION, BID_EXCHANGE, ASK_EXCHANGE, PRICE and
    SIZE (all of them are read or dropped below).

    Steps:
      * build a ``DATETIME`` index from DATE plus the MS_OF_DAY offset in
        milliseconds, sorted chronologically;
      * add ``Value`` (price * size * 100) and ``Volume_Order`` (ascending
        rank of SIZE, ties ranked in their original row order);
      * translate the numeric condition and exchange codes into the names of
        the ``TradeCondition`` / ``Exchange`` enum members, stored in the
        string-named columns ``Condition``, ``BID_EXCHANGE`` and
        ``ASK_EXCHANGE``;
      * drop the raw helper columns.

    The caller's frame is left untouched; a new frame is returned.

    Args:
        trades_df: DataFrame of raw trades as returned by the client.

    Returns:
        The processed DataFrame indexed by ``DATETIME``.

    Raises:
        KeyError: if a code, and its negation, is unknown to the enum.
    """
    contract_size = 100  # multiplier applied to price * size

    def _decode(codes, names):
        """Map each code to its name, retrying with the negated code when unknown."""
        return codes.map(lambda code: names[code] if code in names else names[-code])

    # Work on a copy so the caller's frame is not half-modified as a side effect.
    trades_df = trades_df.copy()

    trades_df['DATETIME'] = trades_df[DataType.DATE] + pd.to_timedelta(trades_df[DataType.MS_OF_DAY], unit='ms')
    trades_df['Value'] = trades_df[DataType.PRICE] * trades_df[DataType.SIZE] * contract_size
    # Ranked before sorting so ties keep following the original row order.
    trades_df['Volume_Order'] = trades_df[DataType.SIZE].rank(ascending=True, method='first')
    # sort_index returns a new frame; a stable sort keeps the relative order
    # of trades that share a timestamp.
    trades_df = trades_df.set_index('DATETIME').sort_index(kind='mergesort')

    trade_cond = {x.value: x.name for x in TradeCondition}
    exchanges = {x.value[0]: x.name for x in Exchange}

    # Column-wise mapping also works on an empty frame, unlike a row-wise apply.
    trades_df['Condition'] = _decode(trades_df[DataType.CONDITION], trade_cond)
    trades_df['BID_EXCHANGE'] = _decode(trades_df[DataType.BID_EXCHANGE], exchanges)
    trades_df['ASK_EXCHANGE'] = _decode(trades_df[DataType.ASK_EXCHANGE], exchanges)

    return trades_df.drop(columns=[DataType.MS_OF_DAY, DataType.MS_OF_DAY2, DataType.SEQUENCE, DataType.CONDITION, DataType.BID_EXCHANGE, DataType.ASK_EXCHANGE])

### Function to Get Expirations

In [None]:
def get_expirations(ticker) -> pd.DataFrame:
    """Fetch the option expiration dates listed for one underlying root.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables; a missing variable raises
    ``KeyError`` before any connection is attempted.

    Args:
        ticker: Root symbol forwarded as ``root`` to ``client.get_expirations``.

    Returns:
        Whatever ``client.get_expirations`` returns for ``ticker``.
    """
    credentials = {
        'username': os.environ['thetadata_username'],
        'passwd': os.environ['thetadata_password'],
    }
    theta = ThetaClient(jvm_mem=4, timeout=15, **credentials)
    # The request is only issued while the connection context is open.
    with theta.connect():
        expirations = theta.get_expirations(root=ticker)
    return expirations

### Function to Get Strikes

In [None]:
def get_strikes(ticker, expiration_dates) -> dict:
    """Fetch the listed strike prices for each requested expiration.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables. All expirations are queried
    over a single client connection.

    Args:
        ticker: Root symbol forwarded as ``root`` to ``client.get_strikes``.
        expiration_dates: Iterable of expirations; each one is forwarded as
            ``exp`` and used as a key of the result.

    Returns:
        Dict mapping every expiration to its strikes, converted with
        ``pd.to_numeric``.
    """
    theta = ThetaClient(
        username=os.environ['thetadata_username'],
        passwd=os.environ['thetadata_password'],
        jvm_mem=4,
        timeout=15,
    )
    strikes_by_expiration = {}
    with theta.connect():
        # The generator is consumed lazily, so entries are inserted one
        # expiration at a time, in the order given.
        strikes_by_expiration.update(
            (expiration, pd.to_numeric(theta.get_strikes(root=ticker, exp=expiration)))
            for expiration in expiration_dates
        )
    return strikes_by_expiration

### Function to Get Option Trades

In [None]:
def get_option_trades(ticker, exp_date, strikes, start_date, end_date, interval_size=0, opt_type=OptionRight.CALL) -> dict:
    """Request historical trade/quote data for one expiration across several strikes.

    Credentials are read from the ``thetadata_username`` and
    ``thetadata_password`` environment variables. Every strike is requested
    over the same client connection with ``OptionReqType.TRADE_QUOTE``.

    Args:
        ticker: Root symbol forwarded as ``root``.
        exp_date: Expiration forwarded as ``exp``.
        strikes: Iterable of strikes; each one becomes a key of the result.
        start_date: Start of the ``DateRange`` requested.
        end_date: End of the ``DateRange`` requested.
        interval_size: Forwarded unchanged to ``get_hist_option`` (default 0).
        opt_type: Option right forwarded as ``right`` (default ``OptionRight.CALL``).

    Returns:
        Dict mapping each strike to the response of ``get_hist_option``, or to
        ``np.nan`` when the request for that strike raised an exception.
    """
    client = ThetaClient(username=os.environ['thetadata_username'], passwd=os.environ['thetadata_password'], jvm_mem=12, timeout=30)
    datas = {}
    with client.connect():
        for strike in strikes:
            try:
                datas[strike] = client.get_hist_option(
                    req=OptionReqType.TRADE_QUOTE,
                    root=ticker,
                    exp=exp_date,
                    strike=strike,
                    right=opt_type,
                    date_range=DateRange(start_date, end_date),
                    progress_bar=False,
                    interval_size=interval_size
                )
            except Exception as e:
                # Best-effort download: one failing strike must not abort the
                # remaining requests, so record a placeholder and report
                # which contract failed.
                datas[strike] = np.nan
                print(f"Request failed for {ticker} {exp_date} strike {strike}: {e}")
    return datas

### Function to Process Trade Data

In [None]:
def process_trade_data(trades_df):
    """Turn a raw ThetaData trade/quote frame into a time-indexed, labelled frame.

    The input is expected to carry the ``DataType`` columns DATE, MS_OF_DAY,
    MS_OF_DAY2, SEQUENCE, CONDITION, BID_EXCHANGE, ASK_EXCHANGE, PRICE and
    SIZE (all of them are read or dropped below).

    Steps:
      * build a ``DATETIME`` index from DATE plus the MS_OF_DAY offset in
        milliseconds, sorted chronologically;
      * add ``Value`` (price * size * 100) and ``Volume_Order`` (ascending
        rank of SIZE, ties ranked in their original row order);
      * translate the numeric condition and exchange codes into the names of
        the ``TradeCondition`` / ``Exchange`` enum members, stored in the
        string-named columns ``Condition``, ``BID_EXCHANGE`` and
        ``ASK_EXCHANGE``;
      * drop the raw helper columns.

    The caller's frame is left untouched; a new frame is returned.

    Args:
        trades_df: DataFrame of raw trades as returned by the client.

    Returns:
        The processed DataFrame indexed by ``DATETIME``.

    Raises:
        KeyError: if a code, and its negation, is unknown to the enum.
    """
    contract_size = 100  # multiplier applied to price * size

    def _decode(codes, names):
        """Map each code to its name, retrying with the negated code when unknown."""
        return codes.map(lambda code: names[code] if code in names else names[-code])

    # Work on a copy so the caller's frame is not half-modified as a side effect.
    trades_df = trades_df.copy()

    trades_df['DATETIME'] = trades_df[DataType.DATE] + pd.to_timedelta(trades_df[DataType.MS_OF_DAY], unit='ms')
    trades_df['Value'] = trades_df[DataType.PRICE] * trades_df[DataType.SIZE] * contract_size
    # Ranked before sorting so ties keep following the original row order.
    trades_df['Volume_Order'] = trades_df[DataType.SIZE].rank(ascending=True, method='first')
    # sort_index returns a new frame; a stable sort keeps the relative order
    # of trades that share a timestamp.
    trades_df = trades_df.set_index('DATETIME').sort_index(kind='mergesort')

    trade_cond = {x.value: x.name for x in TradeCondition}
    exchanges = {x.value[0]: x.name for x in Exchange}

    # Column-wise mapping also works on an empty frame, unlike a row-wise apply.
    trades_df['Condition'] = _decode(trades_df[DataType.CONDITION], trade_cond)
    trades_df['BID_EXCHANGE'] = _decode(trades_df[DataType.BID_EXCHANGE], exchanges)
    trades_df['ASK_EXCHANGE'] = _decode(trades_df[DataType.ASK_EXCHANGE], exchanges)

    return trades_df.drop(columns=[DataType.MS_OF_DAY, DataType.MS_OF_DAY2, DataType.SEQUENCE, DataType.CONDITION, DataType.BID_EXCHANGE, DataType.ASK_EXCHANGE])