# Explore signal/price movement relationship

This is an example notebook to explore whether a trading signal results in profitable trades.

- 3 days 
- It explores a small set of pairs on Uni v3 on Polygon
- Both long and short are considered (though shorts might be theoretical only, if such a lending market does not exist at that point in time)

First run to prepare the dataset:

```shell
python scripts/prepare-polygon-momentum-candles.py
```

In [None]:
import pandas as pd

from tradingstrategy.client import Client
from tradingstrategy.chain import ChainId
from tradingstrategy.pair import PandasPairUniverse
from tradingstrategy.timebucket import TimeBucket
from tradeexecutor.utils.default_strategies import get_default_strategies_path
from tradeexecutor.strategy.execution_context import notebook_execution_context
from tradeexecutor.utils.default_strategies import get_default_strategies_path
from tradeexecutor.strategy.strategy_module import read_strategy_module
from tradeexecutor.strategy.trading_strategy_universe import TradingStrategyUniverse, load_partial_data, load_trading_and_lending_data
from tradeexecutor.strategy.universe_model import UniverseOptions

# Create the Trading Strategy client for notebook use
client = Client.create_jupyter_client()

# Read the preprocessed candles from the local Parquet file.
# See fetch-binance-candles.py
time_bucket = TimeBucket.d1
fpath = f"/tmp/binance-candles-{time_bucket.value}.parquet"
all_candles_df = pd.read_parquet(fpath)

# Pair ids this notebook examines
interesting_pairs = {
    "ETHUSDT", "BTCUSDT", "LINKUSDT", "MATICUSDT",
    "AAVEUSDT", "COMPUSDT", "MKRUSDT", "BNBUSDT",
    "AVAXUSDT", "CAKEUSDT", "SNXUSDT", "CRVUSDT",
}

# Keep only the candles of the examined pairs and remember which pair ids are present
all_candles_df = all_candles_df.loc[all_candles_df["pair_id"].isin(interesting_pairs)]
pair_ids = all_candles_df["pair_id"].unique()


### Interactive viewer mode

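Set the Plotly chart output mode. The cell below renders static PNG images; switch to the commented-out `OutputMode.interactive` call for interactive viewing.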

In [None]:
from tradeexecutor.utils.notebook import OutputMode, setup_charting_and_output

# Charts are rendered as static PNG images.
# For interactive charts use instead:
# setup_charting_and_output(OutputMode.interactive, max_rows=35, height=1000)
setup_charting_and_output(OutputMode.static, image_format="png", width=1500, height=1200, max_rows=30)

# Show floats with thousand separators and two decimals in rendered tables
pd.set_option("display.float_format", '{:,.2f}'.format)


## Filter trading pairs

In [None]:
from tradingstrategy.pair import filter_for_stablecoins, StablecoinFilteringMode

# No further pair filtering is applied: every loaded candle is used
candles_df = all_candles_df

print(
    f"We have {len(pair_ids)} tradeable pairs",
    f"We have {len(candles_df):,} candles for tradeable pairs, out of {len(all_candles_df):,} total candles",
    sep="\n",
)


## Explore signal vs price change

Create a function `calculate_signal_vs_profit` which calculates 
- Signal (naive momentum)
- Profit: the last and the best future price we can get
- Allows us to play around with different time windows
- Split between shorts and longs


In [None]:
from pandas.tseries.frequencies import to_offset
import humanize

# Set up parameters of time windows
# we use for this notebook run
#lookback_window = pd.Timedelta(days=2)
#profit_window = pd.Timedelta(days=2)

# lookback_window: how far back the price history is looked at for the signal
# profit_window: how far to the future the profit is measured
lookback_window = pd.Timedelta(days=7)
profit_window = pd.Timedelta(days=7)

# Time window of the rolling cumulative volume column
long_lookback_window = lookback_window * 4
# Minimum volume per day; candles below this (scaled to the candle length)
# get their signal and profit blanked out
daily_volume_threshold = 10_000

# short - long EMA pairs
short_long_ema_pairs = [
    (8, 24),
    (16, 48),
    (32, 96),
]

# Rolling standard deviation windows, in candles, used to normalise the EMA diff:
# the diff is first divided by its rolling std over the short period,
# and the result is divided again by its own rolling std over the long period
ema_diff_short_normalisation_period = 63
ema_diff_long_normalisation_period =  252

# NOTE(review): not referenced in the visible part of this notebook - confirm it is still needed
min_entries = 14

# Trading history dropped from the start of each pair before anything is calculated
min_age = pd.DateOffset(days=20)

# Signals within +/- this value of zero do not get a long/short side
zero_signal_cut_off = 0.0001

# Upper quantile of the outlier filter; the lower quantile is 1 - this value
quantile_sanity_threshold = 0.9995

# Which signal to examine: "weighted_ema" or "momentum"
# signal_source = "momentum"
signal_source = "weighted_ema"

# Toggles for the outlier filters and for mapping the EMA signal through the response function
apply_filter_by_signal = False
apply_filter_by_profit = False
apply_response_function = True

# Use later in chart titles
signal_window_label = humanize.naturaldelta(lookback_window)
profit_window_label = humanize.naturaldelta(profit_window)


Create a function `calculate_signal_vs_price_for_pair` which
- Calculates the signal vs. profit for a certain trading pair
- Bundles a few different pairs into the same `DataFrame` so we can examine them together


In [None]:
from math import e, sqrt
import numpy as np
from pandas.core.groupby import DataFrameGroupBy
from pandas_ta import ema, pvt, obv


def calculate_ema_signal(df: pd.DataFrame):
    """Build a single signal out of several short/long EMA differences.

    The idea: when the short moving average of the price moves faster
    than the long moving average, the asset has momentum.

    For every (short, long) entry of ``short_long_ema_pairs``:

    - Calculate the short and the long EMA of the close price and their difference
    - Divide the difference by its rolling standard deviation over
      ``ema_diff_short_normalisation_period`` candles
    - Divide the result by its own rolling standard deviation over
      ``ema_diff_long_normalisation_period`` candles
    - If ``apply_response_function`` is set, map the value through
      ``x * exp(-x^2 / 4)`` scaled so that its peak is 1

    The final signal is the equally weighted average of the per-pair signals,
    shifted forward by one candle.

    Sources:

    - Momentum and trend following trading strategies for currencies and bitcoin
      by Janick Rohrbach, Silvan Suremann, Joerg Osterrieder

    - Dissecting Investment Strategies in the Cross Section and Time Series
      by Jamil Baz, Nick Granger, Campbell R. Harvey, Nicolas Le Roux and Sandy Rattray

    :param df:
        Candles of a single trading pair. Must have ``close`` column.
        The columns are added to this DataFrame in place.

    :return:
        The same DataFrame with added columns.

        Columns: signal, ema_signal_1, ema_long_1, ema_short_1, ema_diff_1...

        If the response function could not be calculated for some EMA pair
        (all values were NaN), ``signal`` is NA for every row.
    """

    # x * exp(-x^2 / 4) reaches its maximum at x = sqrt(2),
    # and this is the value at the maximum
    response_peak = sqrt(2) * e**(-0.5)

    # Flips to False if the data is too short to calculate some of the EMA pairs
    all_pairs_calculated = True

    for idx, (short_ema, long_ema) in enumerate(short_long_ema_pairs, start=1):
        assert short_ema < long_ema

        long_col = f"ema_long_{idx}"
        short_col = f"ema_short_{idx}"
        diff_col = f"ema_diff_{idx}"
        normalised_col = f"ema_diff_normalised_{idx}"
        double_normalised_col = f"ema_diff_double_normalised_{idx}"
        signal_col = f"ema_signal_{idx}"

        df[long_col] = ema(df["close"], length=long_ema)
        df[short_col] = ema(df["close"], length=short_ema)
        df[diff_col] = df[short_col] - df[long_col]

        # First normalisation: moving standard deviation of the raw diff
        df[normalised_col] = df[diff_col] / df[diff_col].rolling(ema_diff_short_normalisation_period).std()
        # Second normalisation: moving standard deviation of the normalised diff
        df[double_normalised_col] = df[normalised_col] / df[normalised_col].rolling(ema_diff_long_normalisation_period).std()

        if not apply_response_function:
            # Pass normalised EMA diff as is
            df[signal_col] = df[double_normalised_col]
            continue

        # Response function squeezes the signal to -1 ... +1 range
        x = df[double_normalised_col]
        exponent = (x**2) / -4

        if pd.isnull(exponent).all():
            # Every value is NaN, nothing to calculate the response from
            all_pairs_calculated = False
            break

        ranged_response = x * np.exp(exponent) / response_peak
        df[signal_col] = ranged_response

        assert ranged_response.max() < 1.1
        assert ranged_response.min() > -1.1

    if all_pairs_calculated:
        # Equally weighted average over all EMA pairs
        df["signal"] = 0
        for idx in range(1, len(short_long_ema_pairs) + 1):
            df["signal"] = df["signal"] + df[f"ema_signal_{idx}"]
        df["signal"] = df["signal"] / len(short_long_ema_pairs)
    else:
        df["signal"] = pd.NA

    # Trading day needs to use signal calculated from the previous day's data
    df["signal"] = df["signal"].shift(1)

    return df


def calculate_signal_vs_profit(
    df: pd.DataFrame,
    pair_id: str,
    momentum_window: pd.Timedelta,
    profit_window: pd.Timedelta,
    time_frame: pd.Timedelta,
) -> pd.DataFrame:
    """Calculate signals and profits for all incoming candles of a single pair.

    :param df:
        Candles of one trading pair. Uses ``open``, ``close`` and ``volume`` columns
        and the index to calculate the pair age.

    :param pair_id:
        Identifier of the pair. Currently not used in the calculation.

    :param momentum_window:
        How far to the past we look for the signal.
        Must be a whole number of ``time_frame`` candles.

    :param profit_window:
        How far to the future we look for the profit.
        Must be a whole number of ``time_frame`` candles.

    :param time_frame:
        Duration of a single candle.

    :return:
        Copy of the candles with added columns, among others
        ``signal``, ``side``, ``profit``, ``profit_max``, ``momentum``, ``pvt``, ``obv``.

        Empty DataFrame if there is no data left after dropping
        the first ``min_age`` of the trading history.
    """

    number_of_look_back_candles = momentum_window / time_frame
    number_of_look_forward_candles = profit_window / time_frame
    assert number_of_look_forward_candles > 0 and number_of_look_forward_candles.is_integer(), f"Could not calculate candle count that fits into profit window {profit_window} for data time frame {time_frame}"
    assert number_of_look_back_candles > 0 and number_of_look_back_candles.is_integer(), f"Could not calculate candle count that fits into lookback window {momentum_window} for data time frame {time_frame}"
    number_of_look_forward_candles = int(number_of_look_forward_candles)
    number_of_look_back_candles = int(number_of_look_back_candles)

    # No data to start with
    if len(df.index) == 0:
        return pd.DataFrame()

    # The first candle is used as the birth of the trading pair
    start = df.index[0]

    # Remove first N days of trading history to filter out scam pump and dumps
    df = df.loc[df.index > start + min_age].copy()

    # Not enough data left after filtering
    if len(df) < number_of_look_back_candles:
        return pd.DataFrame()

    df["age"] = df.index - start

    # Fix missing prices
    df["open"] = df["open"].replace(0, np.nan)

    # Past price used for the signal and future price used for the profit
    df["prev"] = df["open"].shift(number_of_look_back_candles)
    df["next"] = df["open"].shift(-number_of_look_forward_candles)

    # Price change from this candle to the final candle of the profit window
    df["price_diff"] = (df["next"] - df["open"]) / df["open"]

    # NOTE(review): this is positive when the past price was higher than the current price,
    # i.e. when the price has fallen - confirm this sign is intended for a momentum signal
    df["momentum"] = (df["prev"] - df["open"]) / df["open"]

    df["pvt"] = pvt(df["close"], df["volume"])
    df["obv"] = obv(df["close"], df["volume"])

    # Drop any momentum value that seems to be incorrect (more than 99% price movement)
    df["momentum"] = np.where(df["momentum"] > 0.99, 0, df["momentum"])
    df["momentum"] = np.where(df["momentum"] < -0.99, 0, df["momentum"])

    # Volume indicators over the look back period
    df["rolling_cum_volume"] = df["volume"].rolling(window=long_lookback_window).sum()
    df["rolling_obv"] = df["obv"] - df["obv"].shift(periods=number_of_look_back_candles)
    df["rolling_pvt"] = df["pvt"] - df["pvt"].shift(periods=number_of_look_back_candles)

    if signal_source == "weighted_ema":
        df = calculate_ema_signal(df)
    elif signal_source == "momentum":
        df["signal"] = df["momentum"]
    else:
        raise RuntimeError(f"Figure out {signal_source}")

    # On negative signals, we go short.
    # On zero signal and lack of data set side to NA
    df["side"] = pd.NA
    df.loc[df["signal"] > zero_signal_cut_off, "side"] = "long"
    df.loc[df["signal"] < -zero_signal_cut_off, "side"] = "short"

    # Max and min close price within the profit window (the candles following this one).
    # These columns are informational: they are not used for profit_max below.
    df["max_future_price"] = df["close"].rolling(number_of_look_forward_candles).max().shift(-number_of_look_forward_candles)
    df["min_future_price"] = df["close"].rolling(number_of_look_forward_candles).min().shift(-number_of_look_forward_candles)

    # Profit is the raw price change for both longs and shorts,
    # so a successful short shows up as a negative profit.
    # profit_max is currently the same value as profit.
    df["profit"] = df["price_diff"]
    df["profit_max"] = df["profit"]
    df["profit_abs"] = df["profit_max"].abs()

    # No future price available: treat as zero profit
    df["profit"] = df["profit"].fillna(0)
    df["profit_max"] = df["profit_max"].fillna(0)

    # On too low trading volume we blank out signal and profit
    candle_volume_threshold = daily_volume_threshold * (time_frame / pd.Timedelta(days=1))
    volume_threshold_exceeded = df["volume"] >= candle_volume_threshold
    df["signal"] = np.where(volume_threshold_exceeded, df["signal"], np.nan)
    df["profit"] = np.where(volume_threshold_exceeded, df["profit"], np.nan)
    df["profit_max"] = np.where(volume_threshold_exceeded, df["profit_max"], np.nan)

    return df


def calculate_signal_vs_price_for_pair(
    grouped_candles: DataFrameGroupBy,
    pair_id: str
) -> pd.DataFrame:
    """Calculate the signal and profit columns for one pair of the grouped candles.

    :param grouped_candles:
        Candles grouped by pair id

    :param pair_id:
        Which group to calculate

    :return:
        Result of `calculate_signal_vs_profit` for this pair,
        or an empty DataFrame if the group does not exist.
    """
    try:
        pair_candles = grouped_candles.get_group(pair_id)
    except KeyError:
        # No candles for this pair
        return pd.DataFrame()

    # Work on a copy so the grouped source data stays untouched
    return calculate_signal_vs_profit(
        pair_candles.copy(),
        pair_id,
        momentum_window=lookback_window,
        profit_window=profit_window,
        time_frame=time_bucket.to_pandas_timedelta(),
    )

print("Calculating signals")
grouped_candles = candles_df.groupby("pair_id")
per_pair_data = [calculate_signal_vs_price_for_pair(grouped_candles, pair) for pair in pair_ids]

# Keep only the pairs that have at least one signal value reaching the zero signal cut off
valid_pairs = []
for pair_data_df in per_pair_data:
    if pair_data_df.empty:
        continue
    signal_magnitude = pair_data_df["signal"].replace(pd.NA, 0).abs()
    if (signal_magnitude >= zero_signal_cut_off).any():
        valid_pairs.append(pair_data_df)

# Everything else had no data or no signal
zero_signal_pairs = len(per_pair_data) - len(valid_pairs)

print(f"Pairs with valid signal data {len(valid_pairs):,}")
print(f"Pairs with zero signal: {zero_signal_pairs:,}")

df = pd.concat(valid_pairs)

# Fix column order for table rendering: profit first, then signal
for leading_column in ('signal', 'profit'):
    df.insert(0, leading_column, df.pop(leading_column))

print(f"Total signal samples {len(df):,}")



### Display sample content for single trading pair

In [None]:
# Pick a single pair and show a sample of its rows.
# Alternative way to pick the pair from a pair universe:
#examined_pair = pair_universe.get_pair_by_human_description((ChainId.polygon, "uniswap-v3", "WMATIC", "USDC", 0.0005))
examined_pair = "BTCUSDT"
single_df = df[df["pair_id"] == examined_pair]

# Temporarily limit the rendered table to 10 rows
with pd.option_context('display.min_rows', 10, 'display.max_rows', 10):
    display(single_df)


### Outlier filter

Get rid of abnormally large values caused by scam tokens and other issues in the input data.

In [None]:
# Store for later
unfiltered_df = df

# Run outlier filter
#
# Use separate quantile for high and low because low min is -1.0:
# you lose all your money when the token goes to zero in a rug pull
#
# https://stackoverflow.com/questions/23199796/detect-and-exclude-outliers-in-a-pandas-dataframe
if apply_filter_by_profit:
    q_hi = df["profit"].quantile(quantile_sanity_threshold)
    q_low = df["profit"].quantile(1 - quantile_sanity_threshold)
    df = df[(df["profit"] < q_hi) & (df["profit"] > q_low)]

if apply_filter_by_signal:
    q_hi = df["signal"].quantile(quantile_sanity_threshold)
    q_low = df["signal"].quantile(1 - quantile_sanity_threshold)
    # Assign back to df, like the profit filter does, so that the filter takes effect
    df = df[(df["signal"] < q_hi) & (df["signal"] > q_low)]

### Best signal picker


In [None]:
# For each timestamp, pick the row with the strongest (absolute) signal across all pairs
profit_df = df.copy()

# Missing and infinite values count as zero
for column in ("signal", "profit", "profit_max"):
    profit_df[column] = profit_df[column].fillna(0).replace([np.inf, -np.inf], 0)

profit_df["timestamp"] = profit_df.index

max_df = (
    profit_df
    # Strongest signal first, so drop_duplicates() keeps it for each timestamp
    .sort_values('signal', key=abs, ascending=False)
    .drop_duplicates("timestamp")
    # Final table order: the highest signal on the top
    .sort_values("signal", ascending=False)
)
print(f"Best picked results {len(max_df):,}")

display(max_df)

## Quick stats

In [None]:
def _win_rate_percent(correct_count: int, total_count: int) -> float:
    """Share of correct picks as percent, NaN when there are no picks at all."""
    if total_count == 0:
        return float("nan")
    return correct_count / total_count * 100


# Win rate over all samples.
# Profit is the raw price change, so a short is correct when the profit is negative.
longs = df[df["side"] == "long"]
long_correct = df.loc[(df["side"] == "long") & (df["profit"] > 0)]
shorts = df[df["side"] == "short"]
short_correct = df.loc[(df["side"] == "short") & (df["profit"] < 0)]

print(f"Longs {len(longs):,} win rate {_win_rate_percent(len(long_correct), len(longs)):.2f} %")
print(f"Shorts {len(shorts):,} win rate {_win_rate_percent(len(short_correct), len(shorts)):.2f} %")

# Win rate over the strongest signal of each timestamp
longs = max_df[max_df["side"] == "long"]
long_correct = max_df.loc[(max_df["side"] == "long") & (max_df["profit"] > 0)]
shorts = max_df[max_df["side"] == "short"]
short_correct = max_df.loc[(max_df["side"] == "short") & (max_df["profit"] < 0)]

print(f"Best pick longs {len(longs):,} win rate {_win_rate_percent(len(long_correct), len(longs)):.2f} %")
print(f"Best pick shorts {len(shorts):,} win rate {_win_rate_percent(len(short_correct), len(shorts)):.2f} %")

# Example trading pair chart

Plot with our indicators



In [None]:
import plotly.express as px
import plotly.graph_objects as go

from tradingstrategy.charting.candle_chart import visualise_ohlcv

print(f"Visualising {len(single_df)} candles")

fig = visualise_ohlcv(
    single_df,
    num_detached_indicators=3,
    subplot_names=["Price", "Signal", "Momentum", "PVT"],
)

# One indicator per detached subplot, starting from the row below the price chart
indicator_columns = [
    ("Signal", "signal"),
    ("Momentum", "momentum"),
    ("PVT", "pvt"),
]
for row, (trace_name, column) in enumerate(indicator_columns, start=2):
    fig.add_trace(
        go.Scatter(name=trace_name, x=single_df.index, y=single_df[column]),
        row=row,
        col=1,
    )

fig.show()


## Examine signal

### EMA indicator correlation

See "Table 2: Correlation between the xk" in Momentum and trend following trading strategies for currencies and bitcoin.

In [None]:
# Pick the raw EMA diff column of every short/long EMA pair
ema_diff_columns = [f"ema_diff_{idx}" for idx in range(1, len(short_long_ema_pairs) + 1)]
diff_df = df[ema_diff_columns].copy()

# Pairwise correlation between the EMA diffs
correlation_matrix = diff_df.corr()
display(correlation_matrix)

### Signal visualisation

In [None]:
import plotly.express as px
import plotly.graph_objects as go

from tradingstrategy.charting.candle_chart import visualise_ohlcv

fig = visualise_ohlcv(
    single_df,
    num_detached_indicators=3,
    subplot_names=["Price", "Signal", "EMA diffs", "Normalised EMA diffs"],
)

# The combined signal
signal_trace = go.Scatter(name="Signal", x=single_df.index, y=single_df["signal"])
fig.add_trace(signal_trace, row=2, col=1)

# All raw EMA diffs share one subplot
for idx, (short_ema, long_ema) in enumerate(short_long_ema_pairs, start=1):
    diff_trace = go.Scatter(
        name=f"EMA diff #{idx} short:{short_ema} long:{long_ema}",
        x=single_df.index,
        y=single_df[f"ema_diff_{idx}"],
    )
    fig.add_trace(diff_trace, row=3, col=1)

# All normalised EMA diffs share another subplot
for idx in range(1, len(short_long_ema_pairs) + 1):
    normalised_trace = go.Scatter(
        name=f"Normalised EMA diff #{idx}",
        x=single_df.index,
        y=single_df[f"ema_diff_normalised_{idx}"],
    )
    fig.add_trace(normalised_trace, row=4, col=1)

fig.show()


## Signal profitability

Different scatter charts to examine if there is a correlation between the signal and the profit.

### Best signal profitability

- For each time frame, pick the strongest signal

In [None]:

signal_threshold = 0.20

# Arguments shared by every scatter chart of this cell
best_signal_scatter_args = dict(
    x="signal",
    y="profit",
    trendline_color_override="black",
    trendline_scope="overall",
)
legend_args = dict(title='Pair and direction')

# Linear fit over the best signals, with the regression summary printed out
fig = px.scatter(
    max_df,
    trendline="ols",
    title=f"Best signal per {signal_window_label}, profit window {profit_window_label}, EMA count {len(short_long_ema_pairs)}",
    **best_signal_scatter_args,
)
fig.update_layout(legend=legend_args)

results = px.get_trendline_results(fig)
res = results.to_dict()["px_fit_results"][0]
print(res.summary())
fig.show()

# Same data with a LOWESS fit
fig = px.scatter(
    max_df,
    trendline="lowess",
    title=f"Best signal per {signal_window_label}, LOWESS fitted",
    **best_signal_scatter_args,
)
fig.update_layout(legend=legend_args)
fig.show()

# Linear fit over the signals that reach the threshold
filtered_max_df = max_df[max_df["signal"].abs() >= signal_threshold]
fig = px.scatter(
    filtered_max_df,
    trendline="ols",
    title=f"Best signal per  {signal_window_label}, profit window {profit_window_label}, filtered for {signal_threshold}",
    **best_signal_scatter_args,
)
fig.update_layout(legend=legend_args)

fig.show()


In [None]:

# Signal vs. profit over every sample: (profit column, chart title)
all_sample_charts = [
    ("profit_max", f"Max potential take profit, signal {signal_window_label}, profit window {profit_window_label}"),
    ("profit", f"Profit last day, signal {signal_window_label}, profit window {profit_window_label}"),
]

for profit_column, chart_title in all_sample_charts:
    fig = px.scatter(
        df,
        x="signal",
        y=profit_column,
        trendline="ols",
        title=chart_title,
        trendline_color_override="black",
    )
    fig.update_layout(legend=dict(title='Pair and direction'))
    fig.show()



### Long only signals

Examine trade types separately to better see if one side is clearly superior.

In [None]:
# Samples where the signal tells us to go long
df_long_only = df.loc[df["side"] == "long"]
max_long_only_df = max_df.loc[max_df["side"] == "long"]

# All long samples, with the regression summary printed out
fig = px.scatter(
    df_long_only,
    x="signal",
    y="profit",
    trendline="ols",
    trendline_color_override="black",
    title=f"Long only, signal {signal_window_label}, profit window {profit_window_label}",
)
results = px.get_trendline_results(fig)
res = results.to_dict()["px_fit_results"][0]
print(res.summary())
fig.show()

# Long samples among the best picked signals, with the regression summary printed out
fig = px.scatter(
    max_long_only_df,
    x="signal",
    y="profit",
    trendline="ols",
    trendline_color_override="black",
    title=f"Best long only {signal_window_label}, profit window {profit_window_label}",
)
results = px.get_trendline_results(fig)
res = results.to_dict()["px_fit_results"][0]
print(res.summary())
fig.show()

fig = px.scatter(
    df_long_only,
    x="signal",
    y="profit",
    trendline="ols",
    trendline_color_override="black",
    title=f"Long only profit last day, signal {signal_window_label}, profit window {profit_window_label}"
)
fig.show()

# Max take profit of the long samples only
# (the chart over all samples is rendered in the previous cell)
fig = px.scatter(
    df_long_only,
    x="signal",
    y="profit_max",
    trendline="ols",
    title=f"Long only max take profit, signal {signal_window_label}, profit window {profit_window_label}",
    trendline_color_override="black"
)
fig.update_layout(legend=dict(title='Pair and direction'))
fig.show()

# Long samples with the signal reaching the threshold
threshold = 0.075
filtered_df = df_long_only.loc[abs(df_long_only["signal"]) >= threshold]
fig = px.scatter(
    filtered_df,
    x="signal",
    y="profit",
    trendline="ols",
    trendline_color_override="black",
    title=f"Long-only filtered the signal at {threshold}, profit, signal {signal_window_label}, profit window {profit_window_label}"
)
fig.show()


In [None]:
# Samples where the signal tells us to go short
df_short_only = df.loc[df["side"] == "short"]
max_df_short_only = max_df.loc[max_df["side"] == "short"]

# (data, profit column, chart title) of each chart, in rendering order
short_only_charts = [
    (df_short_only, "profit_max", f"Short only max take profit, signal {signal_window_label}, profit window {profit_window_label}"),
    (df_short_only, "profit", f"Short only profit last day, signal {signal_window_label}, profit window {profit_window_label}"),
    (max_df_short_only, "profit", f"Best short picks,signal {signal_window_label}, profit window {profit_window_label}"),
]

for chart_data, profit_column, chart_title in short_only_charts:
    fig = px.scatter(
        chart_data,
        x="signal",
        y=profit_column,
        trendline="ols",
        title=chart_title,
        trendline_color_override="black",
    )
    fig.show()

### Filtered signal

Remove low signals below a certain threshold and see if it affects the correlation between the signal and the profit.

In [None]:

threshold = 0.075

# Arguments shared by every scatter chart of this cell
filtered_scatter_args = dict(x="signal", trendline="ols", trendline_color_override="black")

# Longs with the signal reaching the threshold
filtered_df = df_long_only.loc[df_long_only["signal"].abs() >= threshold]
fig = px.scatter(
    filtered_df,
    y="profit",
    title=f"Long-only filtered the signal at {threshold}, profit, signal {signal_window_label}, profit window {profit_window_label}",
    **filtered_scatter_args,
)
fig.show()

# All longs for comparison
fig = px.scatter(
    df_long_only,
    y="profit",
    title=f"No filter, profit last day, signal {signal_window_label}, profit window {profit_window_label}",
    **filtered_scatter_args,
)
fig.show()

# Shorts with the signal reaching the threshold
filtered_df = df_short_only.loc[df_short_only["signal"].abs() >= threshold]
fig = px.scatter(
    filtered_df,
    y="profit_max",
    title=f"Short only filtered the signal at {threshold}, max take profit, signal {signal_window_label}, profit window {profit_window_label}",
    **filtered_scatter_args,
)
fig.show()
