In [1]:
from google.colab import drive

# This will prompt you to authenticate and grant access to your Drive
drive.mount('/content/drive')

import warnings
warnings.filterwarnings("ignore")

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [47]:
insample_data=pd.read_csv('/content/drive/MyDrive/My Projects/FX/insample2.csv')

In [48]:
insample_data

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice
0,5,US Treasury 5Y,ASK,500000,SniperFund,98.629,98.717,7,MISSED,98.686
1,14,US Treasury 5Y,BID,2500000,SleepyManager,98.502,98.443,5,DONE,98.419
2,21,US Treasury 3Y,BID,100000,RelativeValueStrategies,102.080,102.027,6,MISSED,101.971
3,26,US Treasury 10Y,BID,5000000,HF-Fortress,97.753,97.651,3,MISSED,97.788
4,31,US Treasury 3Y,ASK,100000,TankerAssetManagement,101.279,101.398,3,MISSED,101.374
...,...,...,...,...,...,...,...,...,...,...
995,4394,US Treasury 3Y,BID,2500000,SleepyManager,101.437,101.243,5,MISSED,101.411
996,4397,US Treasury 30Y,BID,250000,CountrysideBroker,92.325,92.094,6,MISSED,91.787
997,4397,US Treasury 5Y,BID,1000000,RelativeValueStrategies,99.708,99.611,3,MISSED,99.940
998,4406,US Treasury 3Y,ASK,100000,RelativeValueStrategies,100.660,100.644,2,DONE,100.666


# Model to predict next mid price

In [49]:
# Basic feature engineering
def prepare_features(df):
    df = df.copy()

    # Calculate price distance in basis points
    df['price_distance'] = (df['QuotedPrice'] - df['MidPrice']).abs() / df['MidPrice'] * 10000

    # Calculate side-dependent price distance
    df['signed_distance'] = df.apply(
        lambda x: (x['QuotedPrice'] - x['MidPrice']) / x['MidPrice'] * 10000 if x['Side'] == 'ASK'
        else (x['MidPrice'] - x['QuotedPrice']) / x['MidPrice'] * 10000,
        axis=1
    )

    df['Side']=df['Side'].apply(lambda x: 1 if x=='ASK' else -1)
    # ASK=1 BID=-1

#     # Create target if available
#     if 'Traded' in df.columns:
#         df['target'] = (df['Traded'] == 'DONE').astype(int)

    return df

# Process insample data
df_processed = prepare_features(insample_data)

In [50]:
df_processed

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice,price_distance,signed_distance
0,5,US Treasury 5Y,1,500000,SniperFund,98.629,98.717,7,MISSED,98.686,8.922325,8.922325
1,14,US Treasury 5Y,-1,2500000,SleepyManager,98.502,98.443,5,DONE,98.419,5.989726,5.989726
2,21,US Treasury 3Y,-1,100000,RelativeValueStrategies,102.080,102.027,6,MISSED,101.971,5.192006,5.192006
3,26,US Treasury 10Y,-1,5000000,HF-Fortress,97.753,97.651,3,MISSED,97.788,10.434462,10.434462
4,31,US Treasury 3Y,1,100000,TankerAssetManagement,101.279,101.398,3,MISSED,101.374,11.749721,11.749721
...,...,...,...,...,...,...,...,...,...,...,...,...
995,4394,US Treasury 3Y,-1,2500000,SleepyManager,101.437,101.243,5,MISSED,101.411,19.125171,19.125171
996,4397,US Treasury 30Y,-1,250000,CountrysideBroker,92.325,92.094,6,MISSED,91.787,25.020309,25.020309
997,4397,US Treasury 5Y,-1,1000000,RelativeValueStrategies,99.708,99.611,3,MISSED,99.940,9.728407,9.728407
998,4406,US Treasury 3Y,1,100000,RelativeValueStrategies,100.660,100.644,2,DONE,100.666,1.589509,-1.589509


In [51]:
# Preprocess the data
features = ['MidPrice', 'Notional', 'Competitors', 'price_distance', 'signed_distance', 'Side' ]
insample_data = pd.get_dummies(df_processed, columns=['Side'], drop_first=True)

X = df_processed[features]
y = df_processed['nextMidPrice']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
linear_pred = linear_model.predict(X_test)
print("Linear Regression MAE:", mean_absolute_error(y_test, linear_pred))
print("Linear Regression MSE:", mean_squared_error(y_test, linear_pred))

# Random Forest Regression
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print("Random Forest MAE:", mean_absolute_error(y_test, rf_pred))
print("Random Forest MSE:", mean_squared_error(y_test, rf_pred))

# Gradient Boosting Regression
gb_model = GradientBoostingRegressor(random_state=42)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)
print("Gradient Boosting MAE:", mean_absolute_error(y_test, gb_pred))
print("Gradient Boosting MSE:", mean_squared_error(y_test, gb_pred))


Linear Regression MAE: 0.11886152821703738
Linear Regression MSE: 0.026710619207340875
Random Forest MAE: 0.15718010000000335
Random Forest MSE: 0.043399104722001765
Gradient Boosting MAE: 0.1466256898198492
Gradient Boosting MSE: 0.040375383564086896


# Pred Competition Next Mid Price

In [52]:
comp_data=pd.read_csv('/content/drive/MyDrive/My Projects/FX/competition.csv')

In [53]:
processed_comp_data=prepare_features(comp_data)

In [54]:
processed_comp_data

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice,price_distance,signed_distance
0,4881,US Treasury 30Y,-1,2500000,SniperFund,94.198,94.014,1,,,19.533323,19.533323
1,4885,US Treasury 2Y,-1,10000000,CountrysideBroker,101.921,101.745,7,,,17.268276,17.268276
2,4885,US Treasury 2Y,-1,500000,SleepyManager,99.615,99.548,7,,,6.725895,6.725895
3,4894,US Treasury 3Y,1,2500000,RelativeValueStrategies,100.768,100.869,4,,,10.023023,10.023023
4,4903,US Treasury 5Y,1,5000000,SniperFund,99.96,100.077,4,,,11.704682,11.704682
5,4911,US Treasury 5Y,1,250000,SleepyManager,97.621,97.699,5,,,7.990084,7.990084
6,4913,US Treasury 30Y,-1,250000,RelativeValueStrategies,93.568,93.407,1,,,17.206737,17.206737
7,4920,US Treasury 30Y,-1,100000,RelativeValueStrategies,96.49,96.334,4,,,16.167478,16.167478
8,4925,US Treasury 10Y,-1,100000,HF-Fortress,94.95,94.839,7,,,11.690363,11.690363
9,4934,US Treasury 5Y,1,2500000,TankerAssetManagement,97.995,98.096,2,,,10.306648,10.306648


In [55]:
linear_model_used = LinearRegression()
linear_model_used.fit(X, y)

In [56]:
X_comp=processed_comp_data[features]
next_mp_pred=linear_model.predict(X_comp)

In [57]:
comp_data['nextMidPrice']=pd.Series(next_mp_pred)

# Optimizer

In [58]:
optimizer_features=['Bond', 'Side', 'Notional', 'Counterparty', 'MidPrice','Competitors','nextMidPrice']
optimized_data=comp_data[optimizer_features]
optimized_data

Unnamed: 0,Bond,Side,Notional,Counterparty,MidPrice,Competitors,nextMidPrice
0,US Treasury 30Y,BID,2500000,SniperFund,94.198,1,94.162255
1,US Treasury 2Y,BID,10000000,CountrysideBroker,101.921,7,101.89622
2,US Treasury 2Y,BID,500000,SleepyManager,99.615,7,99.595189
3,US Treasury 3Y,ASK,2500000,RelativeValueStrategies,100.768,4,100.784031
4,US Treasury 5Y,ASK,5000000,SniperFund,99.96,4,99.973545
5,US Treasury 5Y,ASK,250000,SleepyManager,97.621,5,97.63669
6,US Treasury 30Y,BID,250000,RelativeValueStrategies,93.568,1,93.533314
7,US Treasury 30Y,BID,100000,RelativeValueStrategies,96.49,4,96.461947
8,US Treasury 10Y,BID,100000,HF-Fortress,94.95,7,94.92478
9,US Treasury 5Y,ASK,2500000,TankerAssetManagement,97.995,2,98.00593


In [59]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import pandas as pd
from sklearn.metrics import log_loss, accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt

In [60]:
training_columns = ['Notional', 'Competitors', 'AbsPriceDiff', 'Side_BID',
       'Counterparty_HF-Fortress', 'Counterparty_RelativeValueStrategies',
       'Counterparty_SleepyManager', 'Counterparty_SniperFund',
       'Counterparty_TankerAssetManagement', 'Bond_US Treasury 2Y',
       'Bond_US Treasury 30Y', 'Bond_US Treasury 3Y', 'Bond_US Treasury 5Y']

best_params_accuracy = {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 100, 'subsample': 1.0}

df = pd.read_csv('/content/drive/MyDrive/My Projects/FX/insample2.csv')

# Create new column for absolute difference
df['AbsPriceDiff'] = abs(df['QuotedPrice'] - df['MidPrice'])

# Drop QuotedPrice and MidPrice columns
df = df.drop(columns=['QuotedPrice', 'MidPrice', 'Time', 'nextMidPrice'])

# Encode categorical columns
df = pd.get_dummies(df, columns=['Side', 'Counterparty', 'Bond'], drop_first=True)

# Encode target column
df['Traded'] = df['Traded'].map({'MISSED': 0, 'DONE': 1})

# Feature matrix and target vector
X = df.drop(columns=['Traded'])
y = df['Traded']

xgb_model = XGBClassifier(**best_params_accuracy, use_label_encoder=False, eval_metric='logloss')

# # Fit the model on the entire training data
xgb_model.fit(X, y)

In [61]:
def traded_prob_model(bond, side, notional, counterparty, mid_price, competitors, quoted_price):
    global training_columns, xgb_model
    input_data = pd.DataFrame({
            'Bond': [bond],
            'Side': [side],
            'Notional': [notional],
            'Counterparty': [counterparty],
            'MidPrice': [mid_price],
            'Competitors': [competitors],
            'QuotedPrice': [quoted_price]
        })

    # Calculate the absolute price difference
    input_data['AbsPriceDiff'] = abs(input_data['QuotedPrice'] - input_data['MidPrice'])

    # Drop irrelevant columns
    input_data = input_data.drop(columns=['QuotedPrice', 'MidPrice'])

    # Encode categorical variables
    input_data = pd.get_dummies(input_data, columns=['Side', 'Counterparty', 'Bond'], drop_first=True)

    # Add missing columns with default value 0 to match training data
    for col in training_columns:
        if col not in input_data.columns:
            input_data[col] = 0

    # Ensure column order matches training data
    input_data = input_data[training_columns]

    # Predict using the trained model
    pred_prob = xgb_model.predict_proba(input_data)[:, 1]  # Probability of DONE
    pred_class = xgb_model.predict(input_data)[0]          # Predicted class

    return pred_prob

In [62]:
# Example usage
# Ensure that the global model xgb_model is trained
# Uncomment the following lines to use after training
# predicted_prob, predicted_class
pred_traded_prob = traded_prob_model(
    bond="US Treasury 30Y",
    side="BID",
    notional=1000000,
    counterparty="SleepyManager",
    mid_price=97.219,
    competitors=6,
    quoted_price=97.22
)[0]


print("Predicted Probability of DONE:", pred_traded_prob)

Predicted Probability of DONE: 0.6402246


In [22]:
# Define the traded_prob_model function, Replace with Phase I model
# def traded_prob_model(bond, side, notional, counterparty, mid_price, competitors):
#     """
#     Example rule-based model for predicting traded probability.
#     """
#     base_prob = 0.5  # Base probability
#     side_factor = 0.1 if side == 'ASK' else -0.1  # Side effect
#     competitor_factor = max(0, 1 - competitors * 0.1)  # Competitors reduce probability
#     notional_factor = min(1, notional / 1e7)  # Larger notional increases probability
#     return max(0, base_prob + side_factor + competitor_factor + notional_factor - abs(mid_price - 100) * 0.01)

# Wrapper function for the optimizer to handle row data
def wrapped_traded_prob_model(quoted_price, row):
    return traded_prob_model(
        bond=row['Bond'],
        side=row['Side'],
        notional=row['Notional'],
        counterparty=row['Counterparty'],
        mid_price=row['MidPrice'],
        competitors=row['Competitors'],
        quoted_price=quoted_price)


In [23]:
class RFQOptimizer:
    def __init__(self, next_mid_price, side, traded_prob_model, prob_thres=0.5, alpha=2.0, beta=0.5, gamma=1.5, epsilon=1e-6,bound=0.2):
        """
        初始化优化器参数
        """
        self.next_mid_price = next_mid_price
        self.side = 1 if side == 'ASK' else -1
        self.traded_prob_model = traded_prob_model
        self.prob_thres = prob_thres
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.epsilon = epsilon
        self.bound = bound  # 限定报价范围的上下界

    def weighted_traded_prob(self, traded_prob):
        """
        成交概率的非线性加权
        """
        return self.alpha * traded_prob * (1 - self.beta * traded_prob)

    def objective(self, quoted_price):
        """
        目标函数计算
        """
        quoted_price = round(float(quoted_price), 2)
        traded_prob = self.traded_prob_model(quoted_price)
        weighted_prob = self.weighted_traded_prob(traded_prob)
        spread = (quoted_price - self.next_mid_price) * self.side
        risk_penalty = self.gamma / (abs(spread)*100 + self.epsilon)

        return weighted_prob - risk_penalty

    def prob_constraint(self, quoted_price):
        """
        成交概率约束：成交概率需高于阈值
        """
        quoted_price = round(float(quoted_price), 2)
        traded_prob = self.traded_prob_model(quoted_price)
        print('traded prob:',traded_prob)
        return traded_prob - self.prob_thres

    def spread_constraint(self, quoted_price):
        """
        Spread 约束：保证报价产生非负收益
        """
        quoted_price = round(float(quoted_price), 2)
        spread = (quoted_price - self.next_mid_price) * self.side
        return spread

    def lower_bound_constraint1(self, quoted_price):
        """
        下界约束：报价不得低于指定范围
        """
        return quoted_price - (self.next_mid_price - self.bound)

    def upper_bound_constraint1(self, quoted_price):
        """
        上界约束：报价不得高于指定范围
        """
        return (self.next_mid_price + self.bound) - quoted_price

    def optimize(self):
        """
        使用网格搜索方法寻找最优报价
        """
        # 生成报价的网格
        quoted_price_grid = [
            round(self.next_mid_price + i * 0.01, 2)
            for i in range(-int(self.bound / 0.01), int(self.bound / 0.01) + 1)
        ]

        # 初始化最优解
        best_price = None
        best_score = 0

        # 遍历所有报价
        for quoted_price in quoted_price_grid:
            # 检查约束条件
            print('quoted price:',quoted_price)
            if (self.prob_constraint(quoted_price) >= 0 and
                    self.spread_constraint(quoted_price) >= 0 and
                    self.lower_bound_constraint1(quoted_price) >= 0 and
                    self.upper_bound_constraint1(quoted_price) >= 0):
                # 计算目标函数值
                score = self.objective(quoted_price)
                print('score:',score)
                if score > best_score:
                    best_score = score
                    best_price = quoted_price

        return best_price

# Process the dataset to calculate optimal prices
optimal_prices = []
sigma=mean_squared_error(y_test, linear_pred)**0.5

for i, row in comp_data.iterrows():
    next_mid_price = row['nextMidPrice']
    optimizer = RFQOptimizer(
        next_mid_price=next_mid_price,
        side=row['Side'],
        traded_prob_model=lambda qp: wrapped_traded_prob_model(qp, row),
        prob_thres=0.1,
        alpha=1.0,
        beta=0.5,
        gamma=0.5,
        epsilon=1e-6,
        bound=0.2
    )
    optimal_price = optimizer.optimize()
    if optimal_price is None:
        print(f'row{i} failed!')
        side=1 if row['Side']=='ASK' else -1
        optimal_price=row['nextMidPrice']+2*sigma*side
    else:
        print(f'row{i} succeed!')
    optimal_prices.append(round(optimal_price,2))


# Add results to the dataset
comp_data['OptimalQuotedPrice'] = optimal_prices


quoted price: 93.96
traded prob: [0.17623803]
quoted price: 93.97
traded prob: [0.17623803]
score: [0.13470098]
quoted price: 93.98
traded prob: [0.1817775]
score: [0.13782191]
quoted price: 93.99
traded prob: [0.1817775]
score: [0.13622926]
quoted price: 94.0
traded prob: [0.29976913]
score: [0.22402269]
quoted price: 94.01
traded prob: [0.32385433]
score: [0.2385739]
quoted price: 94.02
traded prob: [0.30696565]
score: [0.22470358]
quoted price: 94.03
traded prob: [0.3004764]
score: [0.21752764]
quoted price: 94.04
traded prob: [0.3930202]
score: [0.2748897]
quoted price: 94.05
traded prob: [0.41628227]
score: [0.28509542]
quoted price: 94.06
traded prob: [0.393984]
score: [0.267475]
quoted price: 94.07
traded prob: [0.47361818]
score: [0.30726358]
quoted price: 94.08
traded prob: [0.47361818]
score: [0.30067462]
quoted price: 94.09
traded prob: [0.6756602]
score: [0.37820262]
quoted price: 94.1
traded prob: [0.73703617]
score: [0.38511032]
quoted price: 94.11
traded prob: [0.8006026

In [24]:
# Process the dataset to calculate optimal prices
optimal_prices = []
sigma=mean_squared_error(y_test, linear_pred)**0.5

for i, row in comp_data.iterrows():
    next_mid_price = row['nextMidPrice']
    optimizer = RFQOptimizer(
        next_mid_price=next_mid_price,
        side=row['Side'],
        traded_prob_model=lambda qp: wrapped_traded_prob_model(qp, row),
        prob_thres=0.1,
        alpha=1.0,
        beta=0.5,
        gamma=0.5,
        epsilon=1e-6,
        bound=0.2
    )
    optimal_price = optimizer.optimize()
    if optimal_price is None:
        print(f'row{i} failed!')
        side=1 if row['Side']=='ASK' else -1
        optimal_price=row['nextMidPrice']+2*sigma*side
    else:
        print(f'row{i} succeed!')
    optimal_prices.append(round(optimal_price,2))


# Add results to the dataset
comp_data['OptimalQuotedPrice'] = optimal_prices


quoted price: 93.96
traded prob: [0.17623803]
quoted price: 93.97
traded prob: [0.17623803]
score: [0.13470098]
quoted price: 93.98
traded prob: [0.1817775]
score: [0.13782191]
quoted price: 93.99
traded prob: [0.1817775]
score: [0.13622926]
quoted price: 94.0
traded prob: [0.29976913]
score: [0.22402269]
quoted price: 94.01
traded prob: [0.32385433]
score: [0.2385739]
quoted price: 94.02
traded prob: [0.30696565]
score: [0.22470358]
quoted price: 94.03
traded prob: [0.3004764]
score: [0.21752764]
quoted price: 94.04
traded prob: [0.3930202]
score: [0.2748897]
quoted price: 94.05
traded prob: [0.41628227]
score: [0.28509542]
quoted price: 94.06
traded prob: [0.393984]
score: [0.267475]
quoted price: 94.07
traded prob: [0.47361818]
score: [0.30726358]
quoted price: 94.08
traded prob: [0.47361818]
score: [0.30067462]
quoted price: 94.09
traded prob: [0.6756602]
score: [0.37820262]
quoted price: 94.1
traded prob: [0.73703617]
score: [0.38511032]
quoted price: 94.11
traded prob: [0.8006026

In [25]:
comp_data

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice,OptimalQuotedPrice
0,4881,US Treasury 30Y,BID,2500000,SniperFund,94.198,94.014,1,,94.162255,94.1
1,4885,US Treasury 2Y,BID,10000000,CountrysideBroker,101.921,101.745,7,,101.89622,101.86
2,4885,US Treasury 2Y,BID,500000,SleepyManager,99.615,99.548,7,,99.595189,99.55
3,4894,US Treasury 3Y,ASK,2500000,RelativeValueStrategies,100.768,100.869,4,,100.784031,100.83
4,4903,US Treasury 5Y,ASK,5000000,SniperFund,99.96,100.077,4,,99.973545,100.03
5,4911,US Treasury 5Y,ASK,250000,SleepyManager,97.621,97.699,5,,97.63669,97.69
6,4913,US Treasury 30Y,BID,250000,RelativeValueStrategies,93.568,93.407,1,,93.533314,93.48
7,4920,US Treasury 30Y,BID,100000,RelativeValueStrategies,96.49,96.334,4,,96.461947,96.42
8,4925,US Treasury 10Y,BID,100000,HF-Fortress,94.95,94.839,7,,94.92478,94.88
9,4934,US Treasury 5Y,ASK,2500000,TankerAssetManagement,97.995,98.096,2,,98.00593,98.08


# Insample Test

In [26]:
is_data=pd.read_csv('/content/drive/MyDrive/My Projects/FX/insample.csv')

In [27]:
is_data_test=is_data.head(100)
is_data_test

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice
0,5,US Treasury 5Y,ASK,500000,SniperFund,98.629,98.717,7,MISSED,98.686
1,14,US Treasury 5Y,BID,2500000,SleepyManager,98.502,98.443,5,DONE,98.419
2,21,US Treasury 3Y,BID,100000,RelativeValueStrategies,102.080,102.027,6,MISSED,101.971
3,26,US Treasury 10Y,BID,5000000,HF-Fortress,97.753,97.651,3,MISSED,97.788
4,31,US Treasury 3Y,ASK,100000,TankerAssetManagement,101.279,101.398,3,MISSED,101.374
...,...,...,...,...,...,...,...,...,...,...
95,418,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.953,101.053,5,MISSED,100.931
96,426,US Treasury 2Y,BID,100000,HF-Fortress,101.810,101.753,7,MISSED,101.856
97,433,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.779,100.829,4,DONE,100.727
98,436,US Treasury 3Y,ASK,500000,CountrysideBroker,100.643,100.731,5,MISSED,100.895


In [28]:
# Process the dataset to calculate optimal prices
def findOptimalPrices(data):
    optimal_prices = []
    sigma=mean_squared_error(y_test, linear_pred)**0.5

    for i, row in data.iterrows():
        next_mid_price = row['nextMidPrice']
        optimizer = RFQOptimizer(
            next_mid_price=next_mid_price,
            side=row['Side'],
            traded_prob_model=lambda qp: wrapped_traded_prob_model(qp, row),
            prob_thres=0.1,
            alpha=1.0,
            beta=0.5,
            gamma=0.5,
            epsilon=1e-6,
            bound=0.2
        )
        optimal_price = optimizer.optimize()
        if optimal_price is None:
            print(f'row{i} failed!')
            side=1 if row['Side']=='ASK' else -1
            optimal_price=row['nextMidPrice']+2*sigma*side
        else:
            print(f'row{i} succeed!')
        optimal_prices.append(round(optimal_price,2))


    # Add results to the dataset
    data['OptimalQuotedPrice'] = optimal_prices
    return data



In [29]:
def pred_next_mid(df):
    df_processed = prepare_features(df)
    X=df_processed[features]
    next_mid_pred=linear_model.predict(X)
    df['predNextMidPrice']=pd.Series(next_mid_pred)
    return df

In [30]:
is_data_test=pred_next_mid(is_data_test)
is_data_test

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice,predNextMidPrice
0,5,US Treasury 5Y,ASK,500000,SniperFund,98.629,98.717,7,MISSED,98.686,98.647663
1,14,US Treasury 5Y,BID,2500000,SleepyManager,98.502,98.443,5,DONE,98.419,98.477862
2,21,US Treasury 3Y,BID,100000,RelativeValueStrategies,102.080,102.027,6,MISSED,101.971,102.062006
3,26,US Treasury 10Y,BID,5000000,HF-Fortress,97.753,97.651,3,MISSED,97.788,97.723574
4,31,US Treasury 3Y,ASK,100000,TankerAssetManagement,101.279,101.398,3,MISSED,101.374,101.295329
...,...,...,...,...,...,...,...,...,...,...,...
95,418,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.953,101.053,5,MISSED,100.931,100.971464
96,426,US Treasury 2Y,BID,100000,HF-Fortress,101.810,101.753,7,MISSED,101.856,101.792806
97,433,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.779,100.829,4,DONE,100.727,100.797146
98,436,US Treasury 3Y,ASK,500000,CountrysideBroker,100.643,100.731,5,MISSED,100.895,100.661393


In [31]:
def insample_findOptimalPrices(data):
    optimal_prices = []
    sigma=mean_squared_error(y_test, linear_pred)**0.5

    for i, row in data.iterrows():
        next_mid_price = row['predNextMidPrice']
        optimizer = RFQOptimizer(
            next_mid_price=next_mid_price,
            side=row['Side'],
            traded_prob_model=lambda qp: wrapped_traded_prob_model(qp, row),
            prob_thres=0.1,
            alpha=1.0,
            beta=0.5,
            gamma=0.5,
            epsilon=1e-6,
            bound=0.2
        )
        optimal_price = optimizer.optimize()
        if optimal_price is None:
            print(f'row{i} failed!')
            side=1 if row['Side']=='ASK' else -1
            optimal_price=row['predNextMidPrice']+2*sigma*side
        else:
            print(f'row{i} succeed!')
        optimal_prices.append(round(optimal_price,2))


    # Add results to the dataset
    data['OptimalQuotedPrice'] = optimal_prices
    return data


In [32]:
df=insample_findOptimalPrices(is_data_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
quoted price: 98.19
traded prob: [0.02408075]
quoted price: 98.2
traded prob: [0.02408075]
quoted price: 98.21
traded prob: [0.02408075]
quoted price: 98.22
traded prob: [0.04194593]
quoted price: 98.23
traded prob: [0.054688]
quoted price: 98.24
traded prob: [0.06139598]
quoted price: 98.25
traded prob: [0.06520059]
quoted price: 98.26
traded prob: [0.10107385]
score: [0.04602162]
quoted price: 98.27
traded prob: [0.10669578]
score: [0.04551702]
quoted price: 98.28
traded prob: [0.24120654]
score: [0.1497033]
quoted price: 98.29
traded prob: [0.24120654]
score: [0.14080136]
quoted price: 98.3
traded prob: [0.34253868]
score: [0.20069367]
quoted price: 98.31
traded prob: [0.29994628]
score: [0.15518507]
quoted price: 98.32
traded prob: [0.53179747]
score: [0.26574093]
quoted price: 98.33
traded prob: [0.5190366]
score: [0.21828811]
quoted price: 98.34
traded prob: [0.6856455]
score: [0.20197774]
quoted price: 98.35
traded

In [33]:
df

Unnamed: 0,Time,Bond,Side,Notional,Counterparty,MidPrice,QuotedPrice,Competitors,Traded,nextMidPrice,predNextMidPrice,OptimalQuotedPrice
0,5,US Treasury 5Y,ASK,500000,SniperFund,98.629,98.717,7,MISSED,98.686,98.647663,98.69
1,14,US Treasury 5Y,BID,2500000,SleepyManager,98.502,98.443,5,DONE,98.419,98.477862,98.44
2,21,US Treasury 3Y,BID,100000,RelativeValueStrategies,102.080,102.027,6,MISSED,101.971,102.062006,102.01
3,26,US Treasury 10Y,BID,5000000,HF-Fortress,97.753,97.651,3,MISSED,97.788,97.723574,97.67
4,31,US Treasury 3Y,ASK,100000,TankerAssetManagement,101.279,101.398,3,MISSED,101.374,101.295329,101.34
...,...,...,...,...,...,...,...,...,...,...,...,...
95,418,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.953,101.053,5,MISSED,100.931,100.971464,101.02
96,426,US Treasury 2Y,BID,100000,HF-Fortress,101.810,101.753,7,MISSED,101.856,101.792806,101.74
97,433,US Treasury 2Y,ASK,500000,TankerAssetManagement,100.779,100.829,4,DONE,100.727,100.797146,100.86
98,436,US Treasury 3Y,ASK,500000,CountrysideBroker,100.643,100.731,5,MISSED,100.895,100.661393,100.71


In [34]:
df.loc[:, "OptimalPriceBeat"] = np.where(
    ((df["OptimalQuotedPrice"] > df["QuotedPrice"]) & (df["Side"] == "BID")) |
    ((df["OptimalQuotedPrice"] < df["QuotedPrice"]) & (df["Side"] == "ASK")),
    1,
    0
)
df.loc[:, "OptimalPriceTakeProfit"] = np.where(
    ((df["OptimalQuotedPrice"] < df["nextMidPrice"]) & (df["Side"] == "BID")) |
    ((df["OptimalQuotedPrice"] > df["nextMidPrice"]) & (df["Side"] == "ASK")),
    1,
    0
)
df.loc[:, "OptimalPriceWin"] = df["OptimalPriceBeat"] & df["OptimalPriceTakeProfit"]

In [37]:
sum(df["OptimalPriceWin"]) / 100

0.39

In [38]:
sum(df["OptimalPriceTakeProfit"]) / 100

0.65

In [39]:
sum(df["OptimalPriceBeat"]) / 100

0.67

In [40]:
df.loc[:, "UBSBeat"] = np.where(
    ((df["OptimalQuotedPrice"] < df["QuotedPrice"]) & (df["Side"] == "BID")) |
    ((df["OptimalQuotedPrice"] > df["QuotedPrice"]) & (df["Side"] == "ASK")),
    1,
    0
)
df.loc[:, "UBSTakeProfit"] = np.where(
    ((df["QuotedPrice"] < df["nextMidPrice"]) & (df["Side"] == "BID")) |
    ((df["QuotedPrice"] > df["nextMidPrice"]) & (df["Side"] == "ASK")),
    1,
    0
)
df.loc[:, "UBSWin"] = df["UBSBeat"] & df["UBSTakeProfit"]

In [41]:
sum(df["UBSWin"]) / 100

0.2

In [43]:
(0.39-0.2)/0.22

0.8636363636363636

## performance improved 73% than UBS!