# Feature extraction

In [None]:
# Feature extraction

# Needed features - (Bid-Ask Spread + , Bid-Ask Volume Imbalance + , Signed Transaction Volume + )

def get_order_book(info, exchange_id):
    """Return the order book of the exchange stored under ``exchange_id`` in ``info.exchanges``."""
    exchange = info.exchanges[exchange_id]
    return exchange.order_book

def get_bids(info, exchange_id):
    """Return the ``'bid'`` side of the exchange's order book, converted with its ``to_list()`` method."""
    bid_side = get_order_book(info, exchange_id)['bid']
    return bid_side.to_list()

def get_asks(info, exchange_id):
    """Return the ``'ask'`` side of the exchange's order book, converted with its ``to_list()`` method."""
    ask_side = get_order_book(info, exchange_id)['ask']
    return ask_side.to_list()

def get_spread(info, exchange_id):
    """
    Bid-ask spread (ask minus bid, rounded to 4 decimals) for every entry
    recorded in ``info.spreads[exchange_id]``, returned as a list.
    """
    quotes = pd.DataFrame(info.spreads[exchange_id], columns=['bid', 'ask'])
    gaps = quotes['ask'] - quotes['bid']
    return [round(gap, 4) for gap in gaps]

def _book_side_top_levels(orders, depth, keep_at_or_above):
    """Load one order-book side into a DataFrame and keep the orders up to its ``depth``-th distinct price."""
    side = pd.DataFrame(orders)
    if side.empty:
        # An empty side has no 'price'/'qty' columns at all; give it the expected schema
        # so the sums below evaluate to 0 instead of raising KeyError.
        return pd.DataFrame(columns=['price', 'qty', 'trader_link'])

    # Replace trader objects by their type label (None for orders without a trader).
    side['trader_link'] = side['trader_link'].apply(lambda x: x.type if x else None)

    # `unique()` keeps order of appearance; the border is the price found at position `depth`
    # (or the last distinct price when the side has fewer levels).
    # NOTE(review): this presumably relies on the book being listed best price first - confirm.
    levels = side['price'].unique()
    border = levels[min(depth, len(levels) - 1)]

    if keep_at_or_above:
        return side[side.price >= border]
    return side[side.price <= border]


def get_bid_ask_volume_imbalance(info, exchange_id, depth=1000, class_division=False):

    """
    Bid-Ask Volume Imbalance: shares at the bid minus shares at the ask in the
    current order book, divided by the total number of shares on both sides.

    Only orders up to the ``depth``-th distinct price of each side are counted.

    :param info: simulator info object, passed on to ``get_bids`` / ``get_asks``
    :param exchange_id: id of the exchange whose book is inspected
    :param depth: position of the border price among the distinct prices of a side
    :param class_division: if True, return a dict ``{trader type: imbalance}`` where each
        entry is that type's (bid - ask) volume divided by the total volume of the book;
        orders without a trader are stored under the key ``None``
    :return: a single number, or a dict when ``class_division`` is True.
        ``0`` (or an empty dict) is returned when the selected levels hold no volume.
    """

    bids_qty = _book_side_top_levels(get_bids(info, exchange_id), depth, keep_at_or_above=True)
    asks_qty = _book_side_top_levels(get_asks(info, exchange_id), depth, keep_at_or_above=False)

    all_bids_qty = bids_qty['qty'].sum()
    all_asks_qty = asks_qty['qty'].sum()
    total_qty = all_bids_qty + all_asks_qty

    if not class_division:
        if total_qty == 0:
            return 0
        return (all_bids_qty - all_asks_qty) / total_qty

    imbalance_by_class = {}
    if total_qty == 0:
        return imbalance_by_class

    # Collect classes from BOTH sides (bid classes first, order of appearance):
    # a class that currently has only ask orders must still get its (negative) imbalance.
    trader_classes = dict.fromkeys([*bids_qty['trader_link'], *asks_qty['trader_link']])

    for trader_class in trader_classes:
        if trader_class:
            bid_mask = bids_qty['trader_link'] == trader_class
            ask_mask = asks_qty['trader_link'] == trader_class
        else:
            # orders that are not linked to any trader
            bid_mask = bids_qty['trader_link'].isnull()
            ask_mask = asks_qty['trader_link'].isnull()

        cur_bids = bids_qty.loc[bid_mask, 'qty'].sum()
        cur_asks = asks_qty.loc[ask_mask, 'qty'].sum()
        imbalance_by_class[trader_class] = (cur_bids - cur_asks) / total_qty

    return imbalance_by_class

def get_transaction_volume(info, exchange_id, limit=15):

    """
    Signed Transaction Volume: shares bought minus shares sold over the most recent
    transactions, divided by the total traded volume of that window.

    The window is made of the last ``limit`` distinct values of the transaction
    ``time`` field (fewer if not that many exist).

    :param info: simulator info; ``info.exchanges[exchange_id].transactions.transactions`` is read
    :param exchange_id: id of the exchange
    :param limit: number of distinct time stamps in the window
    :return: value in [-1, 1]; ``0`` when there are no transactions or no bid/ask volume in the window
    """

    transactions = pd.DataFrame(info.exchanges[exchange_id].transactions.transactions,
                                columns=['time', 'price', 'quantity', 'side', 'trader_link'])

    time_values = sorted(transactions['time'].unique())

    limit = min(limit, len(time_values))

    if limit <= 0:
        return 0

    recent = transactions[transactions['time'] >= time_values[-limit]]

    total_buy = recent.loc[recent['side'] == 'bid', 'quantity'].sum()      # fulfilled bid orders - buy
    total_sell = recent.loc[recent['side'] == 'ask', 'quantity'].sum()     # fulfilled ask orders - sell

    total = total_buy + total_sell
    if total == 0:
        # avoid 0/0 (NaN) when the window holds no bid/ask volume
        return 0

    return (total_buy - total_sell) / total

def get_all_transaction_volume(info, exchange_id, limit=15):

    """
    Total Transaction Volume: number of shares bought plus number of shares sold
    over the last ``limit`` distinct transaction time stamps (unsigned counterpart
    of the signed transaction volume). Returns 0 when there are no transactions.
    """

    column_names = ['time', 'price', 'quantity', 'side', 'trader_link']
    raw_transactions = info.exchanges[exchange_id].transactions.transactions
    frame = pd.DataFrame(raw_transactions, columns=column_names)

    distinct_times = sorted(frame['time'].unique())
    window = min(limit, len(distinct_times))

    if window == 0:
        return 0

    oldest_kept_time = distinct_times[-window]
    recent = frame[frame['time'] >= oldest_kept_time]

    bought = recent[recent['side'] == 'bid']['quantity'].sum()    # fulfilled bid orders - buy
    sold = recent[recent['side'] == 'ask']['quantity'].sum()      # fulfilled ask orders - sell

    return (bought + sold)

"""
0 - features: look at anomalies by group
1 - generate data (1 config) and compare the quality

"""

def simulation_with_data(simulator, exchange_id, n, limit=15, random_state=None,
                         bid_ask_volume_imbalance_window=(1, 3, 5, 10), sign_transaction_volume_window=(1, 5, 10, 20),
                         class_division=False):

    """
    Run the simulator for ``n`` single steps and collect one feature row per step.

    Per step: bid-ask volume imbalance for every depth in
    ``bid_ask_volume_imbalance_window`` (one column per trader type when
    ``class_division`` is True), signed transaction volume for every window in
    ``sign_transaction_volume_window`` and the total transaction volume of the last tick.
    After the loop the last ``n`` values of spread, dividends, price and fundamental
    value are attached together with 1/5/10/20-step returns.

    :param simulator: object providing ``simulate``, ``info``, ``traders`` and ``exchanges``
    :param exchange_id: id of the exchange the features are read from
    :param n: number of simulation steps
    :param limit: unused, kept for backward compatibility
    :param random_state: seed passed to ``random.seed``; ``None`` leaves the generator untouched
    :param class_division: split the volume imbalance by trader type
    :return: DataFrame with ``n`` rows
    """

    # `is not None` so that a seed of 0 is honoured as well
    if random_state is not None:
        random.seed(random_state)

    feature_list = []

    # defined up front so the code after the loop also works for n == 0
    info = simulator.info
    col_names = []

    for _ in tqdm(range(n)):

        # re-read on every step, exactly as before
        info = simulator.info

        simulator.simulate(1, silent=True)

        row = []
        col_names = []

        if class_division:
            # sorted -> column order does not depend on set iteration order
            present_classes = sorted({trader.type for trader in simulator.traders})

        for w in bid_ask_volume_imbalance_window:

            bid_ask_volume_imbalance_w = get_bid_ask_volume_imbalance(info, exchange_id, depth=w, class_division=class_division)

            if class_division:
                for cls in present_classes:
                    # classes without orders in the book get 0
                    row.append(bid_ask_volume_imbalance_w.get(cls, 0))
                    col_names.append(f"bid_ask_volume_imbalance_{cls.replace(' ', '_')}_{w}")
            else:
                row.append(bid_ask_volume_imbalance_w)
                col_names.append(f"bid_ask_volume_imbalance_{w}")

        for w in sign_transaction_volume_window:
            row.append(get_transaction_volume(info, exchange_id, limit=w))
            col_names.append(f"sign_transaction_volume_{w}")

        row.append(get_all_transaction_volume(info, exchange_id, limit=1))
        col_names.append("all_transaction_volume_1")

        feature_list.append(row)

    result_df = pd.DataFrame(feature_list, columns=col_names)

    bid_ask_spread = get_spread(info, exchange_id)

    # the per-tick series are cut to their last n entries using the length of the spread series
    last_vals = len(bid_ask_spread)

    result_df['bid_ask_spread'] = bid_ask_spread[last_vals - n:]
    result_df['dividends'] = info.dividends[exchange_id][last_vals - n:]
    result_df['price'] = info.prices[exchange_id][last_vals - n:]
    # NOTE(review): the risk-free rate is read from simulator.exchanges[0], not from `exchange_id` - confirm
    result_df['dividends_previous_divided'] = (result_df['dividends'] / (result_df['price'] * simulator.exchanges[0].risk_free_rate).shift(1))

    fundamental_value_data = info.fundamental_value(exchange_id)
    n_past_iterations = len(fundamental_value_data)

    result_df['fundamental_v'] = fundamental_value_data[n_past_iterations - n:]

    # k-step returns relative to the current price
    for lag in (1, 5, 10, 20):
        result_df[f'return_{lag}'] = ((result_df['price'] - result_df['price'].shift(lag)) / result_df['price'])

    return result_df

def make_graphs(data, columns):

    """
    Plot every column of ``columns`` from ``data`` in its own panel of a two-column grid
    (a single panel when only one column is given).

    :param data: object indexable by column name (e.g. a DataFrame)
    :param columns: sequence of column names to plot
    """

    n = len(columns)

    if n == 0:
        # nothing to draw
        return

    # squeeze=False: always get a 2-D array of axes, so the [row, col] indexing below
    # also works for one column (single Axes) and two columns (single row).
    if n == 1:
        fig1, axs1 = plt.subplots(squeeze=False)
    else:
        fig1, axs1 = plt.subplots(nrows=(n + 1) // 2, ncols=2, squeeze=False)

    for i, column in enumerate(columns):
        ax = axs1[i // 2, i % 2]
        # tab10 has 10 colours; wrap around instead of repeating the last one
        ax.plot(data[column], label=column, color=plt.cm.tab10(i % 10))
        ax.legend()

    fig1.tight_layout()

    plt.show()

def make_many_simulations(n_sim=5, n_iter=100, risk_free_rate=0.05, price=100,
                          dividend=None, random_state=None, silent=False, market_agents=(6, 6, 6, 1),
                          class_division=False, sim_return=False, mm_softlimit=100):

    """
    Build and run ``n_sim`` independent simulations and collect their feature frames.

    :param n_sim: number of simulations
    :param n_iter: number of steps per simulation
    :param risk_free_rate: risk-free rate passed to the exchange
    :param price: value passed to the exchange as ``mean``
    :param dividend: dividend passed to ``Stock``; ``None`` means ``price * risk_free_rate``
    :param random_state: seed for the first simulation; later simulations use a seed
        derived from it. ``None`` reseeds ``random`` without a fixed value every time.
    :param silent: if False, plot the features of every simulation
    :param market_agents: numbers of Random, Chartist1D, Fundamentalist and MarketMaker1D traders
    :param class_division: forwarded to ``simulation_with_data``
    :param sim_return: if True, also return the list of simulators
    :param mm_softlimit: ``softlimit`` of the market makers
    :return: list of feature DataFrames, or ``(list of DataFrames, list of simulators)``
    """

    # `is None`: an explicit dividend of 0 must not be replaced by the default
    if dividend is None:
        dividend = price * risk_free_rate

    resulting_df = []
    simulators = []

    for i in range(n_sim):

        random.seed(random_state)

        asset = Stock(dividend)
        simple_exchange = ExchangeAgent(asset, risk_free_rate, mean=price)

        simple_traders = [
            *[Random(simple_exchange) for _ in range(market_agents[0])],
            *[Chartist1D(simple_exchange) for _ in range(market_agents[1])],
            *[Fundamentalist(simple_exchange) for _ in range(market_agents[2])],
            *[MarketMaker1D(simple_exchange, softlimit=mm_softlimit) for _ in range(market_agents[3])]
        ]

        simple_sim = Simulator(assets=[asset], exchanges=[simple_exchange], traders=simple_traders)

        exchange_id = simple_exchange.id

        simulators.append(simple_sim)

        # random_state=None: the generator was already seeded above
        features_simulation_data = simulation_with_data(simple_sim, exchange_id, n_iter, random_state=None, class_division=class_division)
        print(f"--step {i + 1} of simulation results with random state {random_state}--")

        if not silent:
            make_graphs(features_simulation_data, features_simulation_data.columns)

        resulting_df.append(features_simulation_data)

        # Derive the seed of the next simulation. `is not None` so that a seed of 0
        # is advanced too (otherwise every simulation would repeat the same run).
        if random_state is not None:
            random_state = (random_state + (i + 1 + (i + 2)**2) + 100 * (i + 3)) % 11221

    if sim_return:
        return resulting_df, simulators
    return resulting_df

def make_graphs_column(data, columns, figsize=(8, 10)):

    """
    Plot every column of ``columns`` from ``data`` in its own panel, stacked in a single column.

    :param data: object indexable by column name (e.g. a DataFrame)
    :param columns: sequence of column names to plot
    :param figsize: figure size passed to ``plt.subplots``
    """

    n = len(columns)

    if n == 0:
        # nothing to draw
        return

    # squeeze=False keeps a 2-D axes array even for a single panel
    fig1, axs1 = plt.subplots(nrows=n, ncols=1, figsize=figsize, squeeze=False)

    for i, column in enumerate(columns):
        ax = axs1[i, 0]
        # tab10 has 10 colours; wrap around instead of repeating the last one
        ax.plot(data[column], label=column, color=plt.cm.tab10(i % 10))
        ax.legend()

    fig1.tight_layout()

    plt.show()

# Mean-variance analysis of trajectories

In [None]:
### Mean-variance analysis of trajectories

def find_mean_variance(trajectories, variable):
    """
    Mean and sample variance of ``variable`` across trajectories, for every row (time step).

    :param trajectories: sequence of DataFrames that all contain the column ``variable``
    :param variable: name of the column to aggregate
    :return: ``(mean_vector, variance_vector)`` - two lists with one entry per row;
        two empty lists when no trajectories are given
    """
    if len(trajectories) == 0:
        return [], []

    # Single concat instead of growing a frame inside a loop:
    # rows are time steps, columns are trajectories.
    all_trajectories = pd.concat(
        [trajectory[variable] for trajectory in trajectories],
        axis=1,
        keys=[f"{variable}_trajectory_{i + 1}" for i in range(len(trajectories))],
    )

    mean_vector = []
    variance_vector = []

    # Walk the rows by position, so the result does not depend on the index labels being 0..T-1.
    for _, values_at_step in all_trajectories.iterrows():
        mean_vector.append(values_at_step.mean())
        variance_vector.append(values_at_step.var())

    return mean_vector, variance_vector

def draw_mean_variance(data, variable, x=250):
    """
    Plot the mean of ``variable`` over ``range(x)`` with a shaded band of
    1.96 standard deviations around it, and return the result of
    ``find_mean_variance(data, variable)``.
    """
    mean_variance = find_mean_variance(data, variable)
    means, variances = mean_variance[0], mean_variance[1]

    figure, axes = plt.subplots()

    steps = range(x)

    # half width of the band: 1.96 * standard deviation
    half_width = 1.96 * np.array(variances) ** 0.5
    band_bottom = means - half_width
    band_top = means + half_width

    axes.plot(steps, means)
    axes.fill_between(steps, band_bottom, band_top, color='b', alpha=.1)

    plt.show()

    return mean_variance

def find_mean_quantile(trajectories, variable, quantile):
    """
    Mean and a pair of quantiles of ``variable`` across trajectories, for every row (time step).

    :param trajectories: sequence of DataFrames that all contain the column ``variable``
    :param variable: name of the column to aggregate
    :param quantile: upper quantile level ``q``; the lower level is ``1 - q``
    :return: ``(mean_vector, lower_quantile, higher_quantile)`` - three lists with one entry
        per row; three empty lists when no trajectories are given
    """
    if len(trajectories) == 0:
        return [], [], []

    # Single concat instead of growing a frame inside a loop:
    # rows are time steps, columns are trajectories.
    all_trajectories = pd.concat(
        [trajectory[variable] for trajectory in trajectories],
        axis=1,
        keys=[f"{variable}_trajectory_{i + 1}" for i in range(len(trajectories))],
    )

    mean_vector = []
    lower_quantile = []
    higher_quantile = []

    # Walk the rows by position, so the result does not depend on the index labels being 0..T-1.
    for _, values_at_step in all_trajectories.iterrows():
        mean_vector.append(values_at_step.mean())
        # 'lower' / 'higher' pick observed values, which widens the band to actual data points
        lower_quantile.append(values_at_step.quantile(q=(1 - quantile), interpolation='lower'))
        higher_quantile.append(values_at_step.quantile(q=quantile, interpolation='higher'))

    return mean_vector, lower_quantile, higher_quantile

def draw_mean_quantile(data, variable, quantile, x=250, title=None, y_lim=None):
    """
    Plot the mean of ``variable`` over ``range(x)`` with a shaded band between the
    lower and higher quantiles from ``find_mean_quantile``, and return those three vectors.

    ``title`` is used as the legend label of the mean line; ``y_lim`` (if given)
    supplies the lower and upper y-axis limits as its first two items.
    """
    mean, lower_quantiles, higher_quantiles = find_mean_quantile(data, variable, quantile)

    figure, axes = plt.subplots()
    steps = range(x)

    axes.plot(steps, mean, label=title)

    if y_lim:
        bottom, top = y_lim[0], y_lim[1]
        axes.set_ylim([bottom, top])

    axes.fill_between(steps, lower_quantiles, higher_quantiles, color='b', alpha=.1)

    plt.legend()
    plt.show()

    return mean, lower_quantiles, higher_quantiles

def draw_many_mean_quantile(dataset, variable, quantile, x=250, title=None, y_lim=None, figsize=(12, 10)):

    """
    Draw the mean and quantile band of ``variable`` for several groups of trajectories
    on one set of axes, and print the last mean / lower / higher value of each group.

    :param dataset: iterable of trajectory collections (each is passed to ``find_mean_quantile``)
    :param variable: name of the column to aggregate
    :param quantile: upper quantile level; the lower level is ``1 - quantile``
    :param x: number of points on the x axis (``range(x)``)
    :param title: optional sequence of legend labels, one per entry of ``dataset``
    :param y_lim: optional pair ``(low, high)`` of y-axis limits
    :param figsize: figure size passed to ``plt.subplots``
    """

    fig, ax = plt.subplots(figsize=figsize)

    x = range(x)

    for position, data in enumerate(dataset):
        mean, lower_quantiles, higher_quantiles = find_mean_quantile(data, variable, quantile)

        print(mean[-1], lower_quantiles[-1], higher_quantiles[-1])

        # tolerate a missing or too short list of labels
        label = title[position] if title is not None and position < len(title) else None

        line, = ax.plot(x, mean, label=label)

        # shade the band in the colour of its own line; this also removes the
        # limit of five datasets imposed by a fixed colour list
        ax.fill_between(x, lower_quantiles, higher_quantiles, color=line.get_color(), alpha=.05)

    if y_lim:
        ax.set_ylim([y_lim[0], y_lim[1]])

    if title is not None:
        plt.legend()
    plt.show()

def draw_many_mean_variance(data, variable, labels):
    """
    Draw, one panel per entry of ``data``, the mean of ``variable`` with a band of
    1.96 standard deviations, and print the plotted values.

    :param data: iterable of trajectory collections (each is passed to ``find_mean_variance``)
    :param variable: name of the column to aggregate
    :param labels: legend labels, indexed like ``data``
    """
    print(f"GRAPHS OF {variable}")

    data = list(data)

    # 2x2 grid for up to four datasets; extra rows are added instead of
    # drawing further datasets over the panels of the second row
    nrows = max(2, (len(data) + 1) // 2)
    fig, ax = plt.subplots(nrows, 2, figsize=(10, 2.5 * nrows))

    for i, d in enumerate(data):
        mean_variance = find_mean_variance(d, variable)

        y = mean_variance[0]
        # x axis follows the actual trajectory length instead of a fixed 250 points
        x = range(len(y))

        half_width = 1.96 * np.array(mean_variance[1]) ** 0.5
        lower_part = y - half_width
        upper_part = y + half_width

        panel = ax[i // 2, i % 2]
        panel.plot(x, y, label=labels[i])
        panel.fill_between(x, lower_part, upper_part, color='b', alpha=.1)
        panel.legend()

        print(x, y, lower_part, upper_part)

    plt.show()

def draw_all_features_mean_var(data, labels):
    """Call ``draw_many_mean_variance`` once for every column of ``data[0][0]``."""
    for column in data[0][0].columns:
        draw_many_mean_variance(data, column, labels=labels)

# Predictions

In [None]:
# Predictions

def difference_split(data, price, percentage):
    """
    Classify a price change ``data`` against a band of ``price * percentage``:
    1 above the band, -1 below it, 0 inside (borders included).
    """
    threshold = price * percentage
    if data > threshold:
        return 1
    if data < -threshold:
        return -1
    return 0

def prepare_data(data, shift=5, percentage_down=0.0015, percentage_up=None, two_classes_only=False, rf_rate=0.05):

    """
    Turn one trajectory of features into a labelled data set.

    The label ``difference`` compares the price ``shift`` rows ahead with the current
    price: 1 if the relative change exceeds ``percentage_up``, -1 if it is below
    ``-percentage_down``, 0 otherwise. The last ``shift`` rows (no future price) and
    rows containing NaN are removed; the spread is replaced by its share of the price
    and the fundamental value by its relative distance to the price.

    :param data: DataFrame with at least ``price``, ``bid_ask_spread`` and ``fundamental_v``
    :param shift: prediction horizon in rows
    :param percentage_down: threshold of a downward move (positive number)
    :param percentage_up: threshold of an upward move; ``None`` means ``percentage_down``
    :param two_classes_only: unused, kept for backward compatibility
    :param rf_rate: unused, kept for backward compatibility
    :return: new DataFrame; the former index is kept in the column ``index``
    """

    # `is None`: an explicit threshold of 0 must not be replaced by the default
    if percentage_up is None:
        percentage_up = percentage_down

    data = data.copy()
    data['future_price'] = data['price'].shift(-shift)
    # drop the last `shift` rows by position; copy so the assignments below
    # do not write into a slice
    data = data.iloc[:max(len(data) - shift, 0)].copy()

    difference = (data['future_price'] - data['price']) / data['price']

    # "up" is written last so it wins if both conditions hold, like the former if/elif order
    price_movement = pd.Series(0, index=data.index)
    price_movement[(difference < -1 * percentage_down).values] = -1
    price_movement[(difference > percentage_up).values] = 1

    data['difference'] = price_movement.values
    data = data.dropna()

    # additional normalization
    data = data.assign(
        bid_ask_spread_percentage=data['bid_ask_spread'] / data['price'],
        fundamental_v_norm=data['fundamental_v'] / data['price'] - 1,
    )

    data = data.drop(['bid_ask_spread'], axis=1)

    return data.reset_index()

def prepare_huge_data(data: list, shift=5, percentage_down=0.0015, percentage_up=None, two_classes_only=False):
    """
    Apply ``prepare_data`` to every trajectory and stack the results.

    Each prepared frame gets a ``trajectory_number`` column (1-based position in ``data``).
    The frames keep their own row index, so index values repeat across trajectories.

    :param data: iterable of trajectory DataFrames
    :return: concatenated DataFrame; an empty DataFrame when ``data`` is empty
    """
    prepared = []

    for iteration, trajectory in enumerate(data, start=1):
        frame = prepare_data(trajectory, shift, percentage_down, percentage_up, two_classes_only)
        frame["trajectory_number"] = iteration
        prepared.append(frame)

    if not prepared:
        return pd.DataFrame()

    # one concat at the end instead of re-copying a growing frame on every iteration
    return pd.concat(prepared)

def classifier_test(test_classifier, passed_data, shift=10, percentage=0.0010, xgb=True, silent=False, x_columns=None, trajectory_test_split=0):

    """
    Fit ``test_classifier`` on labelled simulation data and report train / test accuracy.

    :param test_classifier: estimator with ``fit`` and ``predict``
    :param passed_data: list of trajectory DataFrames, prepared with ``prepare_huge_data``
    :param shift: prediction horizon; also only rows whose index is a multiple of ``shift`` are kept
    :param percentage: symmetric threshold of a price move (used for both directions)
    :param xgb: if True and the training labels contain -1, labels are shifted by +1
    :param silent: if False, print shapes and accuracies and show the class histogram and confusion matrix
    :param x_columns: optional list of feature columns to keep
    :param trajectory_test_split: 0 - split train/test by time inside every trajectory
        (first 80% of the index range is train); otherwise split by trajectory number
    :return: ``(fitted model, train accuracy, test accuracy)``
    """

    # data preparation

    # `prepare_huge_data` has no `percentage` parameter; the threshold goes in as
    # `percentage_down` (the upward threshold then defaults to the same value)
    data_xy = prepare_huge_data(passed_data, shift=shift, percentage_down=percentage)

    split_by_trajectory = trajectory_test_split != 0

    if not split_by_trajectory:
        data_xy = data_xy.drop(['trajectory_number'], axis=1)

    split_column = 'trajectory_number' if split_by_trajectory else 'index'
    train_border = max(data_xy[split_column].unique()) * 0.8

    # keep every `shift`-th row of each trajectory
    data_xy = data_xy[data_xy.index % shift == 0]

    if not silent:
        print('data shape -', data_xy.shape)

    data_xy_train = data_xy[data_xy[split_column] < train_border]
    data_xy_test = data_xy[data_xy[split_column] >= train_border]

    if split_by_trajectory:
        data_xy_train = data_xy_train.drop(['trajectory_number'], axis=1)
        data_xy_test = data_xy_test.drop(['trajectory_number'], axis=1)

    # columns that are the label itself or that would leak the price level
    non_feature_columns = ['future_price', 'index', 'difference', 'dividends', 'price', 'fundamental_v']

    y_train = data_xy_train['difference']
    X_train = data_xy_train.drop(non_feature_columns, axis=1)

    y_test = data_xy_test['difference']
    X_test = data_xy_test.drop(non_feature_columns, axis=1)

    if x_columns:
        X_train = X_train[x_columns]
        X_test = X_test[x_columns]

    if not silent:
        data_xy_train['difference'].hist()
        data_xy_test['difference'].hist()
        plt.show()

    # predictions

    # labels are moved from {-1, 0, 1} to {0, 1, 2} for xgb when -1 is present
    labels_shifted = bool(xgb and min(y_train) == -1)
    if labels_shifted:
        # not in place: y_train / y_test are slices of the prepared frames
        y_train = y_train + 1
        y_test = y_test + 1

    tested_model = test_classifier
    tested_model.fit(X_train, y_train)

    # training accuracy
    predictions = tested_model.predict(X_train)
    train_accuracy = accuracy_score(y_train, predictions)

    if not silent:
        print("TRAIN accuracy score - ", round(train_accuracy, 2))

    # test accuracy
    predictions = tested_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)

    if not silent:
        print("TEST  accuracy score - ", round(test_accuracy, 2))

        # fix the label set so the matrix is always 3x3 and matches the three display labels,
        # even when a class is missing from the test data
        class_values = [0, 1, 2] if labels_shifted else [-1, 0, 1]
        disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(y_test, predictions, labels=class_values),
                                      display_labels=['down', 'nothing', 'up'])
        disp.plot(cmap=plt.cm.Blues)
        plt.show()

    return tested_model, train_accuracy, test_accuracy

### Functions classifications

def replace_name(x):
    """Return ``x`` unchanged (identity hook applied to feature names before plotting)."""
    return x

def draw_importance_xgb(models, labels=None):

    """
    Horizontal bar chart comparing the feature importances of several fitted models.

    :param models: sequence of fitted models exposing ``feature_names_in_`` and
        ``feature_importances_``
    :param labels: legend label per model; defaults to the four simulation
        configurations used so far. Models without a label are ignored.
        Bars are sorted by the importances of the first model.
    """

    if labels is None:
        labels = ["default simulation", "simulation with no MM", "simulation with undervalued stock",
                  "simulation with undervalued stock, no MM"]

    # one column of importances per model, indexed by feature name
    importance_by_label = {
        label: pd.Series(model.feature_importances_, index=replace_name(model.feature_names_in_))
        for label, model in zip(labels, models)
    }

    if not importance_by_label:
        # nothing to draw
        return

    importances = pd.DataFrame(importance_by_label)
    first_label = next(iter(importance_by_label))

    fig, ax = plt.subplots(figsize=(16, 8))

    importances.sort_values(first_label).plot.barh(ax=ax, title="Importance of microstructural features for different configurations of synthetic market")

    plt.show()

def new_model(name):
    """
    Create a fresh, unfitted classifier for the given configuration name.

    :param name: one of ``'xgb'``, ``'xgb_depth_3'``, ``'xgb_iteration_restrict'``,
        ``'xgb_lower_lr'``, ``'xgb_l2_reg'``, ``'logreg'``
    :return: a new estimator instance
    :raises KeyError: if ``name`` is not a known configuration
    """
    # factories instead of instances: only the requested model is constructed
    factories = {
        'xgb': lambda: XGBClassifier(random_state=2114, importance_type='weight'),
        'xgb_depth_3': lambda: XGBClassifier(random_state=2114, importance_type='weight', max_depth=3),
        'xgb_iteration_restrict': lambda: XGBClassifier(random_state=2114, importance_type='weight', n_estimators=5),
        'xgb_lower_lr': lambda: XGBClassifier(random_state=2114, importance_type='weight', max_depth=6, learning_rate=0.01),
        'xgb_l2_reg': lambda: XGBClassifier(random_state=2114, importance_type='weight', max_depth=6, reg_lambda=100),
        'logreg': lambda: LogisticRegression(random_state=99),
    }

    return factories[name]()

# Testing the classifier in simulation

In [None]:
# Testing the classifier in simulation

class FeatureCollector():
    """
    Computes a single row of market-microstructure features from the current state
    of one exchange (order book, transactions, spread, price, dividend).
    """

    def __init__(self, sim_info, exchange):
        # sim_info: simulator info object; `prices` and `fundamental_value` are read from it
        self.info = sim_info
        # exchange: exchange object; `order_book`, `transactions`, `spread()`, `price()`,
        # `dividend()`, `risk_free_rate` and `id` are read from it
        self.exchange = exchange
        self.exchange_id = exchange.id

    def exchange_get_order_book(self):
        """Return the order book of the exchange."""
        return self.exchange.order_book

    def exchange_get_bids(self):
        """Return the bid side of the order book as a list."""
        return self.exchange_get_order_book()['bid'].to_list()

    def exchange_get_asks(self):
        """Return the ask side of the order book as a list."""
        return self.exchange_get_order_book()['ask'].to_list()

    def exchange_get_spread(self):

        """
        getting bid-ask spread as a difference between best ask and bid prices
        for last tick in a simulation
        """

        bid_ask_spread = self.exchange.spread()
        return round(bid_ask_spread['ask'] - bid_ask_spread['bid'], 4)

    @staticmethod
    def _top_levels_volume(orders, depth, is_bid_side):
        """Sum the quantity of one book side over the orders up to its ``depth``-th distinct price."""
        book_side = pd.DataFrame(orders)
        if book_side.empty:
            # an empty side has no 'price' / 'qty' columns at all
            return 0

        # `unique()` keeps order of appearance; the border is the price found at position `depth`
        # (or the last distinct price when the side has fewer levels).
        # NOTE(review): this presumably relies on the book being listed best price first - confirm.
        levels = book_side['price'].unique()
        border = levels[min(depth, len(levels) - 1)]

        if is_bid_side:
            kept = book_side[book_side.price >= border]
        else:
            kept = book_side[book_side.price <= border]

        return kept['qty'].sum()

    def exchange_get_bid_ask_volume_imbalance(self, depth=1000):

        """
        Bid-Ask Volume Imbalance: shares at the bid minus shares at the ask in the
        current order book, divided by the total number of shares on both sides.
        Returns 0 when the selected levels hold no volume.
        """

        bids_qty = self._top_levels_volume(self.exchange_get_bids(), depth, is_bid_side=True)
        asks_qty = self._top_levels_volume(self.exchange_get_asks(), depth, is_bid_side=False)

        total_qty = bids_qty + asks_qty
        if total_qty == 0:
            return 0

        return (bids_qty - asks_qty) / total_qty

    def _recent_transactions(self, limit):
        """Return the transactions of the last ``limit`` distinct time stamps, or None if there are none."""
        transactions = pd.DataFrame(self.exchange.transactions.transactions,
                                    columns=['time', 'price', 'quantity', 'side', 'trader_link'])

        time_values = sorted(transactions['time'].unique())

        limit = min(limit, len(time_values))

        if limit <= 0:
            return None

        return transactions[transactions['time'] >= time_values[-limit]]

    def exchange_get_transaction_volume(self, limit=15):

        """
        Signed Transaction Volume: shares bought minus shares sold over the last
        ``limit`` distinct transaction time stamps, divided by the total traded volume
        of that window. Returns 0 when the window holds no volume.
        """

        recent = self._recent_transactions(limit)
        if recent is None:
            return 0

        total_buy = recent.loc[recent['side'] == 'bid', 'quantity'].sum()      # fulfilled bid orders - buy
        total_sell = recent.loc[recent['side'] == 'ask', 'quantity'].sum()     # fulfilled ask orders - sell

        total = total_buy + total_sell
        if total == 0:
            # avoid 0/0 (NaN)
            return 0

        return (total_buy - total_sell) / total

    def exchange_get_all_transaction_volume(self, limit=15):

        """
        Total Transaction Volume: shares bought plus shares sold over the last
        ``limit`` distinct transaction time stamps. Returns 0 when there are no transactions.
        """

        recent = self._recent_transactions(limit)
        if recent is None:
            return 0

        total_buy = recent.loc[recent['side'] == 'bid', 'quantity'].sum()      # fulfilled bid orders - buy
        total_sell = recent.loc[recent['side'] == 'ask', 'quantity'].sum()     # fulfilled ask orders - sell

        return (total_buy + total_sell)

    def get_features(self, limit=15,
                         bid_ask_volume_imbalance_window=(1, 3, 5, 10), sign_transaction_volume_window=(1, 5, 10, 20)):

        """
        Build a one-row DataFrame of features for the current state of the exchange.

        Columns, in order: volume imbalance per depth, signed transaction volume per window,
        ``all_transaction_volume_1``, ``dividends_previous_divided``, ``return_1/5/10/20``,
        ``bid_ask_spread_percentage`` and ``fundamental_v_norm``.

        :param limit: unused, kept for backward compatibility
        :param bid_ask_volume_imbalance_window: depths for the volume imbalance
        :param sign_transaction_volume_window: windows for the signed transaction volume
        """

        exchange = self.exchange
        info = self.info

        feature_list = {}

        for w in bid_ask_volume_imbalance_window:
            feature_list[f"bid_ask_volume_imbalance_{w}"] = self.exchange_get_bid_ask_volume_imbalance(depth=w)

        for w in sign_transaction_volume_window:
            feature_list[f"sign_transaction_volume_{w}"] = self.exchange_get_transaction_volume(limit=w)

        feature_list["all_transaction_volume_1"] = self.exchange_get_all_transaction_volume(limit=1)

        result_df = pd.DataFrame(feature_list, index=[0])

        result_df['bid_ask_spread'] = [self.exchange_get_spread()]

        result_df['dividends'] = [exchange.dividend()]

        result_df['price'] = [exchange.price()]

        result_df['dividends_previous_divided'] = (result_df['dividends'] / (exchange.price() * exchange.risk_free_rate))

        result_df['fundamental_v'] = [info.fundamental_value(exchange.id)[-1]]

        prices = info.prices[exchange.id]

        # return over `lag` recorded prices; 0 while the price history is shorter than `lag`
        for lag in (1, 5, 10, 20):
            if len(prices) >= lag:
                result_df[f'return_{lag}'] = ((result_df['price'] - prices[-lag]) / result_df['price'])
            else:
                result_df[f'return_{lag}'] = [0]

        result_df['bid_ask_spread_percentage'] = result_df['bid_ask_spread'] / result_df['price']

        result_df['fundamental_v_norm'] = result_df['fundamental_v'] / result_df['price'] - 1

        # raw levels are dropped; only the normalised versions stay in the feature row
        return result_df.drop(['bid_ask_spread', 'dividends', 'price', 'fundamental_v'], axis=1)