In [None]:
# GENERATE THE COMBINED DATA (MERGING)
from merge_df import get_df_market_data, get_df_resized_trade_data

# Build one merged frame per trading period, reading raw/Period1 .. raw/Period15
# in order, so all_data[k] holds the result for period k + 1.
all_data = []
for period in range(1, 16):
    period_dir = f"raw/Period{period}"
    market_df = get_df_market_data(period_dir)
    # The trade data is merged against the market data loaded for the same period.
    all_data.append(get_df_resized_trade_data(period_dir, market_df))


In [None]:
# CREATING THE CSV FILES
import os
# Root folder that receives one sub-folder per stock.
# NOTE(review): this is a machine-specific absolute path; the cell that reads
# the CSVs back uses the same location, so change both together.
base_path = r'C:\Users\agree\PycharmProjects\NBIStockForecast\input'
# exist_ok=True makes folder creation idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_path, exist_ok=True)

stocks = ['A', 'B', 'C', 'D', 'E']

# Write <base_path>/<stock>/<period>.csv for every stock and every period.
# Periods are numbered from 1 in the order they appear in all_data.
for stock in stocks:
    base_path_stock = os.path.join(base_path, stock)
    # The stock folder does not depend on the period, so create it once here
    # instead of re-checking it on every inner iteration.
    os.makedirs(base_path_stock, exist_ok=True)
    # Iterate over all_data itself rather than a hard-coded range(15), so the
    # export stays correct if the number of loaded periods changes.
    for period, period_frames in enumerate(all_data, start=1):
        period_frames[stock].to_csv(os.path.join(base_path_stock, f"{period}.csv"))


In [1]:
import pandas as pd
import os

file_path = r"C:\Users\agree\PycharmProjects\NBIStockForecast\input\A\2.csv"

# Load the exported CSV: the first column becomes the index and the
# "timestamp" column is parsed with an explicit format instead of being inferred.
# NOTE(review): the variable is called a_1 but the file read is A\2.csv — confirm
# which period is intended.
a_1 = pd.read_csv(
    file_path,
    index_col=0,
    parse_dates=["timestamp"],
    date_format="%Y-%m-%d %H:%M:%S.%f",
)

# Keep only the first row for each index label. This filters on repeated
# *index values*, not on rows whose column contents are identical.
is_repeated_label = a_1.index.duplicated(keep="first")
a_1 = a_1.loc[~is_repeated_label]

# Last expression of the cell: render the cleaned frame.
a_1

Unnamed: 0,bidVolume,bidPrice,askVolume,askPrice,timestamp,price,volume
0,44,117.25,40,117.27,1900-01-01 09:45:00.034415,117.26,3
1,44,117.25,39,117.27,1900-01-01 09:45:00.049591,117.26,3
2,44,117.25,40,117.27,1900-01-01 09:45:00.062074,117.26,3
3,44,117.25,39,117.27,1900-01-01 09:45:00.073500,117.26,3
4,4,117.26,39,117.27,1900-01-01 09:45:00.183971,117.26,3
...,...,...,...,...,...,...,...
96913,4,117.23,76,117.24,1900-01-01 10:44:59.879366,117.23,1
96914,4,117.23,83,117.24,1900-01-01 10:44:59.932730,117.23,1
96915,4,117.23,86,117.24,1900-01-01 10:44:59.932829,117.23,1
96916,5,117.23,86,117.24,1900-01-01 10:44:59.968963,117.23,1


In [26]:
from add_label import add_label
import plotly

# Attach movement labels to the tick data.
# NOTE(review): the rendered output suggests that price_change is expressed in
# percent, that the thresholds are therefore +/-0.05 %, and that the final
# `horizon` rows are dropped because they have no future price — confirm
# against add_label.
df = add_label(
    a_1,
    horizon=5000,
    down_threshold=-0.05,
    up_threshold=0.05,
)

# Last expression of the cell: render the labelled frame.
df


Unnamed: 0,bidVolume,bidPrice,askVolume,askPrice,timestamp,price,volume,future_price,price_change,label
0,44,117.25,40,117.27,1900-01-01 09:45:00.034415,117.26,3,117.22,-0.034112,1
1,44,117.25,39,117.27,1900-01-01 09:45:00.049591,117.26,3,117.22,-0.034112,1
2,44,117.25,40,117.27,1900-01-01 09:45:00.062074,117.26,3,117.22,-0.034112,1
3,44,117.25,39,117.27,1900-01-01 09:45:00.073500,117.26,3,117.22,-0.034112,1
4,4,117.26,39,117.27,1900-01-01 09:45:00.183971,117.26,3,117.22,-0.034112,1
...,...,...,...,...,...,...,...,...,...,...
91913,85,117.20,24,117.21,1900-01-01 10:41:52.984893,117.21,1,117.23,0.017063,1
91914,85,117.20,23,117.21,1900-01-01 10:41:53.029495,117.21,1,117.23,0.017063,1
91915,85,117.20,22,117.21,1900-01-01 10:41:53.029654,117.21,1,117.23,0.017063,1
91916,85,117.20,21,117.21,1900-01-01 10:41:54.037125,117.21,1,117.23,0.017063,1


In [27]:
from resample import resample_data
# Aggregate the labelled ticks into fixed-width time bins.
# NOTE(review): "3s" is presumably a pandas offset alias for 3-second bins (the
# rendered timestamps are 3 seconds apart) — confirm against resample_data.
resample_rule = "3s"
resampled_df = resample_data(df, resample_rule)

Unnamed: 0,timestamp,bidVolume,bidPrice,askVolume,askPrice,volume,price,label
0,1900-01-01 09:45:00,21.217391,117.252029,26.913043,117.263043,99,117.26,1.0
1,1900-01-01 09:45:03,23.687500,117.256875,34.593750,117.268906,172,117.26,1.0
2,1900-01-01 09:45:06,34.846154,117.260000,21.153846,117.270000,31,117.27,1.0
3,1900-01-01 09:45:09,31.909091,117.260000,27.818182,117.270000,99,117.27,1.0
4,1900-01-01 09:45:12,26.898305,117.254576,26.966102,117.267797,165,117.26,1.0
...,...,...,...,...,...,...,...,...
1133,1900-01-01 10:41:42,32.041096,117.207945,69.945205,117.219863,115,117.22,1.0
1134,1900-01-01 10:41:45,54.755102,117.205102,45.785714,117.215816,100,117.21,1.0
1135,1900-01-01 10:41:48,87.204545,117.200000,23.636364,117.210000,44,117.21,1.0
1136,1900-01-01 10:41:51,85.000000,117.200000,21.090909,117.210000,11,117.21,1.0


In [24]:
import plotly.express as px

# Line chart of the resampled price series; expects resampled_df to provide
# 'timestamp' and 'price' columns.
fig = px.line(
    resampled_df,
    x='timestamp',
    y='price',
    title='Resampled Stock Price Over Time',
    labels={'price': 'Price (USD)', 'timestamp': 'Time'},
    markers=True  # Draw a marker on every data point
)

# Layout: unified hover across the x position, clock-time tick labels and a
# range slider for zooming.
# No dtick is set on purpose. On a date axis dtick must be a number of
# milliseconds or 'M<n>'; the 'L<f>' form is only defined for log axes, and
# setting any dtick switches the axis away from automatic tick placement.
# Leaving it out lets Plotly choose a spacing that fits the visible range.
fig.update_layout(
    hovermode='x unified',
    xaxis=dict(
        tickformat='%H:%M:%S.%f',  # Hour:minute:second with fractional seconds
        rangeslider_visible=True  # Add range slider for zooming
    ),
    yaxis_title='Price (USD)',
    xaxis_title='Timestamp',
    template='plotly_white'  # Clean theme
)

# Hover tooltip: time on the first line, price with two decimals on the second.
fig.update_traces(
    hovertemplate='<br>'.join([
        '<b>Time</b>: %{x|%H:%M:%S.%f}',
        '<b>Price</b>: $%{y:.2f}'
    ])
)

fig.show()

In [25]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two stacked panels on a shared time axis: the tick-level price on top and
# the movement label of every tick underneath. Reads 'timestamp', 'price' and
# 'label' from df.
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=('Price Trend', 'Movement Labels')
)

# Row 1: price as a line with markers.
fig.add_trace(
    go.Scatter(
        x=df['timestamp'],
        y=df['price'],
        mode='lines+markers',
        name='Price',
        marker=dict(size=5),
        hovertemplate='Time: %{x|%H:%M:%S.%f}<br>Price: $%{y:.2f}<extra></extra>'
    ),
    row=1, col=1
)

# Row 2: one marker trace per label value so each class gets its own colour
# and legend entry. Label value, legend name and colour are kept together in
# one table so they cannot drift out of sync.
label_styles = [
    (0, 'Down', 'red'),
    (1, 'Hold', 'gray'),
    (2, 'Up', 'green'),
]
for label, label_name, color in label_styles:
    label_df = df[df['label'] == label]
    fig.add_trace(
        go.Scatter(
            x=label_df['timestamp'],
            y=label_df['label'],
            mode='markers',
            marker=dict(color=color, size=8),
            name=label_name,
            hovertemplate='Time: %{x|%H:%M:%S.%f}<br>Label: %{y}<extra></extra>'
        ),
        row=2, col=1
    )

# Overall size, legend title and readable tick text for the label axis.
fig.update_layout(
    height=600,
    legend_title='Legend',
    yaxis2=dict(
        tickvals=[0, 1, 2],
        ticktext=['Down (0)', 'Hold (1)', 'Up (2)']
    )
)

# Clock-time tick labels on the bottom x axis.
# No dtick is set on purpose. On a date axis dtick must be a number of
# milliseconds or 'M<n>'; the 'L<f>' form is only defined for log axes, and
# setting any dtick switches the axis away from automatic tick placement.
fig.update_xaxes(
    tickformat='%H:%M:%S.%f',
    row=2, col=1
)

# Scatter of the resampled movement labels over time; expects resampled_df to
# provide 'timestamp' and 'label' columns.
#
# Plotly Express only applies color_discrete_map when the colour column is
# non-numeric. 'label' is numeric, so colouring on it directly produces a
# continuous colour scale and the red/gray/green mapping is ignored. A string
# 'direction' column is derived for colouring instead; the input frame itself
# is left untouched.
label_names = {0: 'Down', 1: 'Hold', 2: 'Up'}
labels_plot_df = resampled_df.assign(
    # dict.get matches 1.0 against the key 1; anything that is not exactly
    # 0, 1 or 2 (including NaN) is reported as 'Unknown' rather than dropped.
    direction=resampled_df['label'].map(lambda value: label_names.get(value, 'Unknown'))
)

fig_labels = px.scatter(
    labels_plot_df,
    x='timestamp',
    y='label',
    color='direction',
    color_discrete_map={'Down': 'red', 'Hold': 'gray', 'Up': 'green'},  # Map directions to colors
    category_orders={'direction': ['Down', 'Hold', 'Up']},  # Stable legend order
    custom_data=['direction'],  # Exposed to the hover template as customdata[0]
    labels={'label': 'Price Movement', 'timestamp': 'Time', 'direction': 'Direction'},
    title='Resampled Price Movement Labels Over Time'
)

# Readable tick text for the label axis, plus titles, hover mode and theme.
fig_labels.update_layout(
    yaxis=dict(
        tickmode='array',
        tickvals=[0, 1, 2],
        ticktext=['Down (0)', 'Hold (1)', 'Up (2)']
    ),
    xaxis_title='Timestamp',
    yaxis_title='Price Movement',
    legend_title='Direction',
    hovermode='x unified',
    template='plotly_white'
)

# Clock-time tick labels and a range slider for zooming.
# No dtick is set on purpose. On a date axis dtick must be a number of
# milliseconds or 'M<n>'; the 'L<f>' form is only defined for log axes, and
# setting any dtick switches the axis away from automatic tick placement.
fig_labels.update_xaxes(
    tickformat='%H:%M:%S.%f',
    rangeslider_visible=True  # Add range slider for zooming
)

# Hover tooltip: time plus the direction name. The explicit template replaces
# the hover text Plotly Express generates, so no hover_data formatting is
# passed to px.scatter above.
fig_labels.update_traces(
    hovertemplate='<br>'.join([
        '<b>Time</b>: %{x|%H:%M:%S.%f}',
        '<b>Direction</b>: %{customdata[0]}'
    ])
)

fig_labels.show()
fig.show()