In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the minute-level bars and keep only the three fields the notebook uses.
intraday_data = pd.read_csv("IDEA/minute_OHLCV.csv").loc[:, ['Timestamp', 'Close', 'Volume']]
intraday_data

In [None]:
# Convert timestamp to datetime
intraday_data['Timestamp'] = pd.to_datetime(intraday_data['Timestamp'])

# Make the cell safe to re-run: if the per-day columns were already merged in by
# a previous execution, drop them first. Otherwise the merge below would create
# suffixed duplicates (count_x / count_y) and later cells would fail to find 'count'.
intraday_data = intraday_data.drop(columns=['has_complete_range', 'count'], errors='ignore')

# Extract the calendar date (datetime.date objects) from the timestamp
intraday_data['Date'] = intraday_data['Timestamp'].dt.date

# Calculate day_number (calendar days since 2024-10-14, with that date being day 1).
# Done as one vectorized datetime subtraction instead of a per-row Python lambda.
reference_date = pd.to_datetime('2024-10-14').date()
intraday_data['day_number'] = (
    (pd.to_datetime(intraday_data['Date']) - pd.Timestamp(reference_date)).dt.days + 1
)

# Extract the time of day, used for the coverage check below
intraday_data['Time'] = intraday_data['Timestamp'].dt.time

# Per-day summary: first bar time, last bar time and number of bars
day_stats = intraday_data.groupby('Date').agg(
    min_time=('Time', 'min'),
    max_time=('Time', 'max'),
    count=('Time', 'count')
)

# A day "has the complete range" when its first bar is at or before 09:15:00 and
# its last bar is at or after 15:29:00. This only looks at the two end points;
# gaps inside the session are not detected here (compare 'count' for that).
session_start = pd.to_datetime('09:15:00').time()
session_end = pd.to_datetime('15:29:00').time()
day_stats['has_complete_range'] = (
    (day_stats['min_time'] <= session_start) &
    (day_stats['max_time'] >= session_end)
)

# Merge has_complete_range and count from day_stats into intraday_data
intraday_data = intraday_data.merge(day_stats[['has_complete_range', 'count']], on='Date', how='left')

# Display the first few rows of the modified dataframe
intraday_data.head()

In [None]:
# How many days do / do not span the full 09:15-15:29 window
day_stats.loc[:, 'has_complete_range'].value_counts()

In [None]:
# Peek at the days whose first/last bar falls short of the expected window
day_stats.loc[~day_stats['has_complete_range']].head()

In [None]:
ohlcv_data = pd.read_csv("IDEA/daily_OHLCV.csv")

# Convert Date to plain datetime.date objects so it compares directly
# against intraday_data['Date'], which holds the same type.
ohlcv_data['Date'] = pd.to_datetime(ohlcv_data['Date']).dt.date
# Reverse the row order of the file.
# NOTE(review): presumably the CSV is stored newest-first, so this yields
# chronological order matching the intraday rows - confirm against the file.
ohlcv_data = ohlcv_data.iloc[::-1].reset_index(drop=True)

# Keep only the daily bars that fall inside the period covered by intraday_data.
# The lower bound drops older history; the upper bound drops any daily bar newer
# than the intraday data, which would otherwise have no minute count to simulate.
first_intraday_date = intraday_data['Date'].min()
last_intraday_date = intraday_data['Date'].max()
in_intraday_window = (
    (ohlcv_data['Date'] >= first_intraday_date) &
    (ohlcv_data['Date'] <= last_intraday_date)
)
ohlcv_data = ohlcv_data[in_intraday_window]
ohlcv_data

In [None]:
from GenerateLTP import simulate_linear, simulate_random_walk, simulate_geometric_brownian_motion, simulate_bezier_noise

# Simulate LTP using different methods.
# Every simulator is called as simulate_*(O, H, L, C, V, minutes) and its result is
# unpacked as a (prices, volumes) pair; the per-day results are concatenated.
# The dict order fixes the per-day call order: linear, random walk, GBM, bezier.
simulators = {
    'linear': simulate_linear,
    'random_walk': simulate_random_walk,
    'geometric_brownian_motion': simulate_geometric_brownian_motion,
    'bezier_noise': simulate_bezier_noise,
}
simulated_prices = {method: [] for method in simulators}
simulated_volumes = {method: [] for method in simulators}

# Bars per day, looked up once instead of re-filtering intraday_data on every
# iteration. 'count' is the same on every row of a day, so the first row is enough.
minutes_per_day = intraday_data.drop_duplicates('Date').set_index('Date')['count']

for i in range(len(ohlcv_data)):
    date = ohlcv_data['Date'].iloc[i]
    if date not in minutes_per_day.index:
        # Fail with the offending date instead of an opaque IndexError
        raise KeyError(f"No intraday rows found for daily bar dated {date}")
    minutes = minutes_per_day.loc[date]
    O, H, L, C, V = ohlcv_data.iloc[i][['Open', 'High', 'Low', 'Close', 'Volume']].values

    for method, simulate in simulators.items():
        prices, volumes = simulate(O, H, L, C, V, minutes)
        simulated_prices[method].extend(prices)
        simulated_volumes[method].extend(volumes)

# Convert to numpy arrays (names kept as-is because later cells reference them)
simulated_prices_linear = np.array(simulated_prices['linear'])
simulated_volumes_linear = np.array(simulated_volumes['linear'])
simulated_prices_random_walk = np.array(simulated_prices['random_walk'])
simulated_volumes_random_walk = np.array(simulated_volumes['random_walk'])
simulated_prices_geometric_brownian_motion = np.array(simulated_prices['geometric_brownian_motion'])
simulated_volumes_geometric_brownian_motion = np.array(simulated_volumes['geometric_brownian_motion'])
simulated_prices_bezier_noise = np.array(simulated_prices['bezier_noise'])
simulated_volumes_bezier_noise = np.array(simulated_volumes['bezier_noise'])

# Every simulated series must line up one-to-one with the real minute bars,
# otherwise the element-wise comparisons in the evaluation cells are meaningless.
for method in simulators:
    assert len(simulated_prices[method]) == len(intraday_data), (
        f"{method}: {len(simulated_prices[method])} simulated prices vs {len(intraday_data)} intraday rows"
    )
    assert len(simulated_volumes[method]) == len(intraday_data), (
        f"{method}: {len(simulated_volumes[method])} simulated volumes vs {len(intraday_data)} intraday rows"
    )

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import skew, kurtosis
import statsmodels.api as sm


def evaluate_simulation(real_prices, simulated_prices):
    """Print point-wise error metrics of a simulated series against the real one.

    Parameters
    ----------
    real_prices : array-like
        Observed values. In this notebook the function is called with both
        prices and volumes.
    simulated_prices : array-like
        Simulated values, in the same order and of the same length as
        ``real_prices``.

    Returns
    -------
    None
        MAE, MAPE (in percent), RMSE and R2 are printed to stdout.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Work on plain float arrays: lists become subtractable, and pandas objects
    # are compared positionally instead of being aligned on their index.
    actual = np.asarray(real_prices, dtype=float)
    simulated = np.asarray(simulated_prices, dtype=float)
    if actual.shape != simulated.shape:
        raise ValueError(
            f"Shape mismatch: real {actual.shape} vs simulated {simulated.shape}"
        )

    mae = mean_absolute_error(actual, simulated)

    # A percentage error is undefined where the real value is 0. Leave those
    # points out of the average so a single zero cannot turn MAPE into inf/nan.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - simulated[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float("nan")

    rmse = np.sqrt(mean_squared_error(actual, simulated))
    r2 = r2_score(actual, simulated)

    print(f"MAE: {mae:.2f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"RMSE: {rmse:.2f}")
    print(f"R2: {r2:.2f}")

In [None]:
def max_drawdown(prices):
    """Return the largest peak-to-trough decline of ``prices`` as a fraction.

    The result is zero when the series never falls below its running maximum
    and negative otherwise (e.g. -0.25 for a 25% drop from the peak so far).
    """
    # Highest value seen up to and including each position
    running_high = np.maximum.accumulate(prices)
    # Distance below that high, relative to the high itself
    shortfall = np.subtract(prices, running_high)
    relative_drop = np.divide(shortfall, running_high)
    return relative_drop.min()

def autocorrelation(x, lag=1):
    """Return the last autocorrelation coefficient computed for ``x`` up to ``lag``.

    The coefficients come from ``sm.tsa.acf`` with ``nlags=lag``; the final
    entry of that result is returned.
    """
    acf_values = sm.tsa.acf(x, nlags=lag)
    return acf_values[-1]

def evaluate_simulation_advanced(real_prices, simulated_prices):
    """Print distributional statistics of the real and simulated series side by side.

    Simple returns are derived from each price series, then the following are
    printed for the real and the simulated data in turn: standard deviation of
    returns, maximum drawdown of prices, lag-1 autocorrelation of returns, and
    skewness and kurtosis of returns (scipy defaults). Returns None.
    """
    def simple_returns(series):
        # Step-to-step change divided by the value before the step
        return np.diff(series) / series[:-1]

    real_returns = simple_returns(real_prices)
    sim_returns = simple_returns(simulated_prices)

    # Volatility: both values are computed before either line is printed
    real_volatility, sim_volatility = np.std(real_returns), np.std(sim_returns)
    print(f"Real Volatility: {real_volatility:.5f}")
    print(f"Simulated Volatility: {sim_volatility:.5f}")

    # Drawdown is measured on the price levels, not on the returns
    real_drawdown = max_drawdown(real_prices)
    print(f"Real Max Drawdown: {real_drawdown:.2%}")
    sim_drawdown = max_drawdown(simulated_prices)
    print(f"Sim Max Drawdown: {sim_drawdown:.2%}")

    real_autocorr = autocorrelation(real_returns)
    print(f"Real Return Autocorrelation (lag=1): {real_autocorr:.4f}")
    sim_autocorr = autocorrelation(sim_returns)
    print(f"Sim Return Autocorrelation (lag=1): {sim_autocorr:.4f}")

    real_skew = skew(real_returns)
    print(f"Real Returns Skewness: {real_skew:.4f}")
    sim_skew = skew(sim_returns)
    print(f"Sim Returns Skewness: {sim_skew:.4f}")

    real_kurt = kurtosis(real_returns)
    print(f"Real Returns Kurtosis: {real_kurt:.4f}")
    sim_kurt = kurtosis(sim_returns)
    print(f"Sim Returns Kurtosis: {sim_kurt:.4f}")

In [None]:
# Score each simulated price path against the real minute closes:
# point-wise errors first, then the distributional statistics.
price_reports = [
    ("Linear Simulation Price Evaluation", simulated_prices_linear),
    ("Random Walk Simulation Price Evaluation", simulated_prices_random_walk),
    ("Geometric Brownian Motion Simulation Price Evaluation", simulated_prices_geometric_brownian_motion),
    ("Bezier Noise Simulation Price Evaluation", simulated_prices_bezier_noise),
]
for heading, simulated_path in price_reports:
    print(heading)
    evaluate_simulation(intraday_data['Close'].values, simulated_path)
    evaluate_simulation_advanced(intraday_data['Close'].values, simulated_path)
    print("==========================\n")

In [None]:
# Find the minute bars whose traded volume is exactly 0
zero_volume_mask = intraday_data["Volume"] == 0
zero_volumes = intraday_data[zero_volume_mask]
print(f"Found {len(zero_volumes)} rows with volume = 0")

# Overwrite those zeros with 1, in place on intraday_data
# (presumably so percentage errors on volume never divide by zero - confirm)
if len(zero_volumes):
    intraday_data.loc[zero_volume_mask, "Volume"] = 1
    print("Replaced all 0 volumes with 1")

# Confirm nothing below 1 is left
print(f"Minimum volume after replacement: {intraday_data['Volume'].min()}")

In [None]:
# Score each simulated volume series against the real minute volumes
# (point-wise error metrics only).
volume_reports = [
    ("Linear Simulation Volume Evaluation", simulated_volumes_linear),
    ("Random Walk Simulation Volume Evaluation", simulated_volumes_random_walk),
    ("Geometric Brownian Motion Simulation Volume Evaluation", simulated_volumes_geometric_brownian_motion),
    ("Bezier Noise Simulation Volume Evaluation", simulated_volumes_bezier_noise),
]
for heading, simulated_series in volume_reports:
    print(heading)
    evaluate_simulation(intraday_data['Volume'].values, simulated_series)
    print("==========================\n")