In [3]:
pip install dask[dataframe]


Collecting dask-expr<1.2,>=1.1 (from dask[dataframe])
  Downloading dask_expr-1.1.19-py3-none-any.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of dask-expr to determine which version is compatible with other requirements. This could take a while.
  Downloading dask_expr-1.1.18-py3-none-any.whl.metadata (2.6 kB)
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
Downloading dask_expr-1.1.16-py3-none-any.whl (243 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.2/243.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dask-expr
Successfully installed dask-expr-1.1.16


In [4]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import lightgbm as lgb

# Load the dataset (ensure that it's not too large for memory)
file_path = '/content/tesla_stock_data_final_cleaneddata(noduplciates_nomissingvalues).csv'  # Adjust path if needed
df = pd.read_csv(file_path)

# Optimize the data types of columns to reduce memory usage
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['year'] = df['timestamp'].dt.year.astype('int16')
df['month'] = df['timestamp'].dt.month.astype('int8')
df['day_of_week'] = df['timestamp'].dt.dayofweek.astype('int8')
df['week_of_year'] = df['timestamp'].dt.isocalendar().week.astype('int8')

# Use 'float32' or 'int32' where applicable to save memory
df['close'] = df['close'].astype('float32')
df['high'] = df['high'].astype('float32')
df['low'] = df['low'].astype('float32')
df['open'] = df['open'].astype('float32')

# Feature Engineering: Keep it simple with lag features and rolling stats
# 1. Lag Features (7, 14, and 30 days)
df['high_lag_7'] = df['high'].shift(7)
df['low_lag_7'] = df['low'].shift(7)
df['close_lag_7'] = df['close'].shift(7)

df['high_lag_14'] = df['high'].shift(14)
df['low_lag_14'] = df['low'].shift(14)
df['close_lag_14'] = df['close'].shift(14)

df['high_lag_30'] = df['high'].shift(30)
df['low_lag_30'] = df['low'].shift(30)
df['close_lag_30'] = df['close'].shift(30)

# 2. Rolling Features (30-day rolling mean and std)
df['close_rolling_mean_30'] = df['close'].rolling(window=30).mean().astype('float32')
df['high_rolling_mean_30'] = df['high'].rolling(window=30).mean().astype('float32')
df['low_rolling_mean_30'] = df['low'].rolling(window=30).mean().astype('float32')

df['close_rolling_std_30'] = df['close'].rolling(window=30).std().astype('float32')
df['high_rolling_std_30'] = df['high'].rolling(window=30).std().astype('float32')
df['low_rolling_std_30'] = df['low'].rolling(window=30).std().astype('float32')

# 3. Price Differences
df['price_diff'] = (df['high'] - df['low']).astype('float32')
df['close_open_diff'] = (df['close'] - df['open']).astype('float32')

# 4. Moving Averages (50 and 200 days)
df['close_50ma'] = df['close'].rolling(window=50).mean().astype('float32')
df['close_200ma'] = df['close'].rolling(window=200).mean().astype('float32')

# Drop rows with NaN values (created by rolling and lagging)
df = df.dropna()

In [5]:
# Prepare Features and Target Variables
X = df[['year', 'month', 'day_of_week', 'week_of_year',
        'close_rolling_mean_30', 'high_rolling_mean_30', 'low_rolling_mean_30',
        'close_rolling_std_30', 'high_rolling_std_30', 'low_rolling_std_30',
        'price_diff', 'close_open_diff', 'close_50ma', 'close_200ma',
        'high_lag_7', 'low_lag_7', 'close_lag_7',
        'high_lag_14', 'low_lag_14', 'close_lag_14',
        'high_lag_30', 'low_lag_30', 'close_lag_30']]

# Target Variables (high and low prices)
y_high = df['high']
y_low = df['low']


In [7]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [8]:
# Prepare Features and Target Variables
X = df[['year', 'month', 'day_of_week', 'week_of_year',
        'close_rolling_mean_30', 'high_rolling_mean_30', 'low_rolling_mean_30',
        'close_rolling_std_30', 'high_rolling_std_30', 'low_rolling_std_30',
        'price_diff', 'close_open_diff', 'close_50ma', 'close_200ma',
        'high_lag_7', 'low_lag_7', 'close_lag_7',
        'high_lag_14', 'low_lag_14', 'close_lag_14',
        'high_lag_30', 'low_lag_30', 'close_lag_30']]

# Target Variables (high and low prices)
y_high = df['high']
y_low = df['low']

# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
    X, y_high, y_low, test_size=0.2, shuffle=False
)

# Initialize LightGBM models
model_high = lgb.LGBMRegressor(random_state=42)
model_low = lgb.LGBMRegressor(random_state=42)

# Train the models
model_high.fit(X_train, y_train_high)
model_low.fit(X_train, y_train_low)

# Make predictions
y_pred_high = model_high.predict(X_test)
y_pred_low = model_low.predict(X_test)

# Calculate evaluation metrics: R², MSE, and MAE

# High price model evaluation
r2_high = r2_score(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
mae_high = mean_absolute_error(y_test_high, y_pred_high)

# Low price model evaluation
r2_low = r2_score(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
mae_low = mean_absolute_error(y_test_low, y_pred_low)

# Print the evaluation metrics for both models
print(f"High Price Model Evaluation Metrics:")
print(f"R²: {r2_high:.4f}")
print(f"MSE: {mse_high:.4f}")
print(f"MAE: {mae_high:.4f}")

print(f"\nLow Price Model Evaluation Metrics:")
print(f"R²: {r2_low:.4f}")
print(f"MSE: {mse_low:.4f}")
print(f"MAE: {mae_low:.4f}")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.171183 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4925
[LightGBM] [Info] Number of data points in the train set: 1193406, number of used features: 23
[LightGBM] [Info] Start training from score 458.317211
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.169710 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4925
[LightGBM] [Info] Number of data points in the train set: 1193406, number of used features: 23
[LightGBM] [Info] Start training from score 457.637015
High Price Model Evaluation Metrics:
R²: 0.9990
MSE: 16.9495
MAE: 0.9197

Low Price Model Evaluation Metrics:
R²: 0.9974
MSE: 45.5177
MAE: 4.0472


In [19]:
import joblib
import lightgbm as lgb

# Assuming you have already trained your LightGBM models, for example:
# model_high and model_low are your trained LightGBM models

# Save the LightGBM models using joblib
joblib.dump(model_high, '/content/best_lgbm_high_model.pkl')
joblib.dump(model_low, '/content/best_lgbm_low_model.pkl')

print("Models saved successfully!")


Models saved successfully!


In [23]:
import pandas as pd

# Assuming df is already loaded as you did earlier

def prepare_features_for_date(input_date, df):
    """
    This function prepares the features for a given input date based on your
    feature engineering approach.
    """
    # Convert input date to datetime and localize to UTC if necessary
    input_date = pd.to_datetime(input_date)

    # Ensure the timestamp column is timezone-aware in UTC
    if df['timestamp'].dt.tz is None:
        df['timestamp'] = df['timestamp'].dt.tz_localize('UTC')

    # If input_date is naive (without timezone), convert it to UTC as well
    if input_date.tz is None:
        input_date = input_date.tz_localize('UTC')

    # Filter data for rows until the input date
    df_filtered = df[df['timestamp'] <= input_date]
    df_filtered = df_filtered.sort_values(by='timestamp')

    # Get the last 30 rows for rolling features (or you can choose a different window)
    historical_data = df_filtered.tail(30)

    # Compute lag features and rolling statistics for the input date
    features = {}

    # Extract date-based features
    features['year'] = input_date.year
    features['month'] = input_date.month
    features['day_of_week'] = input_date.weekday()
    features['week_of_year'] = input_date.isocalendar().week

    # Get rolling statistics (30-day window)
    features['close_rolling_mean_30'] = historical_data['close'].mean()
    features['high_rolling_mean_30'] = historical_data['high'].mean()
    features['low_rolling_mean_30'] = historical_data['low'].mean()

    features['close_rolling_std_30'] = historical_data['close'].std()
    features['high_rolling_std_30'] = historical_data['high'].std()
    features['low_rolling_std_30'] = historical_data['low'].std()

    # Compute price differences
    features['price_diff'] = historical_data['high'].iloc[-1] - historical_data['low'].iloc[-1]
    features['close_open_diff'] = historical_data['close'].iloc[-1] - historical_data['open'].iloc[-1]

    # Moving averages (50-day and 200-day) for close prices
    features['close_50ma'] = historical_data['close'].tail(50).mean() if len(historical_data) >= 50 else np.nan
    features['close_200ma'] = historical_data['close'].tail(200).mean() if len(historical_data) >= 200 else np.nan

    # Lag features (e.g., for 7, 14, 30 days)
    features['high_lag_7'] = historical_data['high'].iloc[-7] if len(historical_data) >= 7 else np.nan
    features['low_lag_7'] = historical_data['low'].iloc[-7] if len(historical_data) >= 7 else np.nan
    features['close_lag_7'] = historical_data['close'].iloc[-7] if len(historical_data) >= 7 else np.nan

    features['high_lag_14'] = historical_data['high'].iloc[-14] if len(historical_data) >= 14 else np.nan
    features['low_lag_14'] = historical_data['low'].iloc[-14] if len(historical_data) >= 14 else np.nan
    features['close_lag_14'] = historical_data['close'].iloc[-14] if len(historical_data) >= 14 else np.nan

    features['high_lag_30'] = historical_data['high'].iloc[-30] if len(historical_data) >= 30 else np.nan
    features['low_lag_30'] = historical_data['low'].iloc[-30] if len(historical_data) >= 30 else np.nan
    features['close_lag_30'] = historical_data['close'].iloc[-30] if len(historical_data) >= 30 else np.nan

    return features

# Predict the high and low prices for the input date
def predict_for_date(input_date, df, model_high, model_low):
    # Prepare the features for the given input date
    features = prepare_features_for_date(input_date, df)

    # Convert features into DataFrame for prediction
    input_features = pd.DataFrame([features])

    # Make predictions for high and low prices
    predicted_high = model_high.predict(input_features)
    predicted_low = model_low.predict(input_features)

    # Adjust predictions if low price is greater than high price
    if predicted_low > predicted_high:
        # Swap the predictions if low price is greater than high price
        predicted_high, predicted_low = predicted_low, predicted_high

    # Return the predictions
    return predicted_high[0], predicted_low[0]

# Example usage
input_date = '2025-01-22'  # Replace with the date you want to predict for
df = pd.read_csv('/content/tesla_stock_data_final_cleaneddata(noduplciates_nomissingvalues).csv', parse_dates=['timestamp'])

predicted_high, predicted_low = predict_for_date(input_date, df, model_high, model_low)

# Print the results
print(f"Predicted High Price for {input_date}: {predicted_high:.4f}")
print(f"Predicted Low Price for {input_date}: {predicted_low:.4f}")


Predicted High Price for 2025-01-22: 240.1710
Predicted Low Price for 2025-01-22: 239.7123


In [24]:
# Print features for debugging
input_date = '2025-01-22'  # Use any date for testing
features = prepare_features_for_date(input_date, df)
print("Features for input date:", features)


Features for input date: {'year': 2025, 'month': 1, 'day_of_week': 2, 'week_of_year': 4, 'close_rolling_mean_30': 239.71108666666663, 'high_rolling_mean_30': 239.73929333333328, 'low_rolling_mean_30': 239.68667000000002, 'close_rolling_std_30': 0.060427116163324364, 'high_rolling_std_30': 0.047124925195831974, 'low_rolling_std_30': 0.06470932965830609, 'price_diff': 0.09999999999999432, 'close_open_diff': 0.09999999999999432, 'close_50ma': nan, 'close_200ma': nan, 'high_lag_7': 239.77, 'low_lag_7': 239.75, 'close_lag_7': 239.75, 'high_lag_14': 239.61, 'low_lag_14': 239.58, 'close_lag_14': 239.6, 'high_lag_30': 239.84, 'low_lag_30': 239.8, 'close_lag_30': 239.81}


In [25]:
# Test with different dates
dates_to_test = ['2025-01-22', '2025-02-01', '2025-03-01']
for date in dates_to_test:
    predicted_high, predicted_low = predict_for_date(date, df, model_high, model_low)
    print(f"Predicted High for {date}: {predicted_high:.4f}")
    print(f"Predicted Low for {date}: {predicted_low:.4f}")


Predicted High for 2025-01-22: 240.1710
Predicted Low for 2025-01-22: 239.7123
Predicted High for 2025-02-01: 240.1710
Predicted Low for 2025-02-01: 239.7123
Predicted High for 2025-03-01: 240.1710
Predicted Low for 2025-03-01: 239.7123


In [26]:
# Print model evaluation metrics (MSE, MAE, etc.)
y_pred_high_train = model_high.predict(X_train)
y_pred_low_train = model_low.predict(X_train)

# High price evaluation
print(f"Training High Price MAE: {mean_absolute_error(y_train_high, y_pred_high_train)}")
print(f"Training High Price MSE: {mean_squared_error(y_train_high, y_pred_high_train)}")

# Low price evaluation
print(f"Training Low Price MAE: {mean_absolute_error(y_train_low, y_pred_low_train)}")
print(f"Training Low Price MSE: {mean_squared_error(y_train_low, y_pred_low_train)}")


Training High Price MAE: 1.742615892138584
Training High Price MSE: 18.767362042956403
Training Low Price MAE: 1.7522237167479906
Training Low Price MSE: 19.494061697537926


In [27]:
# Example of using cross-validation for LightGBM
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(model_high, X, y_high, cv=5, scoring='neg_mean_squared_error')
print(f"High price model cross-validation scores (MSE): {-cv_scores}")


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.125381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4928
[LightGBM] [Info] Number of data points in the train set: 1193406, number of used features: 23
[LightGBM] [Info] Start training from score 481.039790
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.113953 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4928
[LightGBM] [Info] Number of data points in the train set: 1193406, number of used features: 23
[LightGBM] [Info] Start training from score 360.921766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112652 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is n


It seems like the cross-validation results for your high-price model are highly variable, with some values being very large (e.g., 1.46782586e+04 and 2.57570674e+04) and others being smaller (e.g., 1.49967317e+01 and 1.49609884e+01). This kind of inconsistency suggests that the model's performance is not stable across different subsets of your data, which could indicate that the model is either overfitting to some subsets or underfitting to others.

In [30]:
from sklearn.model_selection import RandomizedSearchCV
import lightgbm as lgb
import numpy as np

# Define the parameter grid for randomized search
param_dist = {
    'num_leaves': np.arange(20, 101, 10),  # Try different values for the number of leaves
    'max_depth': [-1, 5, 10],               # Max depth of the trees
    'learning_rate': [0.01, 0.05, 0.1],     # Learning rate
    'n_estimators': [100, 200, 500],        # Number of trees in the model
    'min_child_samples': [20, 50],          # Minimum number of samples required to form a leaf
    'subsample': [0.7, 0.8, 0.9],           # Fraction of samples used for fitting each tree
    'colsample_bytree': [0.7, 0.8, 0.9]     # Fraction of features used for fitting each tree
}

# Initialize the model (this model will be used for RandomizedSearchCV)
model_high = lgb.LGBMRegressor(random_state=42)
model_low = lgb.LGBMRegressor(random_state=42)

# Setup RandomizedSearchCV for hyperparameter tuning
random_search_high = RandomizedSearchCV(estimator=model_high, param_distributions=param_dist,
                                         n_iter=10, cv=3, n_jobs=-1,
                                         scoring='neg_mean_squared_error', verbose=1, random_state=42)
random_search_low = RandomizedSearchCV(estimator=model_low, param_distributions=param_dist,
                                        n_iter=10, cv=3, n_jobs=-1,
                                        scoring='neg_mean_squared_error', verbose=1, random_state=42)

# Fit RandomizedSearchCV to find the best hyperparameters for the high price model
random_search_high.fit(X_train, y_train_high)
random_search_low.fit(X_train, y_train_low)

# Get the best parameters and best score for both models
print(f"Best Parameters for High Price Model: {random_search_high.best_params_}")
print(f"Best Score for High Price Model: {-random_search_high.best_score_}")

print(f"Best Parameters for Low Price Model: {random_search_low.best_params_}")
print(f"Best Score for Low Price Model: {-random_search_low.best_score_}")

# Retrieve the best models from randomized search
best_model_high = random_search_high.best_estimator_
best_model_low = random_search_low.best_estimator_

# Train the best models on the entire training set
best_model_high.fit(X_train, y_train_high)
best_model_low.fit(X_train, y_train_low)

# Make predictions with the best models
y_pred_high = best_model_high.predict(X_test)
y_pred_low = best_model_low.predict(X_test)

# Evaluate the models using R², MSE, and MAE
r2_high = r2_score(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
mae_high = mean_absolute_error(y_test_high, y_pred_high)

r2_low = r2_score(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
mae_low = mean_absolute_error(y_test_low, y_pred_low)

# Print the evaluation metrics for both models
print(f"High Price Model Evaluation Metrics:")
print(f"R²: {r2_high:.4f}")
print(f"MSE: {mse_high:.4f}")
print(f"MAE: {mae_high:.4f}")

print(f"\nLow Price Model Evaluation Metrics:")
print(f"R²: {r2_low:.4f}")
print(f"MSE: {mse_low:.4f}")
print(f"MAE: {mae_low:.4f}")


Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093439 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4925
[LightGBM] [Info] Number of data points in the train set: 1193406, number of used features: 23
[LightGBM] [Info] Start training from score 457.637015
Best Parameters for High Price Model: {'subsample': 0.7, 'num_leaves': 70, 'n_estimators': 200, 'min_child_samples': 20, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.9}
Best Score for High Price Model: 6709.415584653631
Best Parameters for Low Price Model: {'subsample': 0.9, 'num_leaves': 90, 'n_estimators': 200, 'min_child_samples': 20, 'max_depth': 10, 'learning_rate': 0.05, 'colsample_bytree': 0.7}
Best Score for Low Price Model: 6827.374731158

In [31]:
import joblib

# Save the best models for both high and low price predictions
joblib.dump(best_model_high, '/content/best_model_high_Tuned.pkl')
joblib.dump(best_model_low, '/content/best_model_low_Tuned.pkl')

print("Models have been saved successfully.")


Models have been saved successfully.


In [36]:
import pandas as pd
import numpy as np
import joblib

# Load the saved models
model_high = joblib.load('/content/best_model_high_Tuned.pkl')  # Adjust path if needed
model_low = joblib.load('/content/best_model_low_Tuned.pkl')    # Adjust path if needed

# Prepare the features for the given input date
def prepare_features_for_date(input_date, df):
    """
    This function prepares the features for a given input date based on your
    feature engineering approach.
    """
    # Convert input date to datetime and localize to UTC if necessary
    input_date = pd.to_datetime(input_date)

    # Ensure the timestamp column is timezone-aware in UTC
    if df['timestamp'].dt.tz is None:
        df['timestamp'] = df['timestamp'].dt.tz_localize('UTC')

    # If input_date is naive (without timezone), convert it to UTC as well
    if input_date.tz is None:
        input_date = input_date.tz_localize('UTC')

    # Filter data for rows until the input date
    df_filtered = df[df['timestamp'] <= input_date]
    df_filtered = df_filtered.sort_values(by='timestamp')

    # Get the last 30 rows for rolling features (or you can choose a different window)
    historical_data = df_filtered.tail(30)

    # Compute lag features and rolling statistics for the input date
    features = {}

    # Extract date-based features
    features['year'] = input_date.year
    features['month'] = input_date.month
    features['day_of_week'] = input_date.weekday()
    features['week_of_year'] = input_date.isocalendar().week

    # Get rolling statistics (30-day window)
    features['close_rolling_mean_30'] = historical_data['close'].mean()
    features['high_rolling_mean_30'] = historical_data['high'].mean()
    features['low_rolling_mean_30'] = historical_data['low'].mean()

    features['close_rolling_std_30'] = historical_data['close'].std()
    features['high_rolling_std_30'] = historical_data['high'].std()
    features['low_rolling_std_30'] = historical_data['low'].std()

    # Compute price differences
    features['price_diff'] = historical_data['high'].iloc[-1] - historical_data['low'].iloc[-1]
    features['close_open_diff'] = historical_data['close'].iloc[-1] - historical_data['open'].iloc[-1]

    # Moving averages (50-day and 200-day) for close prices
    features['close_50ma'] = historical_data['close'].tail(50).mean() if len(historical_data) >= 50 else np.nan
    features['close_200ma'] = historical_data['close'].tail(200).mean() if len(historical_data) >= 200 else np.nan

    # Lag features (e.g., for 7, 14, 30 days)
    features['high_lag_7'] = historical_data['high'].iloc[-7] if len(historical_data) >= 7 else np.nan
    features['low_lag_7'] = historical_data['low'].iloc[-7] if len(historical_data) >= 7 else np.nan
    features['close_lag_7'] = historical_data['close'].iloc[-7] if len(historical_data) >= 7 else np.nan

    features['high_lag_14'] = historical_data['high'].iloc[-14] if len(historical_data) >= 14 else np.nan
    features['low_lag_14'] = historical_data['low'].iloc[-14] if len(historical_data) >= 14 else np.nan
    features['close_lag_14'] = historical_data['close'].iloc[-14] if len(historical_data) >= 14 else np.nan

    features['high_lag_30'] = historical_data['high'].iloc[-30] if len(historical_data) >= 30 else np.nan
    features['low_lag_30'] = historical_data['low'].iloc[-30] if len(historical_data) >= 30 else np.nan
    features['close_lag_30'] = historical_data['close'].iloc[-30] if len(historical_data) >= 30 else np.nan

    return features

# Predict the high and low prices for the input date
def predict_for_date(input_date, df, model_high, model_low):
    # Prepare the features for the given input date
    features = prepare_features_for_date(input_date, df)

    # Convert features into DataFrame for prediction
    input_features = pd.DataFrame([features])

    # Make predictions for high and low prices
    predicted_high = model_high.predict(input_features)
    predicted_low = model_low.predict(input_features)

    # Adjust predictions if low price is greater than high price
    if predicted_low > predicted_high:
        # Swap the predictions if low price is greater than high price
        predicted_high, predicted_low = predicted_low, predicted_high

    # Return the predictions
    return predicted_high[0], predicted_low[0]

# Example usage
input_date = '2026-11-22'  # Replace with the date you want to predict for
df = pd.read_csv('/content/tesla_stock_data_final_cleaneddata(noduplciates_nomissingvalues).csv', parse_dates=['timestamp'])

predicted_high, predicted_low = predict_for_date(input_date, df, model_high, model_low)

# Print the results
print(f"Predicted High Price for {input_date}: {predicted_high:.4f}")
print(f"Predicted Low Price for {input_date}: {predicted_low:.4f}")


Predicted High Price for 2026-11-22: 240.0223
Predicted Low Price for 2026-11-22: 238.1804
