In [None]:
import pandas as pd

# Load the dataset
file_path = "/content/tesla_stock_data_final_cleaneddata(noduplciates_nomissingvalues).csv"
df = pd.read_csv(file_path, parse_dates=['timestamp'])

# Check the first few rows to understand the structure of your data
df.head()

# Check for any missing values (if any)
print("Missing Values:")
print(df.isnull().sum())

# Check the column data types
print("Data Types:")
print(df.dtypes)

# Ensure that the timestamp column is of datetime type
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Check for duplicates again just to confirm
duplicates = df.duplicated().sum()
print(f"Duplicate rows: {duplicates}")


Missing Values:
timestamp      0
close          0
high           0
low            0
trade_count    0
open           0
volume         0
vwap           0
dtype: int64
Data Types:
timestamp      datetime64[ns, UTC]
close                      float64
high                       float64
low                        float64
trade_count                float64
open                       float64
volume                     float64
vwap                       float64
dtype: object
Duplicate rows: 0


In [None]:
df['year'] = df['timestamp'].dt.year
df['month'] = df['timestamp'].dt.month
df['day_of_month'] = df['timestamp'].dt.day
df['day_of_week'] = df['timestamp'].dt.dayofweek
df['week_of_year'] = df['timestamp'].dt.isocalendar().week
df['quarter'] = df['timestamp'].dt.quarter


In [None]:
df['close_rolling_mean_30'] = df['close'].rolling(window=30).mean()
df['high_rolling_mean_30'] = df['high'].rolling(window=30).mean()
df['low_rolling_mean_30'] = df['low'].rolling(window=30).mean()

df['close_rolling_std_30'] = df['close'].rolling(window=30).std()
df['high_rolling_std_30'] = df['high'].rolling(window=30).std()
df['low_rolling_std_30'] = df['low'].rolling(window=30).std()

# Drop NA created by rolling windows (for the initial rows)
df = df.dropna()


In [None]:
df['price_diff'] = df['high'] - df['low']
df['close_open_diff'] = df['close'] - df['open']
df['price_range'] = df['high'] - df['low']


In [None]:
df['close_50ma'] = df['close'].rolling(window=50).mean()
df['close_200ma'] = df['close'].rolling(window=200).mean()


In [None]:
df['high_lag_1'] = df['high'].shift(1)
df['low_lag_1'] = df['low'].shift(1)
df['close_lag_1'] = df['close'].shift(1)

# Drop any rows with missing lag values (the first row in the dataset)
df = df.dropna()


In [None]:
from sklearn.model_selection import train_test_split

# Features (X)
X = df[['year', 'month', 'day_of_month', 'day_of_week', 'week_of_year', 'quarter',
        'close_rolling_mean_30', 'high_rolling_mean_30', 'low_rolling_mean_30',
        'close_rolling_std_30', 'high_rolling_std_30', 'low_rolling_std_30',
        'price_diff', 'close_open_diff', 'price_range',
        'close_50ma', 'close_200ma', 'high_lag_1', 'low_lag_1', 'close_lag_1']]

# Target variables
y_high = df['high']
y_low = df['low']



In [None]:
import gc
import pandas as pd
import time  # <-- Add this line
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Model for predicting high price
rf_high = RandomForestRegressor(n_estimators=100, random_state=42)
rf_high.fit(X_train, y_train_high)

# Model for predicting low price
rf_low = RandomForestRegressor(n_estimators=100, random_state=42)
rf_low.fit(X_train, y_train_low)

# Make predictions
y_pred_high = rf_high.predict(X_test)
y_pred_low = rf_low.predict(X_test)


In [None]:
from sklearn.ensemble import RandomForestRegressor
# Function to train on a mini-batch of data
def train_on_mini_batch(X, y_high, y_low, batch_size, model_high, model_low):
    total_samples = len(X)
    for start in range(0, total_samples, batch_size):
        end = min(start + batch_size, total_samples)
        batch_X = X[start:end]
        batch_y_high = y_high[start:end]
        batch_y_low = y_low[start:end]

        # Train-test split for this batch
        X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
            batch_X, batch_y_high, batch_y_low, test_size=0.2, shuffle=False)

        print(f"Training on batch from sample {start} to {end} (Batch size: {len(batch_X)} samples)")

        # Train the model for high prices
        start_time = time.time()
        model_high.fit(X_train, y_train_high)
        training_time_high = time.time() - start_time

        # Train the model for low prices
        start_time = time.time()
        model_low.fit(X_train, y_train_low)
        training_time_low = time.time() - start_time

        # Print training times for this batch
        print(f"Training time for high price model (Batch): {training_time_high:.2f} seconds")
        print(f"Training time for low price model (Batch): {training_time_low:.2f} seconds")

        # Free memory after each batch
        del batch_X, batch_y_high, batch_y_low, X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low
        gc.collect()

# Instantiate the Random Forest models
rf_high = RandomForestRegressor(n_estimators=100, random_state=42)
rf_low = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model in mini-batches (e.g., batch size of 1000)
train_on_mini_batch(X, y_high, y_low, batch_size=1000, model_high=rf_high, model_low=rf_low)

Training on batch from sample 0 to 1000 (Batch size: 1000 samples)
Training time for high price model (Batch): 1.08 seconds
Training time for low price model (Batch): 1.45 seconds
Training on batch from sample 1000 to 2000 (Batch size: 1000 samples)
Training time for high price model (Batch): 1.43 seconds
Training time for low price model (Batch): 1.13 seconds
Training on batch from sample 2000 to 3000 (Batch size: 1000 samples)
Training time for high price model (Batch): 0.94 seconds
Training time for low price model (Batch): 0.94 seconds
Training on batch from sample 3000 to 4000 (Batch size: 1000 samples)
Training time for high price model (Batch): 0.98 seconds
Training time for low price model (Batch): 0.95 seconds
Training on batch from sample 4000 to 5000 (Batch size: 1000 samples)
Training time for high price model (Batch): 0.96 seconds
Training time for low price model (Batch): 0.95 seconds
Training on batch from sample 5000 to 6000 (Batch size: 1000 samples)
Training time for 

In [None]:
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# After training, make predictions using the final trained models
# Final train-test split (using the entire dataset)
X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
    X, y_high, y_low, test_size=0.2, shuffle=False)

# Make predictions for high prices and low prices
y_pred_high = rf_high.predict(X_test)
y_pred_low = rf_low.predict(X_test)

# Evaluate the models for high and low prices
# High price model evaluation
mae_high = mean_absolute_error(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
r2_high = r2_score(y_test_high, y_pred_high)

# Low price model evaluation
mae_low = mean_absolute_error(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
r2_low = r2_score(y_test_low, y_pred_low)

# Print the evaluation results
print("High Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_high:.4f}")
print(f"Mean Squared Error: {mse_high:.4f}")
print(f"R-squared: {r2_high:.4f}")

print("\nLow Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_low:.4f}")
print(f"Mean Squared Error: {mse_low:.4f}")
print(f"R-squared: {r2_low:.4f}")

High Price Model Evaluation:
Mean Absolute Error: 76.2621
Mean Squared Error: 21042.8353
R-squared: -0.2020

Low Price Model Evaluation:
Mean Absolute Error: 76.3551
Mean Squared Error: 21025.2940
R-squared: -0.2043


In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
import time
import gc
import numpy as np
from sklearn.model_selection import train_test_split

# Reduced hyperparameter search space
param_dist = {
    'n_estimators': [50, 100],  # Less variation
    'max_depth': [10, 20],  # Less depth
    'min_samples_split': [2, 5],  # Fewer splits
    'min_samples_leaf': [1, 2],  # Fewer leaves
    'max_features': ['sqrt']  # Single feature option
}

# Function for batch training with hyperparameter tuning
def train_on_mini_batch_with_tuning(X, y_high, y_low, batch_size, param_dist, n_iter=3):
    total_samples = len(X)

    best_model_high = None
    best_model_low = None
    best_score_high = float('inf')
    best_score_low = float('inf')

    for start in range(0, total_samples, batch_size):
        end = min(start + batch_size, total_samples)
        batch_X = X[start:end]
        batch_y_high = y_high[start:end]
        batch_y_low = y_low[start:end]

        # Train-test split for this batch (80-20 split)
        X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
            batch_X, batch_y_high, batch_y_low, test_size=0.2, shuffle=False
        )

        print(f"Training on batch from sample {start} to {end} (Batch size: {len(batch_X)} samples)")

        # Hyperparameter tuning for high price model using RandomizedSearchCV
        rf_high = RandomForestRegressor(random_state=42)
        random_search_high = RandomizedSearchCV(
            estimator=rf_high,
            param_distributions=param_dist,
            n_iter=n_iter,
            cv=2,  # Reduced cross-validation folds
            scoring='neg_mean_absolute_error',
            n_jobs=-1,
            random_state=42,
            verbose=2,
            error_score='raise'
        )

        try:
            start_time = time.time()
            random_search_high.fit(X_train, y_train_high)
            training_time_high = time.time() - start_time
            print(f"Training time for high price model (Batch): {training_time_high:.2f} seconds")

            # Track the best high price model
            if random_search_high.best_score_ < best_score_high:
                best_score_high = random_search_high.best_score_
                best_model_high = random_search_high.best_estimator_
        except Exception as e:
            print(f"Error training high price model: {e}")

        # Hyperparameter tuning for low price model using RandomizedSearchCV
        rf_low = RandomForestRegressor(random_state=42)
        random_search_low = RandomizedSearchCV(
            estimator=rf_low,
            param_distributions=param_dist,
            n_iter=n_iter,
            cv=2,  # Reduced cross-validation folds
            scoring='neg_mean_absolute_error',
            n_jobs=-1,
            random_state=42,
            verbose=2,
            error_score='raise'
        )

        try:
            start_time = time.time()
            random_search_low.fit(X_train, y_train_low)
            training_time_low = time.time() - start_time
            print(f"Training time for low price model (Batch): {training_time_low:.2f} seconds")

            # Track the best low price model
            if random_search_low.best_score_ < best_score_low:
                best_score_low = random_search_low.best_score_
                best_model_low = random_search_low.best_estimator_
        except Exception as e:
            print(f"Error training low price model: {e}")

        # Free memory after each batch
        del batch_X, batch_y_high, batch_y_low, X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low
        gc.collect()

    return best_model_high, best_model_low

# Call the function with a reduced batch size and iteration count
best_model_high, best_model_low = train_on_mini_batch_with_tuning(
    X, y_high, y_low, batch_size=1000, param_dist=param_dist, n_iter=3
)

print("Best model for high prices:", best_model_high)
print("Best model for low prices:", best_model_low)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Training time for low price model (Batch): 1.15 seconds
Training on batch from sample 493000 to 494000 (Batch size: 1000 samples)
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for high price model (Batch): 1.04 seconds
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for low price model (Batch): 1.03 seconds
Training on batch from sample 494000 to 495000 (Batch size: 1000 samples)
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for high price model (Batch): 1.04 seconds
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for low price model (Batch): 1.04 seconds
Training on batch from sample 495000 to 496000 (Batch size: 1000 samples)
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for high price model (Batch): 1.04 seconds
Fitting 2 folds for each of 3 candidates, totalling 6 fits
Training time for lo

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# After training and hyperparameter tuning, make predictions using the best models
# Use the entire dataset to evaluate the models (you can also use test data, but here we'll use the full dataset for simplicity)
y_pred_high = best_model_high.predict(X_test)
y_pred_low = best_model_low.predict(X_test)

# Evaluate the models for high and low prices
# High price model evaluation
mae_high = mean_absolute_error(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
r2_high = r2_score(y_test_high, y_pred_high)

# Low price model evaluation
mae_low = mean_absolute_error(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
r2_low = r2_score(y_test_low, y_pred_low)

# Print the evaluation results
print("High Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_high:.4f}")
print(f"Mean Squared Error: {mse_high:.4f}")
print(f"R-squared: {r2_high:.4f}")

print("\nLow Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_low:.4f}")
print(f"Mean Squared Error: {mse_low:.4f}")
print(f"R-squared: {r2_low:.4f}")


High Price Model Evaluation:
Mean Absolute Error: 322.6513
Mean Squared Error: 117477.3503
R-squared: -5.7105

Low Price Model Evaluation:
Mean Absolute Error: 320.4178
Mean Squared Error: 115985.7376
R-squared: -5.6437


In [None]:
import joblib

# Save the best models to disk
joblib.dump(best_model_high, 'best_model_rfhigh.pkl')
joblib.dump(best_model_low, 'best_model_rflow.pkl')

print("Models saved as 'best_model_high.pkl' and 'best_model_low.pkl'.")


Models saved as 'best_model_high.pkl' and 'best_model_low.pkl'.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import gc
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
file_path = "/content/tesla_stock_data_final_cleaneddata(noduplciates_nomissingvalues).csv"
df = pd.read_csv(file_path, parse_dates=['timestamp'])

# Ensure that timestamp is of datetime type
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Check for missing values and duplicates
print("Missing Values:", df.isnull().sum())
print("Duplicates:", df.duplicated().sum())

# Add date-related features
df['year'] = df['timestamp'].dt.year
df['month'] = df['timestamp'].dt.month
df['day_of_month'] = df['timestamp'].dt.day
df['day_of_week'] = df['timestamp'].dt.dayofweek
df['week_of_year'] = df['timestamp'].dt.isocalendar().week
df['quarter'] = df['timestamp'].dt.quarter

# Add rolling features
df['close_rolling_mean_30'] = df['close'].rolling(window=30).mean()
df['high_rolling_mean_30'] = df['high'].rolling(window=30).mean()
df['low_rolling_mean_30'] = df['low'].rolling(window=30).mean()

df['close_rolling_std_30'] = df['close'].rolling(window=30).std()
df['high_rolling_std_30'] = df['high'].rolling(window=30).std()
df['low_rolling_std_30'] = df['low'].rolling(window=30).std()

# Calculate additional features
df.loc[:, 'price_diff'] = df['high'] - df['low']
df.loc[:, 'close_open_diff'] = df['close'] - df['open']
df.loc[:, 'price_range'] = df['high'] - df['low']

df['close_50ma'] = df['close'].rolling(window=50).mean()
df['close_200ma'] = df['close'].rolling(window=200).mean()

# Add lag features
df.loc[:, 'high_lag_1'] = df['high'].shift(1)
df.loc[:, 'low_lag_1'] = df['low'].shift(1)
df.loc[:, 'close_lag_1'] = df['close'].shift(1)

# Drop rows with NaN values created by rolling windows or lagging
df = df.dropna()


Missing Values: timestamp      0
close          0
high           0
low            0
trade_count    0
open           0
volume         0
vwap           0
dtype: int64
Duplicates: 0


In [4]:
# Define the features and target variables
X = df[['year', 'month', 'day_of_month', 'day_of_week', 'week_of_year', 'quarter',
        'close_rolling_mean_30', 'high_rolling_mean_30', 'low_rolling_mean_30',
        'close_rolling_std_30', 'high_rolling_std_30', 'low_rolling_std_30',
        'price_diff', 'close_open_diff', 'price_range',
        'close_50ma', 'close_200ma', 'high_lag_1', 'low_lag_1', 'close_lag_1']]

y_high = df['high']
y_low = df['low']

# Split the data into training and testing sets
X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
    X, y_high, y_low, test_size=0.2, shuffle=False
)

# Instantiate RandomForestRegressor for high and low price predictions
rf_high = RandomForestRegressor(n_estimators=100, random_state=42)
rf_low = RandomForestRegressor(n_estimators=100, random_state=42)

# Mini-batch training function
def train_on_mini_batch(X_train, y_train_high, y_train_low, batch_size=1000):
    total_samples = len(X_train)
    for start in range(0, total_samples, batch_size):
        end = min(start + batch_size, total_samples)
        batch_X = X_train[start:end]
        batch_y_high = y_train_high[start:end]
        batch_y_low = y_train_low[start:end]

        print(f"Training on batch from sample {start} to {end} (Batch size: {len(batch_X)} samples)")

        # Train on the high price model (batch)
        rf_high.fit(batch_X, batch_y_high)

        # Train on the low price model (batch)
        rf_low.fit(batch_X, batch_y_low)

        # Free memory after each batch
        del batch_X, batch_y_high, batch_y_low
        gc.collect()

# Train models in mini-batches
train_on_mini_batch(X_train, y_train_high, y_train_low, batch_size=1000)

# Make predictions on the test set
y_pred_high = rf_high.predict(X_test)
y_pred_low = rf_low.predict(X_test)

# Evaluate the models
mae_high = mean_absolute_error(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
r2_high = r2_score(y_test_high, y_pred_high)

mae_low = mean_absolute_error(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
r2_low = r2_score(y_test_low, y_pred_low)

print("High Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_high:.4f}")
print(f"Mean Squared Error: {mse_high:.4f}")
print(f"R-squared: {r2_high:.4f}")

print("\nLow Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_low:.4f}")
print(f"Mean Squared Error: {mse_low:.4f}")
print(f"R-squared: {r2_low:.4f}")

Training on batch from sample 0 to 1000 (Batch size: 1000 samples)
Training on batch from sample 1000 to 2000 (Batch size: 1000 samples)
Training on batch from sample 2000 to 3000 (Batch size: 1000 samples)
Training on batch from sample 3000 to 4000 (Batch size: 1000 samples)
Training on batch from sample 4000 to 5000 (Batch size: 1000 samples)
Training on batch from sample 5000 to 6000 (Batch size: 1000 samples)
Training on batch from sample 6000 to 7000 (Batch size: 1000 samples)
Training on batch from sample 7000 to 8000 (Batch size: 1000 samples)
Training on batch from sample 8000 to 9000 (Batch size: 1000 samples)
Training on batch from sample 9000 to 10000 (Batch size: 1000 samples)
Training on batch from sample 10000 to 11000 (Batch size: 1000 samples)
Training on batch from sample 11000 to 12000 (Batch size: 1000 samples)
Training on batch from sample 12000 to 13000 (Batch size: 1000 samples)
Training on batch from sample 13000 to 14000 (Batch size: 1000 samples)
Training on ba

In [7]:
import time
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import gc

param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False]
}

# Split the data into training and testing sets
X = df[['year', 'month', 'day_of_month', 'day_of_week', 'week_of_year', 'quarter',
        'close_rolling_mean_30', 'high_rolling_mean_30', 'low_rolling_mean_30',
        'close_rolling_std_30', 'high_rolling_std_30', 'low_rolling_std_30',
        'price_diff', 'close_open_diff', 'price_range',
        'close_50ma', 'close_200ma', 'high_lag_1', 'low_lag_1', 'close_lag_1']]

y_high = df['high']
y_low = df['low']

X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
    X, y_high, y_low, test_size=0.2, shuffle=False
)

# Define batch size
batch_size = 1000  # You can adjust the batch size as per your dataset size

# Instantiate the RandomForestRegressor
rf_high = RandomForestRegressor(random_state=42)
rf_low = RandomForestRegressor(random_state=42)

# Function to train in mini-batches
def batch_train_random_search(X_train, y_train_high, y_train_low, batch_size=1000):
    total_samples = len(X_train)

    # Placeholder for the best models after hyperparameter tuning in each batch
    best_model_high = None
    best_model_low = None

    # Instantiate RandomizedSearchCV for high price prediction
    random_search_high = RandomizedSearchCV(estimator=rf_high, param_distributions=param_dist,
                                            n_iter=5, cv=3, verbose=2, random_state=42,
                                            n_jobs=-1, scoring='neg_mean_absolute_error')

    # Instantiate RandomizedSearchCV for low price prediction
    random_search_low = RandomizedSearchCV(estimator=rf_low, param_distributions=param_dist,
                                           n_iter=5, cv=3, verbose=2, random_state=42,
                                           n_jobs=-1, scoring='neg_mean_absolute_error')

    # Train in batches
    for start in range(0, total_samples, batch_size):
        end = min(start + batch_size, total_samples)
        batch_X = X_train[start:end]
        batch_y_high = y_train_high[start:end]
        batch_y_low = y_train_low[start:end]

        print(f"Training on batch from sample {start} to {end} (Batch size: {len(batch_X)} samples)")

        # Apply RandomizedSearchCV for the current batch
        random_search_high.fit(batch_X, batch_y_high)
        random_search_low.fit(batch_X, batch_y_low)

        # Get the best models after hyperparameter tuning for each batch
        best_model_high = random_search_high.best_estimator_
        best_model_low = random_search_low.best_estimator_

        # Clear memory after each batch
        del batch_X, batch_y_high, batch_y_low
        gc.collect()

    return best_model_high, best_model_low

# Train models on mini-batches
start_time = time.time()
best_model_high, best_model_low = batch_train_random_search(X_train, y_train_high, y_train_low, batch_size=batch_size)
training_time = time.time() - start_time
print(f"Training time (batch-wise): {training_time:.2f} seconds")

# Make predictions on the test set
y_pred_high = best_model_high.predict(X_test)
y_pred_low = best_model_low.predict(X_test)

# Evaluate the models
mae_high = mean_absolute_error(y_test_high, y_pred_high)
mse_high = mean_squared_error(y_test_high, y_pred_high)
r2_high = r2_score(y_test_high, y_pred_high)

mae_low = mean_absolute_error(y_test_low, y_pred_low)
mse_low = mean_squared_error(y_test_low, y_pred_low)
r2_low = r2_score(y_test_low, y_pred_low)

print("\nHigh Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_high:.4f}")
print(f"Mean Squared Error: {mse_high:.4f}")
print(f"R-squared: {r2_high:.4f}")

print("\nLow Price Model Evaluation:")
print(f"Mean Absolute Error: {mae_low:.4f}")
print(f"Mean Squared Error: {mse_low:.4f}")
print(f"R-squared: {r2_low:.4f}")


Training on batch from sample 0 to 1000 (Batch size: 1000 samples)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Training on batch from sample 1000 to 2000 (Batch size: 1000 samples)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Training on batch from sample 2000 to 3000 (Batch size: 1000 samples)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Training on batch from sample 3000 to 4000 (Batch size: 1000 samples)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Training on batch from sample 4000 to 5000 (Batch size: 1000 samples)
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits
Training on batch from sample 5000 to 6000 (Batch siz