In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

# Load the datasets
consumption_battery_data = pd.read_csv('consumption_battery_data.csv')  # consumption and battery data
solar_tariff_data = pd.read_csv(r'C:\Users\DELL\Desktop\luminous\solar_tariff_generated_data.csv')  # solar generation and tariff data
predicted_data = pd.read_csv('daily_pred.csv')  # predicted data for future input

# Parse the 'datetime' column of every frame as day-first dates.
# errors='coerce' turns unparseable values into NaT instead of raising.
for frame in (consumption_battery_data, solar_tariff_data, predicted_data):
    frame['datetime'] = pd.to_datetime(frame['datetime'], errors='coerce', dayfirst=True)

# Feature selection
features_consumption = ['power_usage_kW']  # from consumption_battery_data
features_solar_tariff = ['electricity_tariff_INR_per_kWh', 'ac_power_output_kW']  # from solar_tariff_data
target = 'battery_level_kWh'  # from consumption_battery_data

# Combine the features of both datasets column-wise.
# NOTE(review): concat(axis=1) aligns rows on the index, not on 'datetime';
# this assumes both CSVs list the same timestamps in the same order - confirm.
combined_features = pd.concat(
    [consumption_battery_data[features_consumption], solar_tariff_data[features_solar_tariff]],
    axis=1,
)

# Number of leading rows used for the initial fit; the rest is held back.
train_size = 1016

# Fit the scaler on the training rows only, so the min/max of rows that are
# later treated as unseen data do not leak into the preprocessing step.
scaler = MinMaxScaler()
scaler.fit(combined_features.iloc[:train_size])

# Scale every row with the training-fitted scaler; held-back rows may
# therefore fall slightly outside the [0, 1] range.
scaled_data = scaler.transform(combined_features)

# Prepare training data (first `train_size` data points)
X_train = scaled_data[:train_size]
y_train = consumption_battery_data[target].iloc[:train_size]

# Train initial Random Forest model (fixed seed for reproducible results)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Walk forward through the held-back rows one batch at a time:
# predict the batch, publish the predictions, then refit the model on
# everything seen so far (initial training rows + all processed batches).
index = len(X_train)  # first row not yet used for training (1016 after the initial split)
batch_size = 24
interval = 10  # seconds to wait between publishing predictions and retraining

while index + batch_size <= len(consumption_battery_data):
    batch_rows = slice(index, index + batch_size)

    # Scale the next batch of features with the already-fitted scaler
    X_batch = scaler.transform(pd.concat([
        consumption_battery_data[features_consumption].iloc[batch_rows],
        solar_tariff_data[features_solar_tariff].iloc[batch_rows]
    ], axis=1))

    # Predict battery remaining for this batch
    y_pred = model.predict(X_batch)

    # Predictions paired with their timestamps (kept for inspection;
    # not written to disk)
    predictions_df = pd.DataFrame({
        'datetime': consumption_battery_data['datetime'].iloc[batch_rows],
        'predicted_battery_remaining': y_pred
    })

    # Overwrite the prediction column and rewrite the whole file, so the CSV
    # always holds only the most recent batch. predicted_data must have
    # exactly `batch_size` rows, otherwise the assignment raises ValueError.
    predicted_data['pred_batteryleft'] = y_pred
    predicted_data.to_csv('daily_pred.csv', mode='w', index=False)

    # Simulate waiting for the actual readings of this batch to arrive
    time.sleep(interval)

    # The batch just predicted now becomes training data. Its scaled features
    # are exactly X_batch, so reuse them instead of transforming again.
    X_new = X_batch
    y_new = consumption_battery_data[target].iloc[batch_rows]

    # Combine old and new data for retraining
    X_train = np.vstack([X_train, X_new])
    y_train = np.concatenate([y_train, y_new])

    # Refit from scratch on the enlarged training set (RandomForestRegressor
    # .fit() does not update incrementally; it rebuilds the forest)
    model.fit(X_train, y_train)

    # Move to the next batch
    index += batch_size


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  predicted_data['datetime'] = pd.to_datetime(predicted_data['datetime'], errors='coerce', dayfirst=True)
