In [7]:
import pandas as pd
import time
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from datetime import datetime

# Function to parse multiple date formats
def parse_date(date_str):
    """Parse a day-first timestamp string into a ``datetime``.

    Two layouts are attempted, in order: ``DD-MM-YYYY HH:MM`` and
    ``DD/MM/YYYY HH:MM``. The first layout that matches wins.

    Args:
        date_str: Timestamp text to parse.

    Returns:
        The parsed ``datetime`` object.

    Raises:
        ValueError: If ``date_str`` matches neither supported layout.
    """
    known_formats = ('%d-%m-%Y %H:%M', '%d/%m/%Y %H:%M')
    parsed = None
    for pattern in known_formats:
        try:
            parsed = datetime.strptime(date_str, pattern)
        except ValueError:
            # This layout did not fit; fall through to the next candidate.
            pass
        else:
            break
    if parsed is None:
        raise ValueError(f"Date format not recognized: {date_str}")
    return parsed

# Load data from file
data = pd.read_csv('solar_weather_data.csv')

# Parse the 'datetime' strings; parse_date accepts both the dash- and
# slash-separated day-first layouts.
timestamps = data['datetime'].apply(parse_date)

# Swap the raw datetime column for numeric calendar features
# (day, month, hour, minute) that the model can consume directly.
data = data.drop(columns=['datetime']).assign(
    day=timestamps.dt.day,
    month=timestamps.dt.month,
    hour=timestamps.dt.hour,
    minute=timestamps.dt.minute,
)

# Handle categorical columns with OneHotEncoding
categorical_columns = ['conditions']  # Add any other string-based columns here
encoder = OneHotEncoder(sparse_output=False)  # Use sparse_output instead of sparse
encoded_categories = encoder.fit_transform(data[categorical_columns])

# Wrap the encoded array in a DataFrame that shares data's index, so the
# column-wise concat below pairs rows by position even if rows are ever
# filtered out before this step (a fresh RangeIndex would misalign and
# introduce NaNs in that case).
encoded_df = pd.DataFrame(
    encoded_categories,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=data.index,
)

# Replace the original categorical columns with their one-hot encoded form
data = pd.concat([data.drop(columns=categorical_columns), encoded_df], axis=1)

# Separate features and targets. The target names live in one list so the
# feature/target split cannot drift apart when a column is added or removed.
target_columns = ['temp', 'humidity', 'windspeed', 'cloudcover', 'solar_radiation', 'solar_energy', 'uv_index']
X = data.drop(columns=target_columns)
y = data[target_columns]

# Function to train model
def train_model(X_train, y_train, random_state=42):
    """Fit a brand-new RandomForestRegressor on the given data.

    A fresh estimator is constructed on every call, so nothing is carried
    over from any previously trained model.

    Args:
        X_train: Feature matrix passed to ``fit``.
        y_train: Target values passed to ``fit``.
        random_state: Seed forwarded to the forest so that re-running the
            notebook on the same data reproduces the same model. Pass
            ``None`` to get the unseeded behaviour.

    Returns:
        The fitted RandomForestRegressor.
    """
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)
    return model

# Expanding-window simulation: fit on the first INITIAL_TRAIN_SIZE rows, then
# repeatedly predict the next CHUNK_SIZE rows, treat their true values as
# newly observed, and refit on everything seen so far.
INITIAL_TRAIN_SIZE = 1000
CHUNK_SIZE = 24
RETRAIN_DELAY_SECONDS = 1  # pause between chunks to simulate waiting for new data

# Initial training on the first INITIAL_TRAIN_SIZE points
X_train = X.iloc[:INITIAL_TRAIN_SIZE]
y_train = y.iloc[:INITIAL_TRAIN_SIZE]
model = train_model(X_train, y_train)

# Start the loop for predictions and retraining
index = INITIAL_TRAIN_SIZE
while index + CHUNK_SIZE <= len(X):
    chunk_end = index + CHUNK_SIZE

    # Predict the next chunk with the model fitted on rows [0, index)
    X_next = X.iloc[index:chunk_end]
    y_pred = model.predict(X_next)

    # (Optional) Save predictions
    print(f"Predictions for next {CHUNK_SIZE} points:", y_pred)

    # Simulate waiting for the real measurements to arrive
    time.sleep(RETRAIN_DELAY_SECONDS)

    # The chunk that was just predicted is now treated as observed data.
    X_new = X_next
    y_new = y.iloc[index:chunk_end]

    # The training set is always the prefix of the data seen so far, so it is
    # sliced directly instead of being grown with pd.concat on every pass
    # (which would re-copy the whole training frame each iteration).
    X_train = X.iloc[:chunk_end]
    y_train = y.iloc[:chunk_end]

    # Retrain the model from scratch on the enlarged training set
    model = train_model(X_train, y_train)

    # Move to the next chunk
    index = chunk_end


Predictions for next 24 points: [[2.85300e+01 9.27652e+01 4.82100e+00 9.30310e+01 7.06300e+01 2.53000e-01
  8.50000e-01]
 [2.73680e+01 9.51532e+01 1.18730e+01 9.48170e+01 1.68650e+02 6.15000e-01
  1.82000e+00]
 [3.06100e+01 8.55068e+01 1.11820e+01 8.71150e+01 5.24230e+02 1.87900e+00
  5.05000e+00]
 [3.29370e+01 7.22601e+01 7.34900e+00 6.66670e+01 6.96530e+02 2.50900e+00
  7.01000e+00]
 [3.32880e+01 7.06008e+01 9.04900e+00 6.31650e+01 7.97840e+02 2.86100e+00
  7.92000e+00]
 [3.36020e+01 6.86338e+01 8.74400e+00 6.16340e+01 8.00830e+02 2.86400e+00
  7.92000e+00]
 [3.22960e+01 7.59234e+01 1.07040e+01 7.97480e+01 6.43710e+02 2.29500e+00
  6.68000e+00]
 [3.28920e+01 7.21253e+01 1.35250e+01 8.81090e+01 4.81940e+02 1.73700e+00
  4.95000e+00]
 [3.27830e+01 7.00009e+01 1.00840e+01 6.58520e+01 5.17620e+02 1.87400e+00
  5.23000e+00]
 [3.18800e+01 7.73063e+01 4.54700e+00 6.88390e+01 3.83860e+02 1.38400e+00
  3.57000e+00]
 [3.17200e+01 7.53418e+01 7.29200e+00 6.46570e+01 2.73820e+02 1.01800e+00
  2.