In [1]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Function to generate a random datetime within a given (inclusive) range
def random_datetime(start_date, end_date):
    """Return start_date shifted forward by a random whole number of days.

    The offset is drawn uniformly from 0 .. (end_date - start_date).days, so
    both the start day and the last whole day of the range can be returned.
    The time-of-day component of start_date is carried over unchanged.

    Args:
        start_date: Lower bound of the range (datetime).
        end_date: Upper bound of the range (datetime); must not precede
            start_date.

    Returns:
        A datetime d with start_date <= d <= end_date.

    Raises:
        ValueError: If end_date is earlier than start_date.
    """
    if end_date < start_date:
        raise ValueError("end_date must not be earlier than start_date")
    delta = end_date - start_date
    # np.random.randint excludes its upper bound; the +1 makes the final day
    # reachable and keeps a same-day range (delta.days == 0) from raising.
    random_delta = np.random.randint(delta.days + 1)
    return start_date + timedelta(days=int(random_delta))

In [3]:
# Generate 1000 random calendar dates between 2024-01-01 and 2024-12-31.
# Seed NumPy's global RNG before the first random draw so that
# Restart Kernel -> Run All regenerates the same synthetic dataset; every
# np.random call executed after this line continues from the seeded stream.
np.random.seed(42)
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 12, 31)
dates = [random_datetime(start_date, end_date) for _ in range(1000)]

In [4]:
# Generate 1000 random clock times as "HH:MM:SS" strings.
# Each one is 08:00:00 plus a random whole number of minutes in [0, 720),
# i.e. somewhere from 08:00:00 through 19:59:00.
day_start = datetime.strptime("08:00:00", "%H:%M:%S")
times = [
    (day_start + timedelta(minutes=np.random.randint(720))).strftime("%H:%M:%S")
    for _ in range(1000)
]

In [5]:
# Draw 1000 synthetic samples for every numeric column of the dataset.
# randint excludes its upper bound; uniform samples the half-open [low, high).
# NOTE(review): solar_production is drawn independently of all other columns,
# so nothing ties the target to the features — confirm this is intended.
elevations = np.random.randint(low=0, high=10, size=1000)          # integers 0..9
temperatures = np.random.randint(low=10, high=40, size=1000)       # integers 10..39
humidity = np.random.randint(low=30, high=90, size=1000)           # integers 30..89
solar_irradiance = np.random.randint(low=100, high=800, size=1000) # integers 100..799
cloud_cover = np.random.uniform(low=0, high=0.5, size=1000)        # floats in [0, 0.5)
wind_speed = np.random.uniform(low=1, high=10, size=1000)          # floats in [1, 10)
solar_production = np.random.randint(low=100, high=1000, size=1000)  # integers 100..999

In [6]:
# Collect the generated columns into one mapping, then build the DataFrame.
# Keys become column names; insertion order becomes column order.
data = {}
data['Date'] = dates
data['Time'] = times
data['Elevation'] = elevations
data['Temperature'] = temperatures
data['Humidity'] = humidity
data['Solar_Irradiance'] = solar_irradiance
data['Cloud_Cover'] = cloud_cover
data['Wind_Speed'] = wind_speed
data['Solar_Production'] = solar_production

df = pd.DataFrame(data)

In [7]:
# Write the synthetic dataset to a CSV file in the working directory,
# leaving out the row index so the file holds only the data columns.
df.to_csv(path_or_buf='solar_energy_prediction_dataset.csv', index=False)

In [None]:
# Install scikit-learn if not already installed
!pip install scikit-learn

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Read the previously saved CSV back into a DataFrame
# (this rebinds `df` to the data as stored on disk).
df = pd.read_csv(filepath_or_buffer='solar_energy_prediction_dataset.csv')

In [None]:
# Separate the predictor columns (X) from the column to be predicted (y).
# Date and Time are not part of the feature set.
feature_columns = [
    'Elevation',
    'Temperature',
    'Humidity',
    'Solar_Irradiance',
    'Cloud_Cover',
    'Wind_Speed',
]
X = df[feature_columns]
y = df['Solar_Production']

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build a 100-tree random forest regressor with a fixed seed, then fit it
# on the training split.
rf_model = RandomForestRegressor(random_state=42, n_estimators=100)
rf_model.fit(X=X_train, y=y_train)

In [None]:
# Use the fitted forest to predict the target for the held-out rows
y_pred = rf_model.predict(X=X_test)

In [None]:
# Score the predictions against the held-out targets and report the
# mean squared error (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)