In [4]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the solar/wind weather dataset; 'Date' is discarded because it is not used for prediction
df = pd.read_csv("solar_wind_energy_weather_2021_2025.csv").drop(columns=['Date'])

# 'State' is categorical, so replace it with the integer codes of a fitted LabelEncoder
le = LabelEncoder()
df['State'] = le.fit_transform(df['State'])

# Column names: the two regression targets and the predictors shared by both models
target_solar = 'Solar_Energy_MWh'
target_wind = 'Wind_Energy_MWh'
features = ['State', 'Temperature_C', 'Humidity_%', 'Wind_Speed_mps', 'Cloud_Cover_%']

X = df[features]
y_solar = df[target_solar]
y_wind = df[target_wind]

# Hold out 20% of the rows for testing, once per target
(X_train_solar, X_test_solar,
 y_train_solar, y_test_solar) = train_test_split(X, y_solar, test_size=0.2, random_state=42)
(X_train_wind, X_test_wind,
 y_train_wind, y_test_wind) = train_test_split(X, y_wind, test_size=0.2, random_state=42)

# One gradient-boosting regressor per target (fit() returns the fitted estimator itself)
solar_model = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train_solar, y_train_solar)
wind_model = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train_wind, y_train_wind)

# Predict on the held-out rows
solar_predictions = solar_model.predict(X_test_solar)
wind_predictions = wind_model.predict(X_test_wind)

# The reported "accuracy" is the R^2 score scaled by 100 so it prints as a percentage
solar_accuracy = 100 * r2_score(y_test_solar, solar_predictions)
wind_accuracy = 100 * r2_score(y_test_wind, wind_predictions)

print(f"Solar Model Accuracy: {solar_accuracy:.2f}%")
print(f"Wind Model Accuracy: {wind_accuracy:.2f}%")

# Function for user input prediction
def predict_energy():
    """Prompt for a state and weather readings, then print predicted generation.

    Reads a state name and four numeric readings (temperature, humidity,
    wind speed, cloud cover) from standard input, encodes the state with the
    fitted ``le`` encoder, and prints the predictions of ``solar_model`` and
    ``wind_model`` in MWh.

    Returns:
        None. An error message is printed and the function returns early when
        the state is not one of ``le.classes_`` or when a reading cannot be
        parsed as a number.
    """
    # Strip stray whitespace so " Gujarat " is not rejected as an unknown state.
    state_name = input("Enter State (Maharashtra, Rajasthan, Tamil Nadu, Karnataka, Gujarat): ").strip()

    # Encode user-input state
    if state_name not in le.classes_:
        print("Error: Invalid state name!")
        return

    state_encoded = le.transform([state_name])[0]

    # A non-numeric reading is reported the same way as an invalid state
    # instead of surfacing as an uncaught ValueError traceback.
    try:
        temp = float(input("Enter Temperature (°C): "))
        humidity = float(input("Enter Humidity (%): "))
        wind_speed = float(input("Enter Wind Speed (mps): "))
        cloud_cover = float(input("Enter Cloud Cover (%): "))
    except ValueError:
        print("Error: Weather readings must be numeric!")
        return

    # Single-row frame whose columns follow the order of `features`,
    # the same column list the models were trained on.
    input_data = pd.DataFrame([[state_encoded, temp, humidity, wind_speed, cloud_cover]], columns=features)

    solar_prediction = solar_model.predict(input_data)[0]
    wind_prediction = wind_model.predict(input_data)[0]

    print(f"Predicted Solar Energy Generation: {solar_prediction:.2f} MWh")
    print(f"Predicted Wind Energy Generation: {wind_prediction:.2f} MWh")

# Call the function to get user predictions.
# This is interactive: predict_energy() reads its values through input() prompts.
predict_energy()


Solar Model Accuracy: -0.12%
Wind Model Accuracy: 16.64%


Enter State (Maharashtra, Rajasthan, Tamil Nadu, Karnataka, Gujarat):  Gujarat
Enter Temperature (°C):  56
Enter Humidity (%):  58
Enter Wind Speed (mps):  1.37
Enter Cloud Cover (%):  12.77


Predicted Solar Energy Generation: 264.75 MWh
Predicted Wind Energy Generation: 201.19 MWh


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder

from sklearn.base import clone

# Load dataset
df = pd.read_csv("solar_wind_energy_weather_2021_2025.csv")

# Drop the 'Date' column since it's not needed for prediction
df = df.drop(columns=['Date'])

# Encode the 'State' column as it's categorical
le = LabelEncoder()
df['State'] = le.fit_transform(df['State'])

# Define features and targets
features = ['State', 'Temperature_C', 'Humidity_%', 'Wind_Speed_mps', 'Cloud_Cover_%']
target_solar = 'Solar_Energy_MWh'
target_wind = 'Wind_Energy_MWh'

X = df[features]
y_solar = df[target_solar]
y_wind = df[target_wind]

# Split dataset
X_train_solar, X_test_solar, y_train_solar, y_test_solar = train_test_split(X, y_solar, test_size=0.2, random_state=42)
X_train_wind, X_test_wind, y_train_wind, y_test_wind = train_test_split(X, y_wind, test_size=0.2, random_state=42)

# Define models.
# These instances act as unfitted templates: the loop below fits a clone of
# each one per target and never fits the template itself.
models = {
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Linear Regression": LinearRegression()
}

# Train and evaluate models.
# The "accuracy" values are R^2 scores on the test split, scaled by 100.
# NOTE(review): the winner is chosen on the same test split it is scored on,
# so the reported best score is likely optimistic; consider a validation split.
best_solar_model, best_wind_model = None, None
best_solar_acc, best_wind_acc = float('-inf'), float('-inf')

for name, model in models.items():
    # Fit an independent clone for each target. Fitting `model` itself on the
    # solar target and then again on the wind target refits the same object,
    # so a stored "best solar" reference would end up holding the wind fit.
    solar_candidate = clone(model)
    solar_candidate.fit(X_train_solar, y_train_solar)
    solar_pred = solar_candidate.predict(X_test_solar)
    solar_acc = r2_score(y_test_solar, solar_pred) * 100

    if solar_acc > best_solar_acc:
        best_solar_acc = solar_acc
        best_solar_model = solar_candidate

    # Train wind model on its own clone
    wind_candidate = clone(model)
    wind_candidate.fit(X_train_wind, y_train_wind)
    wind_pred = wind_candidate.predict(X_test_wind)
    wind_acc = r2_score(y_test_wind, wind_pred) * 100

    if wind_acc > best_wind_acc:
        best_wind_acc = wind_acc
        best_wind_model = wind_candidate

print(f"Best Solar Model: {best_solar_model.__class__.__name__} with Accuracy: {best_solar_acc:.2f}%")
print(f"Best Wind Model: {best_wind_model.__class__.__name__} with Accuracy: {best_wind_acc:.2f}%")

# Function for user input prediction
def predict_energy():
    """Prompt for a state and weather readings, then print predicted generation.

    Reads a state name and four numeric readings (temperature, humidity,
    wind speed, cloud cover) from standard input, encodes the state with the
    fitted ``le`` encoder, and prints the predictions of ``best_solar_model``
    and ``best_wind_model`` in MWh.

    Returns:
        None. An error message is printed and the function returns early when
        the state is not one of ``le.classes_`` or when a reading cannot be
        parsed as a number.
    """
    # Strip stray whitespace so " Gujarat " is not rejected as an unknown state.
    state_name = input("Enter State (Maharashtra, Rajasthan, Tamil Nadu, Karnataka, Gujarat): ").strip()

    if state_name not in le.classes_:
        print("Error: Invalid state name!")
        return

    state_encoded = le.transform([state_name])[0]

    # A non-numeric reading is reported the same way as an invalid state
    # instead of surfacing as an uncaught ValueError traceback.
    try:
        temp = float(input("Enter Temperature (°C): "))
        humidity = float(input("Enter Humidity (%): "))
        wind_speed = float(input("Enter Wind Speed (mps): "))
        cloud_cover = float(input("Enter Cloud Cover (%): "))
    except ValueError:
        print("Error: Weather readings must be numeric!")
        return

    # Single-row frame whose columns follow the order of `features`,
    # the same column list the models were trained on.
    input_data = pd.DataFrame([[state_encoded, temp, humidity, wind_speed, cloud_cover]], columns=features)

    solar_prediction = best_solar_model.predict(input_data)[0]
    wind_prediction = best_wind_model.predict(input_data)[0]

    print(f"Predicted Solar Energy Generation: {solar_prediction:.2f} MWh")
    print(f"Predicted Wind Energy Generation: {wind_prediction:.2f} MWh")

# Call the function to get user predictions.
# This is interactive: predict_energy() reads its values through input() prompts.
predict_energy()


Best Solar Model: LinearRegression with Accuracy: 0.02%
Best Wind Model: GradientBoostingRegressor with Accuracy: 16.64%


In [None]:
# pickle is not imported anywhere else in this notebook, so import it here;
# without it this cell fails with NameError.
import pickle

# Save the best models
# NOTE: only the two estimators are written. The fitted LabelEncoder `le`,
# which predict_energy() uses to encode state names, is not persisted here.
with open("best_solar_model.pkl", "wb") as f:
    pickle.dump(best_solar_model, f)

with open("best_wind_model.pkl", "wb") as f:
    pickle.dump(best_wind_model, f)

# Load saved models
# CAUTION: pickle.load can execute arbitrary code from the file. Only load
# .pkl files that were produced by this notebook or another trusted source.
with open("best_solar_model.pkl", "rb") as f:
    best_solar_model = pickle.load(f)

with open("best_wind_model.pkl", "rb") as f:
    best_wind_model = pickle.load(f)
