In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

In [2]:
def train_model(
    data_path='../data/spg.csv',
    model_path='../solar_power_model.pkl',
    target='generated_power_kw',
    test_size=0.2,
    random_state=42,
):
    """Train a random-forest regressor on a CSV dataset and pickle the result.

    The CSV at ``data_path`` is loaded, every column except ``target`` is used
    as a feature, and the rows are split into train/test partitions. A
    ``StandardScaler`` is fitted on the training partition only and applied to
    both partitions, a ``RandomForestRegressor`` is fitted on the scaled
    training data, and MAE / RMSE / R2 on the held-out partition are printed.

    The fitted model, the fitted scaler and the ordered list of feature column
    names are written to ``model_path`` as a single pickled dict with the keys
    ``'model'``, ``'scaler'`` and ``'feature_names'``.

    Args:
        data_path: Path of the CSV file to train on.
        model_path: Destination path of the pickle bundle (overwritten if it
            already exists).
        target: Name of the column to predict.
        test_size: Held-out share passed to ``train_test_split``.
        random_state: Seed used for both the split and the forest, so repeated
            runs on the same data produce the same model and metrics.

    Returns:
        None. Results are reported via ``print`` and the pickle file.

    Raises:
        KeyError: If ``target`` is not a column of the loaded CSV.
    """
    import os  # local import: only needed to derive the file name for the final message

    data = pd.read_csv(data_path)

    # Fail early with a readable message instead of a bare KeyError from drop().
    if target not in data.columns:
        raise KeyError(f"target column {target!r} not found in {data_path!r}")

    X = data.drop(columns=[target])
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training partition only so no test-set statistics
    # leak into training. NOTE(review): tree ensembles do not normally need
    # feature scaling; the scaler is kept because it is part of the saved
    # bundle and consumers presumably apply it before predict() - confirm.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = RandomForestRegressor(
        n_estimators=100,
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        random_state=random_state
    )

    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)  # RMSE is in the same units as the target
    r2 = r2_score(y_test, y_pred)

    print("Model Performance Metrics:")
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"R2 Score: {r2:.2f}")

    # Persist everything needed to reproduce predictions: the model, the scaler
    # it was trained behind, and the column order the scaler was fitted on.
    model_data = {
        'model': model,
        'scaler': scaler,
        'feature_names': list(X.columns)
    }

    # Pickle files execute code on load; only load this bundle from trusted sources.
    with open(model_path, 'wb') as f:
        pickle.dump(model_data, f)

    print(f"\nModel and scaler saved to '{os.path.basename(model_path)}'")

In [3]:
# Run the training pipeline: reads '../data/spg.csv', prints MAE / RMSE / R2
# on the held-out split, and writes '../solar_power_model.pkl'.
train_model()

Model Performance Metrics:
MAE: 254.97
RMSE: 407.83
R2 Score: 0.82

Model and scaler saved to 'solar_power_model.pkl'
