In [8]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [10]:
# Load the solar-panel measurements and list the columns that were read.
df = pd.read_csv(filepath_or_buffer='solar_panel_efficiency.csv')
print(df.columns)

Index(['Temperature', 'Humidity', 'Solar_Irradiance', 'Efficiency'], dtype='object')


In [12]:
# Separate the value being predicted from the predictor columns.
# y: the 'Efficiency' column; X: every remaining column of df.
y = df['Efficiency']
X = df.drop(columns='Efficiency')


In [14]:
# Train/test partition: 20% of the rows are held out for evaluation,
# and the fixed seed makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [18]:
#Model Training
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

In [22]:
# Train the two baseline regressors on the training split.
# `fit` is a method and has to be *called*: writing `model.fit = (X, y)`
# trains nothing and replaces the method on the instance with a tuple,
# so any later `model.fit(...)` on that object would fail.

# Random forest baseline (seeded so results are repeatable)
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# XGBoost baseline (seeded so results are repeatable)
xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train, y_train);  # trailing ';' keeps the estimator repr out of the cell output

In [32]:
#Model Evaluation
# Fits fresh baseline models and reports their mean squared error on the
# held-out test split (lower is better).
#
# RandomForestRegressor / XGBRegressor are already imported in the
# "Model Training" cell; mean_squared_error comes from sklearn.metrics
# via the notebook's top-level imports.
#
# Both models are seeded with the same random_state used for the train/test
# split and the grid search, so the printed numbers are reproducible under
# Restart & Run All and comparable with the tuned model.
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42)

# Random forest: fit on the training split, score on the test split
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
print(f"RandomForest MSE: {rf_mse:.2f}")

# XGBoost: fit on the training split, score on the test split
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)
xgb_mse = mean_squared_error(y_test, xgb_predictions)
print(f"XGBoost MSE: {xgb_mse:.2f}")



RandomForest MSE: 0.49
XGBoost MSE: 0.83


In [36]:
#Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

# Candidate settings for the random forest: three ensemble sizes crossed
# with four depth limits (None leaves the depth unrestricted).
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
)

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split, ranked by the 'neg_mean_squared_error' scorer.
grid_search = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
).fit(X_train, y_train)

# Take the best estimator found by the search and score it on the test split.
best_rf_model = grid_search.best_estimator_
best_rf_predictions = best_rf_model.predict(X_test)
best_rf_mse = mean_squared_error(y_test, best_rf_predictions)
print(f"Best RandomForest MSE: {best_rf_mse:.2f}")



Best RandomForest MSE: 0.49


In [42]:
# Comparison and selection: report both test-set errors side by side.
print(f"Initial XGBoost MSE: {xgb_mse:.2f}")
print(f"Tuned RandomForest MSE: {best_rf_mse:.2f}")

# Recommend whichever model has the strictly lower MSE; a tie (or any
# comparison that is not strictly in the random forest's favour) falls
# through to XGBoost.
print(
    'Recommendation : Use Tuned RandomForest for predicting Solar Panel Efficiency'
    if best_rf_mse < xgb_mse
    else 'Recommendation : Use XGBoost for predicting Solar Panel Efficiency'
)


Initial XGBoost MSE: 0.83
Tuned RandomForest MSE: 0.49
Recommendation : Use Tuned RandomForest for predicting Solar Panel Efficiency
