# Week-2: Solar Power Prediction (ML)
Predicting PowerOutput from solar irradiance, temperature, and date-derived features (hour, day, month) using Linear Regression and Random Forest.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset and derive calendar features from the 'Date' column.
# Values in 'Date' that cannot be parsed become NaT (errors='coerce') and
# those rows are dropped before Hour/Day/Month are extracted.
df = (
    pd.read_csv('clean_solar_dataset.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'))
    .dropna(subset=['Date'])
    .assign(
        Hour=lambda frame: frame['Date'].dt.hour,
        Day=lambda frame: frame['Date'].dt.day,
        Month=lambda frame: frame['Date'].dt.month,
    )
)

# Predictor columns (two measurements plus the date-derived features) and
# the regression target.
features = ['SolarIrradiance', 'Temperature', 'Hour', 'Day', 'Month']
X = df.loc[:, features]
y = df.loc[:, 'PowerOutput']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows - confirm this is acceptable for
# time-stamped data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline model: ordinary least-squares linear regression.
# fit() returns the fitted estimator itself, so it can be bound directly.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Ensemble model: 100-tree random forest, seeded so results are repeatable.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
y_pred_rf = rf.predict(X_test)

# Report MAE, RMSE and R2 on the held-out test set for each fitted model.
# RMSE is computed as the square root of the MSE instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, whereas this form works on every scikit-learn version.
model_predictions = {
    'Linear Regression': y_pred_lr,
    'Random Forest': y_pred_rf,
}
for position, (model_name, predictions) in enumerate(model_predictions.items()):
    if position:
        # Blank line between the per-model report sections.
        print()
    rmse = mean_squared_error(y_test, predictions) ** 0.5
    print(f'{model_name} MAE:', mean_absolute_error(y_test, predictions))
    print(f'{model_name} RMSE:', rmse)
    print(f'{model_name} R2:', r2_score(y_test, predictions))

# Overlay actual and predicted PowerOutput for the first PLOT_SAMPLES test
# rows so the two models can be compared visually.
PLOT_SAMPLES = 200  # slicing simply yields fewer points if the test set is smaller

plt.figure(figsize=(10, 4))
plt.plot(y_test.values[:PLOT_SAMPLES], label='Actual')
plt.plot(y_pred_lr[:PLOT_SAMPLES], label='Predicted LR')
plt.plot(y_pred_rf[:PLOT_SAMPLES], label='Predicted RF')
# The x axis is the position within the test set, not time: y_test comes
# from train_test_split, which does not preserve the original row order.
plt.xlabel('Test sample index')
plt.ylabel('PowerOutput')
plt.title(f'Actual vs. predicted PowerOutput (first {PLOT_SAMPLES} test samples)')
plt.legend()
plt.show()
