# 🌞 Solar Power Output Prediction Using Machine Learning
### Predicting Solar Energy Generation from Plant_1_Generation_Data.csv

In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Step 2: Load Dataset
# In Google Colab, prompt for an upload so the CSV lands in the working
# directory. Anywhere else the `google.colab` package does not exist, so skip
# the upload step and read the CSV straight from the working directory instead
# of failing on the import.
try:
    from google.colab import files
except ImportError:
    uploaded = {}  # nothing uploaded; keeps the name defined for later cells
else:
    uploaded = files.upload()
df = pd.read_csv('Plant_1_Generation_Data.csv')
# Last expression of the cell: the notebook renders the first rows.
df.head()

In [None]:
# Step 3: Explore Dataset
# Column names, dtypes and non-null counts.
df.info()
# Total number of missing cells across the whole frame. The leading "\n" must
# be an escape inside the literal; a raw line break there is a SyntaxError.
print('\nMissing Values:', df.isnull().sum().sum())
# Last expression of the cell: summary statistics of the numeric columns.
df.describe()

In [None]:
# Step 4: Data Preprocessing
# Parse DATE_TIME into real timestamps. The ISO layout is tried first, so files
# that already matched it behave exactly as before.
try:
    df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')
except ValueError:
    # NOTE(review): the CSV is not visible from here. Some exports of this
    # dataset are believed to store timestamps day-first without seconds
    # (e.g. "15-05-2020 00:00") - confirm against the actual file. If this
    # layout does not match either, the ValueError still propagates.
    df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%d-%m-%Y %H:%M')

# Calendar features derived from the timestamp; used as model inputs later.
df['Hour'] = df['DATE_TIME'].dt.hour
df['Day'] = df['DATE_TIME'].dt.day
df['Month'] = df['DATE_TIME'].dt.month

# Drop rows with any missing value, then exact duplicate rows (both in place).
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
print('✅ Data preprocessing completed!')

In [None]:
# Step 5: Exploratory Data Analysis
# Plot 1: AC_POWER against Hour; seaborn aggregates each hour with the mean.
_, hourly_ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x='Hour', y='AC_POWER', estimator='mean', ax=hourly_ax)
hourly_ax.set_title('Average AC Power by Hour')
plt.show()

# Plot 2: one semi-transparent point per row, DC_POWER on x and AC_POWER on y.
_, power_ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='DC_POWER', y='AC_POWER', alpha=0.5, ax=power_ax)
power_ax.set_title('AC vs DC Power Relationship')
plt.show()

In [None]:
# Step 6: Feature Selection
# The column to predict, and the input columns: DC_POWER plus the calendar
# features (Hour, Day, Month).
target = 'AC_POWER'
features = ['DC_POWER', 'Hour', 'Day', 'Month']

# X is the feature frame, y the target series.
X, y = df[features], df[target]

In [None]:
# Step 7: Train-Test Split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the (rows, columns) shape of each feature partition.
for split_label, split_frame in (
    ('Training Samples:', X_train),
    ('Testing Samples:', X_test),
):
    print(split_label, split_frame.shape)

In [None]:
# Step 8: Model Training
# Build the three regressors; the two ensemble models get a fixed seed.
lr = LinearRegression()
rf = RandomForestRegressor(random_state=42)
xgb = XGBRegressor(random_state=42)

# Fit each model on the training partition, in the order lr -> rf -> xgb.
for regressor in (lr, rf, xgb):
    regressor.fit(X_train, y_train)

# Predict on the held-out partition, one prediction array per model.
y_pred_lr, y_pred_rf, y_pred_xgb = [
    regressor.predict(X_test) for regressor in (lr, rf, xgb)
]

In [None]:
# Step 9: Evaluation
def evaluate_model(y_true, y_pred, model_name):
    """Print MAE, RMSE and R² for one model's predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values to compare against ``y_true``.
        model_name: Label used in the printed report header.

    Returns:
        None. The three metrics are printed with four decimal places.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    # The leading "\n" separates consecutive reports; it must be an escape
    # inside the literal, since a raw line break there is a SyntaxError.
    print(f'\n📊 {model_name} Performance:')
    print(f'MAE : {mae:.4f}')
    print(f'RMSE: {rmse:.4f}')
    print(f'R²  : {r2:.4f}')

# Report every model against the same held-out targets, in training order.
for model_label, predictions in (
    ('Linear Regression', y_pred_lr),
    ('Random Forest', y_pred_rf),
    ('XGBoost', y_pred_xgb),
):
    evaluate_model(y_test, predictions, model_label)

In [None]:
# Step 10: Visualization
# Scatter of held-out targets (x) against the XGBoost predictions (y).
_, fit_ax = plt.subplots(figsize=(7, 6))
fit_ax.scatter(y_test, y_pred_xgb, alpha=0.5, color='orange')
fit_ax.set_xlabel('Actual AC Power')
fit_ax.set_ylabel('Predicted AC Power')
fit_ax.set_title('Actual vs Predicted Power (XGBoost)')
plt.show()

# Horizontal bars: one per feature, length = the random forest's importance.
# No figure is created explicitly here, matching the pyplot default behaviour.
importance_ax = sns.barplot(x=rf.feature_importances_, y=features)
importance_ax.set_title('Feature Importance - Random Forest')
plt.show()

In [None]:
# Step 11: Conclusion
# Final status line. The check-mark emoji was mis-decoded (UTF-8 read as
# cp1252) and is restored here.
print('✅ Solar Power Output Prediction Completed Successfully!')