## Yield Prediction for Plant 1

In [22]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the Plant 1 generation data.
df = pd.read_csv('Plant1_filtered.csv')

# Parse DATE_TIME. The year-first format is tried first; if pandas rejects it
# with ValueError, the day-first format is used instead.
try:
    df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%Y-%m-%d %H:%M')
except ValueError:
    df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%d-%m-%Y %H:%M')

# Calendar features derived from the parsed timestamp.
df['day'] = df['DATE_TIME'].dt.day
df['month'] = df['DATE_TIME'].dt.month
df['hour'] = df['DATE_TIME'].dt.hour

# Features and target variable.
X = df[['DC_POWER', 'AC_POWER', 'day', 'month', 'hour']]
y = df['DAILY_YIELD']

# Hold out 30% of the rows for evaluation (fixed seed for reproducibility).
# NOTE(review): this is a random split of time-stamped rows, so test rows can
# share a day with training rows — the reported scores may be optimistic for a
# true "next day" forecast. Confirm whether a chronological split is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

# K-Nearest Neighbors model (n_neighbors=5 is also the library default).
# NOTE(review): the features are fed to KNN unscaled; since KNN is distance
# based, the power columns presumably dominate the calendar columns. Consider
# standardising the inputs before comparing it against the random forest.
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_y_pred = knn_model.predict(X_test)

# Evaluation metrics for both models on the held-out rows.
# RMSE is computed as sqrt(MSE) rather than with `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, whereas this form
# works on every version.
for model_name, y_pred in (("Random Forest", rf_y_pred), ("KNN", knn_y_pred)):
    print(f"{model_name} RMSE:", mean_squared_error(y_test, y_pred) ** 0.5)
    print(f"{model_name} MAE:", mean_absolute_error(y_test, y_pred))
    print(f"{model_name} R-squared:", r2_score(y_test, y_pred))

# Predicting the next day's yield from example inputs.
# The row is wrapped in a DataFrame carrying the training column names so the
# fitted models receive the same feature names they were trained with (a bare
# list triggers scikit-learn's "X does not have valid feature names" warning).
next_day_features = pd.DataFrame(
    [[500, 1000, 15, 5, 12]],  # Example: [DC_POWER, AC_POWER, day, month, hour]
    columns=X.columns,
)
predicted_next_day_yield_rf = rf_model.predict(next_day_features)
predicted_next_day_yield_knn = knn_model.predict(next_day_features)

print(f"Predicted daily yield for the next day (RF): {predicted_next_day_yield_rf[0]:.2f}")
print(f"Predicted daily yield for the next day (KNN): {predicted_next_day_yield_knn[0]:.2f}")


Random Forest RMSE: 232.06451634892744
Random Forest MAE: 134.62860463331143
Random Forest R-squared: 0.9924687939424953
KNN RMSE: 1393.8913794464297
KNN MAE: 1001.2060697016404
KNN R-squared: 0.7282904428982597
Predicted daily yield for the next day (RF): 2999.81
Predicted daily yield for the next day (KNN): 4934.65




## Irradiation Prediction from Plant 1 Weather Data

In [24]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the Plant 1 weather-sensor data.
df = pd.read_csv('Plant1_Weather_filtered.csv')

# Parse DATE_TIME (this file stores timestamps with seconds).
df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')

# Calendar features derived from the parsed timestamp.
df['day'] = df['DATE_TIME'].dt.day
df['month'] = df['DATE_TIME'].dt.month
df['hour'] = df['DATE_TIME'].dt.hour

# Features and target variable.
X = df[['AMBIENT_TEMPERATURE','MODULE_TEMPERATURE', 'day', 'month', 'hour']]
y = df['IRRADIATION']

# Hold out 30% of the rows for evaluation (fixed seed for reproducibility).
# NOTE(review): this is a random split of time-stamped rows, so test rows can
# share a day with training rows — confirm whether a chronological split is
# wanted for a "next day" forecast.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

# K-Nearest Neighbors model (n_neighbors=5 is also the library default).
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_y_pred = knn_model.predict(X_test)

# Evaluation metrics for both models on the held-out rows.
# RMSE is computed as sqrt(MSE) rather than with `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, whereas this form
# works on every version.
for model_name, y_pred in (("Random Forest", rf_y_pred), ("KNN", knn_y_pred)):
    print(f"{model_name} RMSE:", mean_squared_error(y_test, y_pred) ** 0.5)
    print(f"{model_name} MAE:", mean_absolute_error(y_test, y_pred))
    print(f"{model_name} R-squared:", r2_score(y_test, y_pred))

# Example features for next day prediction, update them based on what you know will be the conditions.
# The row is wrapped in a DataFrame carrying the training column names so the
# fitted models receive the same feature names they were trained with (a bare
# list triggers scikit-learn's "X does not have valid feature names" warning).
next_day_features = pd.DataFrame(
    [[25, 35, 16, 5, 12]],  # Example: [AMBIENT_TEMPERATURE, MODULE_TEMPERATURE, day, month, hour]
    columns=X.columns,
)
# These variables hold predicted IRRADIATION despite the "yield" in their
# names; the names are left unchanged in case other cells read them.
predicted_next_day_yield_rf = rf_model.predict(next_day_features)
predicted_next_day_yield_knn = knn_model.predict(next_day_features)

print(f"Predicted irradiation for the next day (RF): {predicted_next_day_yield_rf[0]:.2f}")
print(f"Predicted irradiation for the next day (KNN): {predicted_next_day_yield_knn[0]:.2f}")


Random Forest RMSE: 0.0843608508731092
Random Forest MAE: 0.05877069253441458
Random Forest R-squared: 0.9161772042860348
KNN RMSE: 0.08604615542815472
KNN MAE: 0.058796208722952686
KNN R-squared: 0.9127946398360871
Predicted irradiation for the next day (RF): 0.43
Predicted irradiation for the next day (KNN): 0.49


