In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# TensorFlow libraries for LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


# ---- Load and preprocess data ----

# Read the raw measurements from the Excel workbook.
data = pd.read_excel("data.xlsx")

# Missing values: if any cell is null, drop every row that contains one
# (simplest policy; adjust if specific patterns need imputing instead).
has_missing_values = data.isna().any().any()
if has_missing_values:
    data.dropna(inplace=True)

# Duplicates: if any row repeats an earlier one, keep only first occurrences
# (adjust if a different de-duplication policy is wanted).
has_duplicate_rows = data.duplicated().any()
if has_duplicate_rows:
    data.drop_duplicates(inplace=True)

# Explore data with visualizations: two stacked panels in one 12x6 figure.
fig, (ax_power_time, ax_power_temp) = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))

# Top panel: PV power as a time series.
ax_power_time.plot(data["Time"], data["PV power"])
ax_power_time.set_xlabel("Time")
ax_power_time.set_ylabel("PV power")
ax_power_time.set_title("PV Power over Time")

# Bottom panel: PV power against panel temperature (add more panels as needed).
ax_power_temp.scatter(data["PV temperature"], data["PV power"])
ax_power_temp.set_xlabel("PV temperature")
ax_power_temp.set_ylabel("PV power")
ax_power_temp.set_title("PV Power vs. Temperature")

fig.tight_layout()
plt.show()

# Separate features and target
X = data[["Ambient", "Radiation", "PV temperature"]]
y = data["PV power"]

# Train/test split (done BEFORE scaling so the test rows stay unseen)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Feature normalization: fit the scaler on the training rows only, then apply
# the same transform to the test rows. Fitting on the full dataset would leak
# test-set mean/variance into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # returns a NumPy array
X_test = scaler.transform(X_test)

# Reshape data for LSTM: (samples, time steps, features) with one time step.
# This works because the scaler returned NumPy arrays; pandas DataFrames have
# no .reshape method.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


# ---- LSTM Model Building and Training ----

# Per-sample input layout taken from the prepared training tensor.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

# Initial architecture (tune after evaluation): two stacked 50-unit LSTM
# layers followed by a single-output dense layer for regression.
model = Sequential(
    [
        # return_sequences=True hands the full sequence to the next LSTM layer.
        LSTM(units=50, return_sequences=True, input_shape=(n_timesteps, n_features)),
        LSTM(units=50),
        Dense(1),
    ]
)

# Mean-squared-error loss with the Adam optimizer.
model.compile(optimizer="adam", loss="mse")

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2)


# ---- Model Evaluation and Refinement Options ----

# Evaluate model performance on the held-out test set.
# Run inference once and reuse the predictions for both metrics instead of
# calling model.predict() separately for each score.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R-squared:", r2)

# **Potential refinements (based on your feedback):**
# - Hyperparameter tuning: adjust units, learning rate, etc.
# - Alternative models: Random Forest, Support Vector Machines
# - Regularization techniques: dropout, early stopping

# Choose and implement desired refinement options here (e.g., hyperparameter tuning with GridSearchCV)


# ---- Forecasting ----

# Prepare new input data: a 2-D table of shape (samples, features) holding the
# same feature columns, in the same order and with the same scaling, as the
# rows the model was trained on. The time-step axis is added below.
new_data = ...  # Replace with your specific data structure

if new_data is Ellipsis:
    # The placeholder above has not been filled in; Ellipsis has no .reshape,
    # so attempting a prediction would only raise an opaque AttributeError.
    print("Forecasting skipped: assign a (samples, features) array to new_data first.")
else:
    # np.asarray lets a DataFrame be supplied too (DataFrames have no .reshape).
    new_features = np.asarray(new_data)
    if new_features.ndim != 2:
        raise ValueError(
            f"new_data must be 2-D (samples, features), got shape {new_features.shape}"
        )

    # Make predictions and visualize (if desired).
    # Insert the single time-step axis the LSTM expects: (samples, 1, features).
    new_predictions = model.predict(
        new_features.reshape((new_features.shape[0], 1, new_features.shape[1]))
    )
    plt.plot(new_predictions, label="Predicted")
    plt.legend()
    plt.show()

# Optionally, compare with real-world data (API implementation not provided)


# Remember to adapt and customize the code to your specific data and desired functionality.


ModuleNotFoundError: No module named 'tensorflow'