In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.metrics import mean_squared_error, r2_score

# Read the validation set from disk (swap in your own CSV filename if needed).
data = pd.read_csv('validation_data.csv')

# The three predictor columns and the single response column, named once so
# the selection below reads as plain column lookups.
feature_columns = [
    'Temperature (°C)',
    'Relative Humidity (%)',
    'Ambient Pressure (kPa)',
]
target_column = 'Wind Speed (m/s)'
X = data[feature_columns]
y = data[target_column]

# Estimator evaluated by cross-validation below.
model = LinearRegression()

# Shuffled 5-fold splitter with a fixed seed; change n_splits to use a
# different number of folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# One out-of-fold prediction per row: every sample is predicted by a model
# that did not see it during fitting.
y_pred_cv = cross_val_predict(model, X, y, cv=kf)

# Regression metrics computed over the pooled out-of-fold predictions.
mse_cv = mean_squared_error(y, y_pred_cv)
rmse_cv = np.sqrt(mse_cv)
r2_cv = r2_score(y, y_pred_cv)

# Emit the report as a single newline-joined write.
report_lines = [
    "Cross-Validation Results:",
    f"Mean Squared Error (MSE): {mse_cv:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse_cv:.2f}",
    f"R-squared (R2): {r2_cv:.2f}",
]
print("\n".join(report_lines))

# Leave-one-feature-out sensitivity analysis.
#
# Each reduced feature set is scored with the same cross-validation splitter
# (kf) that produced the "Cross-Validation Results" for the full model, so the
# metrics printed here are out-of-fold and directly comparable with that
# baseline. Fitting and predicting on the same rows would report training
# error instead, which understates how much a dropped feature matters.
for feature in X.columns:
    # Feature matrix with the current column left out.
    X_temp = X.drop(columns=[feature])

    # Out-of-fold predictions for the reduced model. kf has a fixed
    # random_state, so every feature subset is evaluated on the same folds.
    # NOTE: cross_val_predict fits clones of `model`; `model` itself is not
    # left fitted after this loop.
    y_pred_sensitivity = cross_val_predict(model, X_temp, y, cv=kf)

    # Same regression metrics as the full-model baseline.
    mse_sensitivity = mean_squared_error(y, y_pred_sensitivity)
    rmse_sensitivity = np.sqrt(mse_sensitivity)
    r2_sensitivity = r2_score(y, y_pred_sensitivity)

    # Report the metrics obtained without this feature.
    print(f"\nSensitivity Analysis for {feature}:")
    print(f"Mean Squared Error (MSE): {mse_sensitivity:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse_sensitivity:.2f}")
    print(f"R-squared (R2): {r2_sensitivity:.2f}")
