In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# --- Load -------------------------------------------------------------------
# The 2019-2023 frame is the training source and is indexed by its 'DateTime'
# column; the 2024 frame is the one that gets scored further down.
data_2019_to_2023 = pd.read_csv('grid_stability_data_2019-2023.csv').set_index('DateTime')
data_2024 = pd.read_csv('stability-2024.csv')

# Convert the index from day-month-year strings to real timestamps.
data_2019_to_2023.index = pd.to_datetime(
    data_2019_to_2023.index,
    format='%d-%m-%Y %H:%M',
)

# --- Target encoding --------------------------------------------------------
# Replace the 'stability' labels with integer codes in place; the fitted
# encoder is kept so predictions can be mapped back to the original labels.
label_encoder = LabelEncoder()
data_2019_to_2023['stability'] = label_encoder.fit_transform(
    data_2019_to_2023['stability']
)

# --- Features / target ------------------------------------------------------
y = data_2019_to_2023['stability']               # Target variable
X = data_2019_to_2023.drop(columns='stability')  # Every remaining column is a feature

# --- Train / test split -----------------------------------------------------
# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model ------------------------------------------------------------------
# XGBoost classifier with library defaults apart from the fixed seed.
xgboost_model = xgb.XGBClassifier(random_state=42)
xgboost_model.fit(X_train, y_train)

# --- Score the 2024 data ----------------------------------------------------
# Drop the timestamp column, then select exactly the training feature columns
# in the training column order.
data_2024_features = data_2024.drop(columns='DateTime')[X.columns]
predictions_2024 = xgboost_model.predict(data_2024_features)

# Map the integer predictions back to the original labels and store them in a
# 'stability' column on the 2024 frame.
data_2024['stability'] = label_encoder.inverse_transform(predictions_2024)

# --- Persist ----------------------------------------------------------------
# Write the 2024 rows plus the predicted label, without the positional index.
data_2024.to_csv('predicted_stability_2024_xgboost.csv', index=False)


In [4]:
# Score the held-out rows with the trained classifier.
predictions_test = xgboost_model.predict(X_test)

# Fraction of held-out rows whose predicted class equals the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions_test)
print("Accuracy of the XGBoost model:", accuracy)


Accuracy of the XGBoost model: 0.7043924700513405
