In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Read the 2019-2023 history with its 'DateTime' column promoted to the row index,
# and the 2024 file as a plain frame (its 'DateTime' stays a regular column)
data_2019_to_2023 = pd.read_csv('grid_stability_data_2019-2023.csv').set_index('DateTime')
data_2024 = pd.read_csv('stability-2024.csv')

# Parse the index labels as timestamps written day-month-year hour:minute
data_2019_to_2023.index = pd.to_datetime(
    data_2019_to_2023.index,
    format='%d-%m-%Y %H:%M',
)

# 'stability' is the prediction target; every remaining column is a feature
y = data_2019_to_2023.loc[:, 'stability']
X = data_2019_to_2023.drop(columns='stability')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn per-column means from the training split only, then use those means
# to fill missing values in both the training and the test features
imputer = SimpleImputer(strategy='mean').fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Create the logistic regression classifier and fit it on the imputed
# training features (fit() returns the estimator itself)
logistic_regression_model = LogisticRegression(random_state=42).fit(
    X_train_imputed,
    y_train,
)

# Build the 2024 feature frame: discard the 'DateTime' column, then select the
# training feature columns in the same order as X
data_2024_features = data_2024.drop(columns='DateTime')[X.columns]

# Fill missing 2024 values with the already-fitted imputer (no re-fitting here)
data_2024_features_imputed = imputer.transform(data_2024_features)

# Run the trained classifier over the imputed 2024 features
predictions_2024 = logistic_regression_model.predict(data_2024_features_imputed)

# Store the predictions on the 2024 frame as a 'stability' column
data_2024['stability'] = predictions_2024

# Write the 2024 rows together with their predicted stability, without the row index
data_2024.to_csv(
    'predicted_stability_2024_logistic_regression.csv',
    index=False,
)


In [3]:
# Score the held-out test split with the trained classifier
predictions_test = logistic_regression_model.predict(X_test_imputed)

# Compare the predicted labels with the true test labels and report accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=predictions_test)
print("Accuracy of the Logistic Regression model:", accuracy)


Accuracy of the Logistic Regression model: 0.7171705647461495
