In [None]:
!pip install ucimlrepo pandas numpy scikit-learn

In [None]:
# ------- Import Dataset -------------------

import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo

# Fetch the dataset with id=360 from the UCI ML repository (requires network access).
air_quality = fetch_ucirepo(id=360)

# Data (as pandas dataframes). Work on a copy so the frame owned by
# `air_quality.data` is never mutated and re-running this cell is safe.
X = air_quality.data.features.copy()
y = air_quality.data.targets  # not used by the cleaning below; kept for later cells

# Combine Date and Time into a single DateTime index and drop the two source columns.
# Resulting timestamps render as YYYY-MM-DD HH:MM:SS, e.g. 2004-03-10 18:00:00.
# (Original note: this leaves "only 14 features" - TODO confirm the column count.)
X['DateTime'] = pd.to_datetime(X['Date'] + ' ' + X['Time'])
X = X.set_index('DateTime').drop(columns=['Date', 'Time'])

# -200 is treated as a missing-value sentinel and replaced by NaN.
# One replace is enough: -200 == -200.0, so integer and float cells both match.
X = X.replace(-200, np.nan)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np

# Working frame for the model: only the 'CO(GT)' series from the cleaned features.
# `df` must be created here - it is not defined by any earlier cell, so without
# this line the cell fails with NameError on a fresh kernel.
# Restricting it to one column also keeps the NaN filtering below from discarding
# rows because of gaps in columns the model never uses.
df = X[['CO(GT)']].copy()

# Create lagged features (value one and two rows earlier)
df['lag_1'] = df['CO(GT)'].shift(1)
df['lag_2'] = df['CO(GT)'].shift(2)
# ... more lags as needed

# Create a binary target variable: 1 if the value is strictly greater than the
# previous row's value, 0 otherwise (decrease OR no change).
# Comparisons involving NaN evaluate to False (-> 0); those rows are removed below.
df['target'] = np.where(df['CO(GT)'] > df['lag_1'], 1, 0)

# Drop rows where the current value or any lag is missing
# (the first rows created by lagging, plus gaps in the series itself).
df = df.dropna(subset=['CO(GT)', 'lag_1', 'lag_2'])

# Features and target
X2 = df[['lag_1', 'lag_2']]  # and any additional lag features
y = df['target']

# Split into training and testing sets.
# shuffle=False keeps chronological order: the last 20% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.2, shuffle=False)

# Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

In [None]:
import matplotlib.pyplot as plt

# Predict every held-out row with the already-fitted model.
# The model is not refit between rows, so a single vectorized call yields the same
# labels as predicting one row at a time. Passing the DataFrame (rather than bare
# NumPy rows) also keeps the feature names the model was fitted with.
predictions = list(model.predict(X_test))

# Time index for the predictions: the test rows carry their own timestamps.
# This stays correct even when the test set is empty.
prediction_dates = X_test.index

fig, ax = plt.subplots(figsize=(15, 4))
ax.plot(prediction_dates, predictions, label='Predictions')
ax.set_title('Binary Predictions for Future Values')
ax.set_xlabel('Date')
ax.set_ylabel('Predicted Class')
ax.legend()
plt.show()