In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense


In [2]:
# Load datasets
finance_df = pd.read_csv('./Dataset/Monthly.csv')
energy_df = pd.read_csv('./Dataset/Hourly.csv')
environment_df = pd.read_csv('./Dataset/Daily.csv')

# Data cleaning
def clean_data(df):
    df.dropna(inplace=True)

clean_data(finance_df)
clean_data(energy_df)
clean_data(environment_df)

In [3]:
# Normalization/Standardization
scaler = MinMaxScaler()
finance_df[['open', 'high', 'low', 'close']] = scaler.fit_transform(finance_df[['open', 'high', 'low', 'close']])
energy_df['AEP_MW'] = scaler.fit_transform(energy_df[['AEP_MW']])
environment_df['value'] = scaler.fit_transform(environment_df[['value']])

In [4]:
# Stationarization
def stationarize_data(data):
    return data.diff().dropna()

finance_df['close_diff'] = stationarize_data(finance_df['close'])
energy_df['AEP_MW_diff'] = stationarize_data(energy_df['AEP_MW'])
environment_df['value_diff'] = stationarize_data(environment_df['value'])

In [6]:

# ARIMA model
def fit_arima(data):
    model = ARIMA(data.values, order=(5,1,0))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=5)[0]  # Forecast next 5 values
    return forecast

finance_forecast = fit_arima(finance_df['close'])
energy_forecast = fit_arima(energy_df['AEP_MW'])
environment_forecast = fit_arima(environment_df['value'])


# ANN model
def build_ann(X_train, y_train):
    model = Sequential()
    model.add(Dense(8, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, y_train, epochs=100, batch_size=8, verbose=0)
    return model

def evaluate_ann(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse

# Hybrid model integration
def integrate_models(arima_forecast, ann_model):
    arima_forecast = arima_forecast.reshape(-1, 1)
    ann_residuals = ann_model.predict(arima_forecast)
    integrated_predictions = arima_forecast + ann_residuals
    return integrated_predictions



In [24]:
# Repeat the forecast value to match the length of finance_df['close']
# finance_forecast = np.repeat(finance_forecast, len(finance_df['close']))
finance_forecast_extended = np.repeat(finance_forecast[-1], len(finance_df['close']))
# Reshape finance_forecast to match the shape of y
finance_forecast = finance_forecast.reshape(-1, 1)

# Split data into features (X) and target variable (y)
X_train, X_test, y_train, y_test = train_test_split(finance_forecast, y, test_size=0.2, shuffle=False)

# Build ANN model
ann_model = build_ann(X_train, y_train)

# Evaluate ANN model
ann_evaluation = evaluate_ann(ann_model, X_test, y_test)

# Integrate models
integrated_predictions = integrate_models(finance_forecast, ann_model)

# Evaluate overall forecast accuracy
print("Mean Squared Error of ARIMA model:", mean_squared_error(finance_df['close'], finance_forecast))
print("Mean Squared Error of Integrated Model:", mean_squared_error(finance_df['close'], integrated_predictions.flatten()))


ValueError: Found input variables with inconsistent numbers of samples: [33884041, 5821]

In [18]:
print(finance_df)
print("Forcast: ", finance_forecast)
print(finance_df['close'])

            Date      open      high       low     close  close_diff
0       1/3/2000  0.191497  0.189829  0.187582  0.189001         NaN
1       1/4/2000  0.188096  0.184304  0.177631  0.175457   -0.013544
2       1/5/2000  0.174570  0.174130  0.172829  0.176110    0.000653
3       1/6/2000  0.175222  0.173798  0.176335  0.176436    0.000325
4       1/7/2000  0.175547  0.180969  0.178433  0.185664    0.009228
...          ...       ...       ...       ...       ...         ...
5816  02/14/2023  0.835692  0.840215  0.833458  0.839703   -0.000282
5817  02/15/2023  0.833946  0.837387  0.835638  0.842487    0.002784
5818  02/16/2023  0.832795  0.834581  0.832116  0.828606   -0.013881
5819  02/17/2023  0.823821  0.821235  0.822017  0.825858   -0.002748
5820  02/21/2023  0.817668  0.814163  0.809190  0.806016   -0.019842

[5821 rows x 6 columns]
Forcast:  [[0.80791096]]
0       0.189001
1       0.175457
2       0.176110
3       0.176436
4       0.185664
          ...   
5816    0.839703
581

In [5]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_absolute_error

# Load finance data
finance_df = pd.read_csv('./Dataset/Monthly.csv')
finance_df['Date'] = pd.to_datetime(finance_df['Date'])
finance_df.set_index('Date', inplace=True)

# Load energy data
energy_df = pd.read_csv('./Dataset/Hourly.csv')
energy_df['Datetime'] = pd.to_datetime(energy_df['Datetime'])
energy_df.set_index('Datetime', inplace=True)

# Load environment data
environment_df = pd.read_csv('./Dataset/Daily.csv')
environment_df['date'] = pd.to_datetime(environment_df['date'])
environment_df.set_index('date', inplace=True)

# Train ARIMA model
arima_model = ARIMA(finance_df['close'], order=(2, 1, 2))
arima_result = arima_model.fit()
arima_forecast = arima_result.forecast(steps=10)

# Calculate residuals
residuals = finance_df['close'] - arima_result.fittedvalues
energy_df.reset_index(inplace=True)
environment_df.reset_index(inplace=True)
# Combine input features
input_features = pd.concat([residuals, energy_df['AEP_MW'], environment_df['value']], axis=1)

# Scale input features
scaler = MinMaxScaler()
scaled_input_features = scaler.fit_transform(input_features)

# Define ANN model
ann_model = Sequential([
    Dense(64, activation='relu', input_shape=(scaled_input_features.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

# Compile ANN model
ann_model.compile(optimizer='adam', loss='mse')

# Train ANN model
ann_model.fit(scaled_input_features, finance_df['close'], epochs=100, batch_size=32)

# Generate forecasts using ANN model
ann_forecast = ann_model.predict(scaled_input_features)[-10:]

# Combine forecasts
final_forecast = arima_forecast + ann_forecast

# Evaluate performance
actual_values = finance_df['close'][-10:]
mae = mean_absolute_error(actual_values, final_forecast)
print("Mean Absolute Error (MAE):", mae)

print("Final Forecast:")
print(final_forecast)



  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(


TypeError: Feature names are only supported if all input features have string names, but your input has ['int', 'str'] as feature name / column name types. If you want feature names to be stored and validated, you must convert them all to strings, by using X.columns = X.columns.astype(str) for example. Otherwise you can remove feature / column names from your input data, or convert them all to a non-string data type.

In [19]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.optimizers import Adam
import numpy as np

# Load datasets
finance_df = pd.read_csv("./Dataset/Monthly.csv")
energy_df = pd.read_csv("./Dataset/Hourly.csv")
environment_df = pd.read_csv("./Dataset/Daily.csv")

# Preprocessing
def preprocess_finance_data(data):
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)
    data.fillna(method='ffill', inplace=True)
    z_scores = (data - data.mean()) / data.std()
    data = data[(z_scores < 3).all(axis=1)]
    return data

def preprocess_energy_data(data):
    data['Datetime'] = pd.to_datetime(data['Datetime'])
    data.set_index('Datetime', inplace=True)
    return data

def preprocess_environment_data(data):
    data['date'] = pd.to_datetime(data['date'])
    data.set_index('date', inplace=True)
    return data

finance_df = preprocess_finance_data(finance_df)
energy_df = preprocess_energy_data(energy_df)
environment_df = preprocess_environment_data(environment_df)

# ARIMA Model
def fit_arima(data):
    model = ARIMA(data, order=(2,2,1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=12)  # Example forecast for next 12 steps
    return forecast

# ANN Model
def prepare_data_for_ann(data):
    forecast = fit_arima(data)
    X = np.array(forecast).reshape(-1, 1)
    y = np.random.rand(len(X))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train_scaled, X_test_scaled, y_train, y_test

# Build ANN Model
def build_ann_model(input_shape):
    model = Sequential([
        Dense(64, activation='relu', input_shape=input_shape),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Train ANN Model
def train_ann_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))
    return model

# Hybrid Model Integration
def integrate_hybrid_model(ann_model, data):
    # Fit ARIMA model and get forecast
    forecast = fit_arima(data)
    
    # Scale forecast using MinMaxScaler
    scaler = MinMaxScaler()
    forecast_scaled = scaler.fit_transform(np.array(forecast).reshape(-1, 1))
    
    # Predict using ANN model
    hybrid_prediction = ann_model.predict(forecast_scaled)
    
    return hybrid_prediction

# User input for choosing dataset
dataset_choice = input("Choose dataset for forecasting (finance, energy, environment): ")

# Perform forecasting based on user's choice
if dataset_choice == "finance":
    X_train, X_test, y_train, y_test = prepare_data_for_ann(finance_df['close'])
    ann_model = build_ann_model(input_shape=(X_train.shape[1],))
    trained_ann_model = train_ann_model(ann_model, X_train, y_train, X_test, y_test)
    hybrid_prediction = integrate_hybrid_model(trained_ann_model, finance_df['close'])
    actual_values = finance_df['close']
    print("Forecasting predictions for finance dataset:", hybrid_prediction)
elif dataset_choice == "energy":
    X_train, X_test, y_train, y_test = prepare_data_for_ann(energy_df['AEP_MW'])
    ann_model = build_ann_model(input_shape=(X_train.shape[1],))
    trained_ann_model = train_ann_model(ann_model, X_train, y_train, X_test, y_test)
    hybrid_prediction = integrate_hybrid_model(trained_ann_model, energy_df['AEP_MW'])
    actual_values = energy_df['AEP_MW']
    print("Forecasting predictions for energy dataset:", hybrid_prediction)
elif dataset_choice == "environment":
    X_train, X_test, y_train, y_test = prepare_data_for_ann(environment_df['value'])
    ann_model = build_ann_model(input_shape=(X_train.shape[1],))
    trained_ann_model = train_ann_model(ann_model, X_train, y_train, X_test, y_test)
    hybrid_prediction = integrate_hybrid_model(trained_ann_model, environment_df['value'])
    actual_values = environment_df['value']
    print("Forecasting predictions for environment dataset:", hybrid_prediction)
else:
    print("Invalid dataset choice. Please choose from finance, energy, or environment.")

# Evaluation
mae = mean_absolute_error(actual_values[-len(hybrid_prediction):], hybrid_prediction)
print("Mean Absolute Error (MAE):", mae)



  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Epoch 1/100


  return get_prediction_index(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Forecasting predictions for finance dataset: [[0.3533595 ]
 [0.29544684]
 [0.33944103]
 [0.34756675]
 [0.35627934]
 [0.36460847]
 [0.37302467]
 [0.38142213]
 [0.3898237 ]
 [0.39822444]
 [0.40662527]
 [0.4150261 ]]
Mean Absolute Error (MAE): 4107.0682626458


  return get_prediction_index(


In [17]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error

# Load and preprocess the data
# (Assuming the dataset is already preprocessed as per previous examples)

# Train ARIMA model
def fit_arima(data):
    model = ARIMA(data, order=(5,1,0))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=12)  # Example forecast for next 12 steps
    return forecast

# Train ANN model
def train_ann_model(X_train, y_train):
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    model.fit(X_train, y_train, epochs=100, batch_size=32)
    return model

# Generate predictions from both models
def generate_predictions_arima(data):
    forecast = fit_arima(data)
    return forecast

def generate_predictions_ann(X_train, y_train, X_test):
    model = train_ann_model(X_train, y_train)
    predictions = model.predict(X_test)
    return predictions

# Combine predictions using weighted average
def ensemble_predictions(predictions_arima, predictions_ann, weights=None):
    if weights is None:
        weights = [0.5, 0.5]  # Equal weighting for simplicity
    ensemble_prediction = (weights[0] * predictions_arima) + (weights[1] * predictions_ann)
    return ensemble_prediction

# User input for choosing dataset
dataset_choice = input("Choose dataset (finance, energy, environment): ")

# Load and preprocess the chosen dataset
if dataset_choice == "finance":
    # Generate predictions from ARIMA model
    predictions_arima = generate_predictions_arima(finance_df['close'])
    # Generate predictions from ANN model
    predictions_ann = generate_predictions_ann(X_train, y_train, X_test)
    # Combine predictions using ensemble method
    ensemble_prediction = ensemble_predictions(predictions_arima, predictions_ann)
elif dataset_choice == "energy":
    # Generate predictions from ARIMA model
    predictions_arima = generate_predictions_arima(energy_df['AEP_MW'])
    # Generate predictions from ANN model
    predictions_ann = generate_predictions_ann(X_train, y_train, X_test)
    # Combine predictions using ensemble method
    ensemble_prediction = ensemble_predictions(predictions_arima, predictions_ann)
elif dataset_choice == "environment":
    # Generate predictions from ARIMA model
    predictions_arima = generate_predictions_arima(environment_df['value'])
    # Generate predictions from ANN model
    predictions_ann = generate_predictions_ann(X_train, y_train, X_test)
    # Combine predictions using ensemble method
    ensemble_prediction = ensemble_predictions(predictions_arima, predictions_ann)
else:
    print("Invalid dataset choice.")

# Evaluate ensemble prediction
mae_ensemble = mean_absolute_error(actual_values[-len(ensemble_prediction):], ensemble_prediction)
print("Mean Absolute Error (MAE) for ensemble model:", mae_ensemble)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Epoch 1/100


  return get_prediction_index(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

ValueError: Length of values (3) does not match length of index (12)