In [1]:
import weka.core.jvm as jvm
import os
import traceback
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.core.dataset import Instances
from weka.timeseries import TSEvaluation, TSEvalModule, WekaForecaster,Periodicity
from weka.classifiers import Classifier
from weka.core.classes import serialization_write, serialization_read
# Start the JVM; the debug log recorded for this cell shows the bundled jars
# being added to the classpath.
# NOTE(review): packages=True presumably turns on Weka package support, which
# the time series classes imported above would need — confirm.
jvm.start(packages=True)

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['C:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\javabridge\\jars\\rhino-1.7R4.jar', 'C:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\javabridge\\jars\\runnablequeue.jar', 'C:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\javabridge\\jars\\cpython.jar', 'c:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\weka\\lib\\arpack_combined.jar', 'c:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\weka\\lib\\core.jar', 'c:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\weka\\lib\\mtj.jar', 'c:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\weka\\lib\\python-weka-wrapper.jar', 'c:\\Users\\nptu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\weka\\lib\\weka.jar']
DEBUG:weka.core.jvm:MaxH

In [2]:
# load a dataset
# Loader comes from the import cell at the top of the notebook; it is not
# re-imported here so that all imports stay in one place.
loader = Loader(classname="weka.core.converters.ArffLoader")
airline_data = loader.load_file("data/airline.arff")
# class_is_last() is left disabled: the cells below pick the target through
# `forecaster.fields_to_forecast` instead of a class attribute.
# airline_data.class_is_last()

In [3]:
# available evaluation modules
print("Evaluation modules")
modules = TSEvalModule.module_list()
print("Available modules")
for module in modules:
    # one line per module, prefixed with a dash
    print(f"-{module!s}")

# a single module can also be looked up by its name
print("Loading module by name")
mae_module = TSEvalModule.module("MAE")
print(mae_module)

Evaluation modules
Available modules
-Error
-MAE
-MSE
-RMSE
-MAPE
-DAC
-RAE
-RRSE
Loading module by name
MAE


In [5]:
# evaluate forecaster
print("Evaluate forecaster")

# Forecaster setup: linear regression as base learner, forecasting the
# "passenger_numbers" field, with "Date" as the lag maker's timestamp field.
forecaster = WekaForecaster()
forecaster.fields_to_forecast = ["passenger_numbers"]
forecaster.base_forecaster = Classifier(classname="weka.classifiers.functions.LinearRegression")
forecaster.tslag_maker.timestamp_field = "Date"
forecaster.tslag_maker.adjust_for_variance = False
forecaster.tslag_maker.include_powers_of_time = True
forecaster.tslag_maker.include_timelag_products = True
forecaster.tslag_maker.remove_leading_instances_with_unknown_lag_values = False
forecaster.tslag_maker.add_month_of_year = True
forecaster.tslag_maker.add_quarter_of_year = True
print("algorithm name: " + str(forecaster.algorithm_name))
print("command-line: " + forecaster.to_commandline())
print("lag maker: " + forecaster.tslag_maker.to_commandline())

# Evaluation setup on the full airline data.
# NOTE(review): the 0.0 argument is presumably the size of the held-out test
# split (i.e. no test data) — confirm against the TSEvaluation API.
evaluation = TSEvaluation(airline_data, 0.0)
evaluation.evaluate_on_training_data = False
evaluation.evaluate_on_test_data = False
# prime with as many instances as the largest lag the lag maker uses
evaluation.prime_window_size = forecaster.tslag_maker.max_lag
evaluation.prime_for_test_data_with_test_data = True
evaluation.rebuild_model_after_each_test_forecast_step = False
evaluation.forecast_future = True
evaluation.horizon = 20
evaluation.evaluation_modules = "MAE,RMSE"
evaluation.evaluate(forecaster)
print(evaluation)

# Reporting: each section is printed only if the matching evaluation flag is
# set or the matching data set exists. With both flags False above, only the
# future-forecast sections can produce output.
if evaluation.evaluate_on_training_data or evaluation.evaluate_on_test_data:
    print(evaluation.summary())
if evaluation.evaluate_on_training_data:
    print("Predictions (training data): " + evaluation.predictions_for_training_data(1).summary)
if evaluation.evaluate_on_test_data:
    # fetch the step-1 test predictions once and reuse them below
    preds = evaluation.predictions_for_test_data(1)
    print("Predictions (test data): " + preds.summary)
    print("Counts for targets: " + str(preds.counts_for_targets()))
    print("Errors for target 'passenger_numbers': " + str(preds.errors_for_target("passenger_numbers")))
    # label corrected: this call returns predictions, not errors
    print("Predictions for all targets: " + str(preds.predictions_for_all_targets()))
if evaluation.training_data is not None:
    print("Future forecasts (training)\n" + evaluation.print_future_forecast_on_training_data(forecaster))
if evaluation.test_data is not None:
    print("Future forecasts (test)\n" + evaluation.print_future_forecast_on_test_data(forecaster))
if evaluation.evaluate_on_training_data:
    print(evaluation.print_predictions_for_training_data("Predictions (training)", "passenger_numbers", 1))
if evaluation.evaluate_on_test_data:
    print(evaluation.print_predictions_for_test_data("Predictions (test)", "passenger_numbers", 1))

Evaluate forecaster
algorithm name: LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4
command-line: weka.classifiers.timeseries.WekaForecaster -F passenger_numbers -L 1 -M 12 -B 2 -C 2 -G Date -month -quarter -conf 0 -P 0.95 -W "weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4"
lag maker: weka.filters.supervised.attribute.TSLagMaker -F passenger_numbers -L 1 -M 12 -G Date -month -quarter
=== Evaluation setup ===

Relation: airline_passengers
# Training instances: 144
Evaluate on training data: False
Evaluate on test data: False
Horizon: 20
Prime window size: 12
Prime for test data with test data: True
Rebuild model after each test forecast step: False
Forecast future: True
Evaluation modules: Error, MAE, RMSE


Future forecasts (training)
Time        passenger_numbers 
1949-01-01                112 
1949-02-01                118 
1949-03-01                132 
1949-04-01                129 
1949-05-01                121 
1949-06-01                135

In [6]:
# build forecaster
print("Build/use forecaster")
# 90% of the data goes to training, the remainder is kept for comparison
airline_train, airline_test = airline_data.train_test_split(90.0)

forecaster = WekaForecaster()
# The target is assigned once, in the same list form the evaluation cell uses
# (a second, string-valued assignment of the same field was redundant).
forecaster.fields_to_forecast = ["passenger_numbers"]
forecaster.base_forecaster = Classifier(classname="weka.classifiers.functions.LinearRegression")
forecaster.build_forecaster(airline_train)

# Prime the forecaster with the last `num_prime_instances` training rows.
# NOTE(review): the recorded command line for this forecaster shows a maximum
# lag of 12 ("-M 12") while only 10 rows are used for priming — confirm that
# 10 is intended rather than `forecaster.tslag_maker.max_lag`.
num_prime_instances = 10
airline_prime = Instances.copy_instances(airline_train, airline_train.num_instances - num_prime_instances, num_prime_instances)
forecaster.prime_forecaster(airline_prime)

# Forecast as many steps as there are test rows and compare row by row.
num_future_forecasts = airline_test.num_instances
preds = forecaster.forecast(num_future_forecasts)
print("Actual,Predicted,Error")
for i in range(num_future_forecasts):
    # assumes attribute index 0 is "passenger_numbers" — TODO confirm against the ARFF header
    actual = airline_test.get_instance(i).get_value(0)
    # first (and only) forecast target at step i
    predicted = preds[i][0].predicted
    error = actual - predicted
    print("%f,%f,%f" % (actual, predicted, error))

Build/use forecaster
Actual,Predicted,Error
362.000000,392.785104,-30.785104
405.000000,423.636320,-18.636320
417.000000,405.274725,11.725275
391.000000,403.901848,-12.901848
419.000000,432.343387,-13.343387
461.000000,436.123875,24.876125
472.000000,457.813499,14.186501
535.000000,497.410969,37.589031
622.000000,553.917650,68.082350
606.000000,555.585257,50.414743
508.000000,507.593599,0.406401
461.000000,470.893493,-9.893493
390.000000,458.595348,-68.595348
432.000000,461.913999,-29.913999


In [7]:
# serialization (if supported)
print("Serialization")
model_file = "base.model"
if not forecaster.base_model_has_serializer:
    # no dedicated serializer: write the base forecaster with the generic
    # serialization helpers and wrap what is read back in a Classifier
    print("Base model has no serializer, falling back to generic serialization")
    serialization_write(model_file, forecaster.base_forecaster)
    cls = Classifier(jobject=serialization_read(model_file))
    print(cls.to_commandline())
else:
    # dedicated serializer: save the base model, then load it into a fresh forecaster
    forecaster.save_base_model(model_file)
    forecaster2 = WekaForecaster()
    forecaster2.load_base_model(model_file)
    print(forecaster2.to_commandline())

Serialization
Base model has no serializer, falling back to generic serialization
weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4


In [8]:
# state management
print("State")
model_file = "state.ser"
if forecaster.uses_state:
    # stateful forecaster: persist its state and load it into a fresh instance
    forecaster.serialize_state(model_file)
    forecaster2 = WekaForecaster()
    forecaster2.load_serialized_state(model_file)
else:
    # otherwise serialize the whole forecaster object and rebuild it from the file
    print("Forecaster does not use state, falling back to generic serialization")
    serialization_write(model_file, forecaster)
    forecaster2 = WekaForecaster(jobject=serialization_read(model_file))
# in both cases, show the configuration of the restored forecaster
print(forecaster2.to_commandline())

State
Forecaster does not use state, falling back to generic serialization
weka.classifiers.timeseries.WekaForecaster -F passenger_numbers -L 1 -M 12 -B 2 -C 2 -G ArtificialTimeIndex -conf 0 -P 0.95 -W "weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8 -num-decimal-places 4"


In [None]:
# Shut the JVM down; run this only after all Weka work in the notebook is done.
# NOTE(review): presumably the JVM cannot be started again in the same kernel
# after this call — confirm before re-running earlier cells.
jvm.stop()