In [1]:
import pandas as pd
from perlib.forecaster import *
from perlib.datasets import *
"""
Other bundled datasets can be loaded from perlib.datasets in the same way.
"""
# Load the bundled air-passengers sample and wrap it in a DataFrame.
dataset = load_airpassengers()
data = pd.DataFrame(dataset)
# Attach a synthetic DatetimeIndex: one calendar day per row, starting 2022-01-01.
# "D" is the canonical pandas alias for calendar-day frequency; the lowercase
# spelling "d" is deprecated in newer pandas releases.
data.index = pd.date_range(start="2022-01-01", periods=len(data), freq="D")

In [2]:
# Name the single column "Values"; later cells refer to it through y= / targetCol.
data.columns = ["Values"]

In [3]:
# Show the frame (the last expression of a cell is rendered as its output).
data

Unnamed: 0,Values
2022-01-01,112.00
2022-01-02,118.00
2022-01-03,132.00
2022-01-04,129.00
2022-01-05,121.00
...,...
2022-05-20,606.00
2022-05-21,508.00
2022-05-22,461.00
2022-05-23,390.00


In [4]:
# To read your own dataset instead of the bundled one:
#dataPrepration.read_data("../datasets/winequality-white.csv",delimiter=";")

In [5]:
"""
Model training operations are performed with different parameters.
"""

# Train one model with the one-call helper. It returns two objects: the
# forecast and the evaluation scores for the requested metrics.
forecast, evaluate = get_result(
    dataFrame=data,
    y="Values",
    modelName="ARIMA",
    dateColumn=False,
    process=False,
    forecastNumber=24,
    metric=["mape", "mae", "mse"],
    # epoch=1,
    forecastingStartDate=False,
    verbose=1,
)

Parameters created
The model training process has been started.


100%|██████████| 400/400 [00:23<00:00, 17.02it/s]


Model training process completed
The model is being saved


In [6]:
# First value returned by get_result above; see the output below for its columns.
forecast

Unnamed: 0,Predicts,Actual
2022-05-01,228.23,360.0
2022-05-02,228.23,342.0
2022-05-03,228.23,406.0
2022-05-04,228.23,396.0
2022-05-05,228.23,420.0
2022-05-06,228.23,472.0
2022-05-07,228.23,548.0
2022-05-08,228.23,559.0
2022-05-09,228.23,463.0
2022-05-10,228.23,407.0


In [7]:
# Second value returned by get_result above: the scores for the requested metrics.
evaluate

{'mean_absolute_percentage_error': 48.24388951405902,
 'mean_absolute_error': 224.02137889193878,
 'mean_squared_error': 55762.26570064559}

In [4]:
# The Time Series module helps to create many basic models without much code,
# and helps to understand which models work better without any parameter
# adjustments.
from perlib.piplines.dpipline import Timeseries

pipline = Timeseries(
    dataFrame=data,
    y="Values",
    dateColumn=False,
    process=False,
    epoch=2,
    forecastNumber=7,
    models="all",
)
predictions = pipline.fit()

  0%|          | 0/8 [00:00<?, ?it/s]

2023-01-08 10:11:54.678872: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-01-08 10:11:54.679035: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1 Pro
Parameters created
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 24, 1)]      0           []                               
                                                                                                  
 reshape (Reshape)              (None, 24, 1, 1)     0           ['input_1[0][0]']                
                                                                                                  
 conv2d (Conv2D)                (None, 19, 1, 100)   700         ['reshape[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 19, 1, 100)   0           ['conv2d[0][0]']                 
                                         

2023-01-08 10:11:56.436914: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-01-08 10:11:56.439458: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.

KeyboardInterrupt



In [8]:
# Value returned by pipline.fit(); displayed as this cell's output.
predictions

Unnamed: 0,mean_absolute_percentage_error,mean_absolute_error,mean_squared_error
LSTNET,14.05,67.7,5990.35
LSTM,7.03,38.28,2250.69
BILSTM,13.21,68.22,6661.6
CONVLSTM,9.62,48.06,2773.69
TCN,12.03,65.44,6423.1
RNN,11.53,59.33,4793.62
ARIMA,50.18,261.14,74654.48
SARIMA,10.48,51.25,3238.2


In [4]:
"""
It allows you to get summary information about the data.
"""
# The report is printed below the cell; with a single numeric column the
# bivariate analysis is skipped (see the first output line).
summarize(dataFrame=data)

Skipped Bivariate Analysis: There are less than 2 numeric variables.


		Overview
Name: Values
Type: numeric
Unique Values: 118 -> [104.0, 112.0, 114.0, 115.0, 118.0, 119.0, [...]
Missing Values: None

	  Summary Statistics
	  ------------------
                             
Number of observations 144.00
Average                280.30
Standard Deviation     119.97
Minimum                104.00
Lower Quartile         180.00
Median                 265.50
Upper Quartile         360.50
Maximum                622.00
Skewness                 0.58
Kurtosis                -0.36

	  Tests for Normality
	  -------------------
                               p-value Conclusion at α = 0.05
D'Agostino's K-squared test  0.0131981  Unlikely to be normal
Kolmogorov-Smirnov test      0.0000000  Unlikely to be normal
Shapiro-Wilk test            0.0000683  Unlikely to be normal

In [None]:
"""
To manually create data preparation and preprocessing processes;
Each function has a "mode" parameter, which defaults to "auto".
To override it, pass the mode you want.
"""

# Each step below takes the current frame and rebinds `data` to the result,
# so the steps are applied in the order written.

# Automatically performs the necessary preprocessing on the data.
data = preprocess.auto(dataFrame=data)

# Analyzes and organizes missing values.
data = preprocess.missing_num(dataFrame=data, mode="auto")

# Finds outliers in the data.
data = preprocess.find_outliers(dataFrame=data, mode="auto")

# Performs encoding operations.
data = preprocess.encode_cat(dataFrame=data, mode="auto")

# Handles duplicates (presumably duplicate rows -- TODO confirm against perlib docs).
data = preprocess.dublicates(dataFrame=data, mode="auto")

# Selects the start and end range used for the training process.
# The window has to lie inside the frame's index: the sample data is indexed
# daily from 2022-01-01 for 144 rows (through 2022-05-24), so a window that
# ends on 2022-01-01 would keep at most the first row.
# NOTE(review): whether dt1/dt2 are inclusive is not visible here -- confirm.
data = dataPrepration.trainingFordate_range(dataFrame=data, dt1="2022-01-01", dt2="2022-05-24")

Creating a deep learning architecture manually

In [5]:
"""
To build a model manually;
"""

# Every layer specification must provide 'unit', 'activation' and 'dropout'.
# Here each list has two entries.
dict_ = {
    "Layer": {
        "unit": [150, 100],
        "activation": ["tanh", "tanh"],
        "dropout": [0.2, 0.2],
    }
}

# Store the architecture and the training / forecasting settings on req_info.
req_info.layers = dict_
req_info.modelname = "lstm"
req_info.epoch = 2
req_info.targetCol = "Values"
req_info.forecastingStartDate = False
req_info.period = "daily"
req_info.forecastNumber = 24
req_info.scaler = "standard"

In [6]:
from perlib.core.models.dmodels import models
# Instantiate `models` with the settings stored on req_info; `s` is the object
# handed to dTrain and dTester in the following cells.
s = models(req_info)

Metal device set to: Apple M1 Pro


2023-01-08 13:57:05.542531: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-01-08 13:57:05.542813: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
from perlib.core.train import dTrain
# Pair the frame with the model object created above, then start training.
train = dTrain(dataFrame=data,object=s)
train.fit()

Epoch 1/2


2023-01-08 13:57:09.334660: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-01-08 13:57:09.340723: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-08 13:57:09.503066: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-08 13:57:09.621625: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-08 13:57:09.754091: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-08 13:57:09.929373: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.

KeyboardInterrupt



In [10]:
# Models are saved in the ./models folder. `path` must be the file name of a
# model you have actually trained, so replace the name below with your own.
from perlib.core.tester import dTester

t = dTester(
    dataFrame=data,
    object=s,
    path="Data-lstm-2023-01-06-09-22-27.h5",
    metric=["mape", "mae"],
)
t.forecast()



2023-01-06 09:23:56.143692: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-06 09:23:56.191055: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-01-06 09:23:56.236973: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




Unnamed: 0,Values,Predicts
2022-05-01,360.0,410.8
2022-05-02,342.0,412.11
2022-05-03,406.0,435.25
2022-05-04,396.0,429.61
2022-05-05,420.0,453.42
2022-05-06,472.0,499.69
2022-05-07,548.0,530.49
2022-05-08,559.0,499.85
2022-05-09,463.0,473.45
2022-05-10,407.0,442.65


In [11]:
# Scores for the metrics passed to dTester above (see the output below).
t.evaluate()

{'mean_absolute_percentage_error': 9.069431906163734,
 'mean_absolute_error': 38.74221547444662}