In [2]:
import numpy as np
from datetime import timedelta, datetime
import random
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Function to generate a random date within a range
def random_date(start, end):
    return start + timedelta(
        days=random.randint(0, int((end - start).days)))

# Define the start and end dates for the dataset period
start_date = datetime(2009, 1, 1)
end_date = datetime(2011, 12, 31)

In [4]:
# Sample values for categorical variables
customer_country_codes = ['USA', 'CAN', 'GBR', 'AUS', 'DEU']
product_codes = ['P001', 'P002', 'P003', 'P004', 'P005']
order_types = ['VO']
routes = ['Air', 'Sea', 'Road', 'Rail']
currencies = ['USD', 'CAD', 'GBP', 'AUD', 'EUR']

In [5]:
# Number of rows in the dataset
num_rows = 2421

# Generating the dataset
data = []
for _ in range(num_rows):
    order_date = random_date(start_date, end_date)
    delivery_date = order_date + timedelta(days=random.randint(15,180))
    country_code = random.choice(customer_country_codes)
    product_code = random.choice(product_codes)
    order_type = random.choice(order_types)
    route = random.choice(routes)
    currency = random.choice(currencies)
    value = round(random.uniform(100, 1000), 2)
    items = random.uniform(100, 1000)
    description = f"Parka - {product_code}"

    data.append([order_date, delivery_date, country_code, product_code, description,
                 order_type, f'CO-{random.randint(1000, 9999)}', value, currency, items, route])

# Creating DataFrame
df = pd.DataFrame(data, columns=['Order Date', 'Requested Delivery Date', 'Customer Country Code',
                                 'Product Code', 'Description', 'Order Type', 'Customer Order Code',
                                 'Value', 'Currency', 'Items', 'Route'])

print(df.head())  # Display the first few rows of the dataset

  Order Date Requested Delivery Date Customer Country Code Product Code  \
0 2011-10-11              2012-03-30                   CAN         P002   
1 2011-01-02              2011-03-02                   USA         P001   
2 2010-08-05              2010-12-31                   GBR         P005   
3 2011-10-21              2011-11-07                   DEU         P003   
4 2010-02-06              2010-08-04                   CAN         P003   

    Description Order Type Customer Order Code   Value Currency       Items  \
0  Parka - P002         VO             CO-7927  544.31      GBP  520.569006   
1  Parka - P001         VO             CO-2908  608.37      CAD  654.119064   
2  Parka - P005         VO             CO-2082  878.90      CAD  675.214340   
3  Parka - P003         VO             CO-9935  267.12      CAD  303.455423   
4  Parka - P003         VO             CO-6757  571.42      GBP  992.360187   

  Route  
0  Rail  
1   Sea  
2  Rail  
3   Sea  
4  Rail  


In [6]:
# Convert 'Requested Delivery Date' and 'Order Date' to datetime objects
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d.%m.%Y')
df['Requested Delivery Date'] = pd.to_datetime(df['Requested Delivery Date'], format='%d.%m.%Y')

# Calculate the time difference in days
df['Time Difference'] = (df['Requested Delivery Date'] - df['Order Date']).dt.days

# Add 'Fast Demand' column
df['Fast Demand'] = np.where(df['Time Difference'] > 30, 1, 0)

# Drop the temporary 'Time Difference' column if not needed
df = df.drop('Time Difference', axis=1)


In [7]:
print(df.head())

  Order Date Requested Delivery Date Customer Country Code Product Code  \
0 2011-10-11              2012-03-30                   CAN         P002   
1 2011-01-02              2011-03-02                   USA         P001   
2 2010-08-05              2010-12-31                   GBR         P005   
3 2011-10-21              2011-11-07                   DEU         P003   
4 2010-02-06              2010-08-04                   CAN         P003   

    Description Order Type Customer Order Code   Value Currency       Items  \
0  Parka - P002         VO             CO-7927  544.31      GBP  520.569006   
1  Parka - P001         VO             CO-2908  608.37      CAD  654.119064   
2  Parka - P005         VO             CO-2082  878.90      CAD  675.214340   
3  Parka - P003         VO             CO-9935  267.12      CAD  303.455423   
4  Parka - P003         VO             CO-6757  571.42      GBP  992.360187   

  Route  Fast Demand  
0  Rail            1  
1   Sea            1  
2  Ra

In [8]:
# Convert date columns to datetime format
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d.%m.%Y')
df['Requested Delivery Date'] = pd.to_datetime(df['Requested Delivery Date'],    format='%d.%m.%Y')
# Extract month and create a 'Season' column
df['DELIVERY MONTH'] = df['Requested Delivery Date'].dt.month
df['SEASON'] = df['Requested Delivery Date'].dt.month.map({1: 'Winter', 2:   'Winter', 3: 'Spring', 4: 'Spring', 5: 'Spring', 6: 'Summer', 7: 'Summer', 8: 'Summer', 9: 'Fall', 10: 'Fall', 11: 'Fall', 12: 'Winter'})


In [9]:
print(df.head())

  Order Date Requested Delivery Date Customer Country Code Product Code  \
0 2011-10-11              2012-03-30                   CAN         P002   
1 2011-01-02              2011-03-02                   USA         P001   
2 2010-08-05              2010-12-31                   GBR         P005   
3 2011-10-21              2011-11-07                   DEU         P003   
4 2010-02-06              2010-08-04                   CAN         P003   

    Description Order Type Customer Order Code   Value Currency       Items  \
0  Parka - P002         VO             CO-7927  544.31      GBP  520.569006   
1  Parka - P001         VO             CO-2908  608.37      CAD  654.119064   
2  Parka - P005         VO             CO-2082  878.90      CAD  675.214340   
3  Parka - P003         VO             CO-9935  267.12      CAD  303.455423   
4  Parka - P003         VO             CO-6757  571.42      GBP  992.360187   

  Route  Fast Demand  DELIVERY MONTH  SEASON  
0  Rail            1       

In [10]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [11]:
pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp38-cp38-macosx_10_9_x86_64.whl (653 kB)
[K     |████████████████████████████████| 653 kB 5.2 MB/s eta 0:00:01
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.4
Note: you may need to restart the kernel to use updated packages.


In [12]:
from pmdarima import auto_arima
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [13]:
arima_model = auto_arima( df['Fast Demand'], start_p=1, start_q=1, max_p=5
                         , max_q=5
                         , d=None
                         ,trace=True
                         , test='adf'
                         , error_action='ignore'
                         , suppress_warnings=True
                         , stepwise=True)


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=993.851, Time=1.88 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=989.851, Time=0.28 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=991.851, Time=0.30 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=991.851, Time=0.33 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=6624.191, Time=0.13 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 2.987 seconds


In [14]:
print(arima_model.summary())

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 2421
Model:                        SARIMAX   Log Likelihood                -492.925
Date:                Sun, 10 Dec 2023   AIC                            989.851
Time:                        23:11:20   BIC                           1001.435
Sample:                             0   HQIC                           994.063
                               - 2421                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.9025      0.000   3283.676      0.000       0.902       0.903
sigma2         0.0880      0.001     69.962      0.000       0.086       0.090
Ljung-Box (L1) (Q):                   0.00   Jarque-