In [43]:
import numpy as np
import pandas as pd
import matplotlib as mp
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [44]:
data = pd.read_csv('https://raw.githubusercontent.com/qmdismnp/Schulich_DS_MBAN/refs/heads/main/dataset.csv?')

In [45]:
data

Unnamed: 0,order_date,requested_delivery_date,Customer Country Code,Product Code,Description,order_type,Customer Order Code,value,Curr,items,Route
0,13.07.2009,28.01.2010,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435553,2337.00,RUB,6,RU0001
1,15.07.2009,24.03.2010,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435694,10160.25,RUB,23,RU0001
2,16.07.2009,04.02.2010,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435741,2992.50,RUB,7,RU0001
3,17.07.2009,04.02.2010,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435907,4061.25,RUB,9,RU0001
4,21.07.2009,01.02.2010,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435963,2208.75,RUB,5,RU0001
...,...,...,...,...,...,...,...,...,...,...,...
2415,13.07.2011,15.02.2012,HR,L12919200,Parka Outdoor Lifestyle STD,VO,3200819196,128.52,EUR,12,FI0003
2416,13.07.2011,15.02.2012,HR,L12919200,Parka Outdoor Lifestyle STD,VO,3200819201,128.52,EUR,12,FI0003
2417,13.07.2011,15.02.2012,HR,L12919200,Parka Outdoor Lifestyle STD,VO,3200819206,128.52,EUR,12,FI0003
2418,13.07.2011,15.02.2012,HR,L12919200,Parka Outdoor Lifestyle STD,VO,3200819210,107.10,EUR,10,FI0003


Data cleaning

In [46]:
# Handle missing or invalid values
data.replace(r'\\N', np.nan, regex=True, inplace=True)  # Replace invalid strings
data.fillna(0, inplace=True)  # Replace NaN values with 0 (or use appropriate imputation)

Q1

In [47]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt

def calculate_monthly_orders_with_sarima(data):
    """
    Groups transactional data by month to calculate the number of unique orders.
    Applies SARIMA model for forecasting and evaluates its performance.
    Forecasts for the next five months and the next two months.
    """
    # Step 1: Preprocessing
    data['order_date'] = pd.to_datetime(data['order_date'], format='%d.%m.%Y')
    data['year_month'] = data['order_date'].dt.to_period('M')
    monthly_orders = (
        data.groupby('year_month')['Customer Order Code']
        .nunique()
        .reset_index(name='distinct_orders')
    )

    # Prepare data for SARIMA
    monthly_orders['year_month'] = pd.to_datetime(monthly_orders['year_month'].astype(str))
    monthly_orders.set_index('year_month', inplace=True)

    # Split data into training and testing sets
    train_size = int(len(monthly_orders)) - 5
    train_data = monthly_orders.iloc[:train_size]
    test_data = monthly_orders.iloc[train_size:]

    # Step 2: Fit SARIMA model
    sarima_model = SARIMAX(train_data['distinct_orders'],
                           order=(1, 0, 2),
                           seasonal_order=(1, 1, 1, 12))
    sarima_result = sarima_model.fit(disp=False)

    # Step 3: Forecast for the test set (existing data)
    forecast_test = sarima_result.forecast(steps=len(test_data))
    
    # Step 4: Forecast for future months
    future_forecast_5_months = sarima_result.forecast(steps=5)


    return future_forecast_5_months

Q2

In [48]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def classify_and_evaluate_product_demand(data):
    """
    Prepares data for a classification model by encoding features, trains a logistic regression model,
    and evaluates it. Forecasts demand for the next specified number of months.
    """
    # Add seasonality based on the order date
    def get_season(month):
        if month in [12, 1, 2]:
            return 'Winter'
        elif month in [3, 4, 5]:
            return 'Spring'
        elif month in [6, 7, 8]:
            return 'Summer'
        else:
            return 'Fall'

    data['order_date'] = pd.to_datetime(data['order_date'], format='%d.%m.%Y')
    data['Season'] = data['order_date'].dt.month.apply(get_season)

    # Encode categorical variables
    encoded_data = pd.get_dummies(data, columns=['Season','year_month', 'Customer Country Code','Curr', 'Route', 'order_type'], drop_first=True)

    # Define features and target variable
    X = encoded_data.drop(columns=['Product Code', 'order_date', 'requested_delivery_date', 'Customer Order Code', 'Description'])
    y = encoded_data['Product Code']

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a logistic regression model
    logistic_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, random_state=42)
    logistic_model.fit(X_train, y_train)

    # Make predictions
    y_pred_logistic = logistic_model.predict(X_test)

    # Forecast demand for the next `months`
    future_forecast_demand = np.unique(y_pred_logistic)

    return future_forecast_demand

Q3

In [49]:
import pandas as pd
import numpy as np

def simulate_quantity_demand(data, n_months=5):
    """
    Recalculates 25th and 75th percentiles for each product and simulates demand for the next n months.
    :param data: Pandas DataFrame with columns 'Product Code' and 'items'.
    :param n_months: Number of months to simulate demand for.
    :return: DataFrame with simulated demand for each product.
    """
    # Recalculate percentiles for each product
    # Ensure the 'items' column is numeric
    data['items'] = pd.to_numeric(data['items'], errors='coerce')
    # Drop rows with NaN in the 'items' column after conversion
    data = data.dropna(subset=['items'])
    # Group by 'Product Code' and calculate the quantiles
    quantity_bounds = data.groupby('Product Code')['items'].quantile([0.25, 0.5, 0.75]).unstack().reset_index()
    # Rename the columns for clarity
    quantity_bounds.columns = ['Product Code', '25th Percentile','50th Percentile', '75th Percentile']


    # Simulate demand for the next n_months
    simulated_demand = []

    for _, row in quantity_bounds.iterrows():
        mean_quantity = row['50th Percentile']
        std_dev_quantity = (row['75th Percentile'] - row['25th Percentile']) / 6  # Assuming normal distribution
        product_demand = np.random.normal(mean_quantity, std_dev_quantity, n_months).clip(0)  # Ensure no negative values
        simulated_demand.append(product_demand)

    # Create a DataFrame for the simulated demand
    simulated_demand_data = pd.DataFrame(
        simulated_demand,
        columns=[f"Month {i+1}" for i in range(n_months)],
        index=quantity_bounds['Product Code']
    ).reset_index()

    # Rename columns for clarity
    simulated_demand_data.rename(columns={'index': 'Product Code'}, inplace=True)

    return simulated_demand_data

In [64]:
simulate_quantity_demand(data)

Unnamed: 0,Product Code,Month 1,Month 2,Month 3,Month 4,Month 5
0,L10705000,5.53073,5.873876,6.496701,6.310992,6.023981
1,L10705100,9.793377,9.707179,9.425231,10.66452,8.356164
2,L10705200,5.723702,6.725485,6.508793,5.330738,5.761408
3,L10705300,7.289474,5.654259,7.953344,7.612941,6.289844
4,L10705400,7.733946,5.772218,9.072885,5.622738,3.7413
5,L10705500,8.455173,7.122715,7.044557,7.833685,11.080199
6,L10705600,7.956314,7.035775,7.965798,7.531081,7.637003
7,L10705700,7.35527,6.78118,7.104237,7.34975,6.595239
8,L10850600,5.420301,6.525951,5.189146,5.028922,5.607341
9,L10850700,1.820771,1.787061,1.985569,2.271493,1.138555


Q4

In [50]:
import pandas as pd
import numpy as np

def calculate_and_simulate_lead_time(data, n_samples=1, n_simulations = 100):
    """
    Calculate lead time bounds and simulate lead time using normal distribution.

    Parameters:
        data (pd.DataFrame): Input data containing 'order_date' and 'requested_delivery_date'.
        months (int): Number of months to filter data for lead time calculation.
        n_samples (int): Number of lead time samples to generate per month.

    Returns:
        pd.DataFrame: Simulated lead times for the filtered months.
    """

    # Step 2: Convert date columns and calculate lead time in months
    data['order_date'] = pd.to_datetime(data['order_date'], format='%d.%m.%Y')
    data['requested_delivery_date'] = pd.to_datetime(data['requested_delivery_date'], format='%d.%m.%Y')
    data['lead_time'] = (data['requested_delivery_date'] - data['order_date']).dt.days / 30

    # Step 3: Drop rows with NaN in 'lead_time'
    data = data.dropna(subset=['lead_time'])

    # Step 4: Group by 'year_month' and calculate lead time quantiles
    data['year_month'] = data['order_date'].dt.to_period('M')
    lead_time_bounds = data.groupby('year_month')['lead_time'].quantile([0.05, 0.5, 0.95]).unstack().reset_index()
    lead_time_bounds.columns = ['year_month', '5th Percentile', '50th Percentile', '95th Percentile']

    # Step 5: Simulate lead time using normal distribution
    simulated_leadtime = {}
    for _, row in lead_time_bounds.iterrows():
        q05 = row['5th Percentile']
        q95 = row['95th Percentile']

        # Generate samples
        samples = np.random.uniform(q05, q95, n_samples).clip(min=0)

        # Store in dictionary
        simulated_leadtime[row['year_month']] = samples

    # Step 6: Convert simulated leadtime dictionary to DataFrame
# Convert the simulated_leadtime dictionary to a DataFrame
    simulated_leadtime_df = pd.DataFrame.from_dict(
        simulated_leadtime, orient='index'
    ).reset_index()

    # Rename columns for clarity
    simulated_leadtime_df.columns = ['year_month'] + [f"Lead Time" ]

    return simulated_leadtime_df

In [65]:
calculate_and_simulate_lead_time(data)

Unnamed: 0,year_month,Lead Time
0,2009-07,7.344784
1,2009-08,6.031121
2,2009-09,6.313382
3,2009-10,5.940302
4,2009-11,7.7
5,2009-12,10.062725
6,2010-01,6.447622
7,2010-02,8.258751
8,2010-03,7.670742
9,2010-04,5.137234


consolidate function

In [51]:
def monte_carlo_simulation(data, n_months=5, n_simulations=100):
    """
    Consolidated Monte Carlo simulation that integrates:
    1. Monthly order forecasting using SARIMA.
    2. Product demand classification.
    3. Quantity demand simulation.
    4. Lead time simulation.

    Parameters:
    - data: Original DataFrame containing order and demand data.
    - n_months: Number of months for forecasting and simulations.
    - n_simulations: Number of lead-time samples per month for simulation.

    Returns:
    - Consolidated simulation results from all steps.
    """
    # Step 1: Forecast monthly orders with SARIMA
    future_forecast_orders = calculate_monthly_orders_with_sarima(data)

    # Step 2: Classify product demand
    future_demand_forecast = classify_and_evaluate_product_demand(data)

    # Step 3: Simulate quantity demand
    simulated_quantity = simulate_quantity_demand(data, n_months=n_months)

    # Step 4: Calculate and simulate lead time
    simulated_lead_times = calculate_and_simulate_lead_time(data, n_simulations=n_simulations)

    # Consolidate results into a dictionary
    results = {
        "Future Orders Forecast (SARIMA)": future_forecast_orders,
        "Future Product Demand Forecast (Logistic Regression)": future_demand_forecast,
        "Simulated Quantities": simulated_quantity,
        "Simulated Lead Times": simulated_lead_times.iloc[-5:],
    }
    
    return results


In [52]:
monte_carlo_simulation(data)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


{'Future Orders Forecast (SARIMA)': 22      3.225343
 23     25.940710
 24      9.143209
 25    132.401489
 26     22.757174
 Name: predicted_mean, dtype: float64,
 'Future Product Demand Forecast (Logistic Regression)': array(['L12134400', 'L12134600', 'L12135800', 'L12136100', 'L12916900',
        'L12917900', 'L12918000', 'L12918400', 'L12918500', 'L12918600',
        'L12918700', 'L12919200'], dtype=object),
 'Simulated Quantities':    Product Code    Month 1   Month 2   Month 3    Month 4   Month 5
 0     L10705000   6.781563  6.872075  5.587354   6.393195  6.066589
 1     L10705100   9.217628  7.781024  8.094152   7.809484  9.342804
 2     L10705200   5.178954  6.224780  6.582716   6.980723  4.881036
 3     L10705300   7.419653  6.601381  6.645952   6.391951  5.626886
 4     L10705400   5.909441  5.460214  6.187378  11.502595  6.641329
 5     L10705500  10.205760  8.907718  8.051111   4.783934  8.043596
 6     L10705600   8.248006  6.939359  8.059471   7.244979  6.953478
 7     L

In [53]:
import pandas as pd

# Provided simulation results
sarima_forecast = [3.190273, 25.982044, 9.073522, 132.028416, 22.489284]
product_demand_forecast = ['L12134400', 'L12916900', 'L12918400', 'L12918700', 'L12916900']
simulated_quantities = {
    'Product Code': ['L12134400', 'L12916900', 'L12918400', 'L12918700', 'L12916900'],
    'Month 1': [6.785766, 4.245232, 5.478310, 5.163947, 4.245232],
    'Month 2': [7.973964, 3.735805, 5.856477, 5.409120, 3.735805],
    'Month 3': [4.725396, 4.198413, 3.508490, 5.809602, 4.198413],
    'Month 4': [6.974093, 4.062948, 5.272238, 4.940882, 4.062948],
    'Month 5': [6.188111, 3.396047, 6.657568, 6.241263, 3.396047]
}
lead_times = {
    'year_month': ['2011-10', '2011-11', '2012-01', '2012-04', '2012-11'],
    'Lead Time': [5.70, 4.62, 4.45, 6.00, 0.00]
}

# Create dataframes for each component
sarima_df = pd.DataFrame({
    'Month': ['2024-08', '2024-09-01', '2024-10-01', '2024-11-01', '2024-12-01'],
    'SARIMA Forecast': sarima_forecast
})
product_demand_df = pd.DataFrame({
    'Month': ['2024-08-01', '2024-09-01', '2024-10-01', '2024-11-01', '2024-12-01'],
    'Predicted Product Code': product_demand_forecast
})
simulated_quantities_df = pd.DataFrame(simulated_quantities)
lead_times_df = pd.DataFrame(lead_times)

# Merge into a single DataFrame
final_output_df = sarima_df.copy()
final_output_df['Predicted Product Code'] = product_demand_df['Predicted Product Code']
final_output_df['Simulated Quantity Demand'] = simulated_quantities_df['Month 1']  # Taking Month 1 as an example
final_output_df['Lead Time'] = lead_times_df['Lead Time']

final_output_df

Unnamed: 0,Month,SARIMA Forecast,Predicted Product Code,Simulated Quantity Demand,Lead Time
0,2024-08,3.190273,L12134400,6.785766,5.7
1,2024-09-01,25.982044,L12916900,4.245232,4.62
2,2024-10-01,9.073522,L12918400,5.47831,4.45
3,2024-11-01,132.028416,L12918700,5.163947,6.0
4,2024-12-01,22.489284,L12916900,4.245232,0.0


In [54]:
data.head()

Unnamed: 0,order_date,requested_delivery_date,Customer Country Code,Product Code,Description,order_type,Customer Order Code,value,Curr,items,Route,year_month,Season,lead_time
0,2009-07-13,2010-01-28,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435553,2337.0,RUB,6,RU0001,2009-07,Summer,6.633333
1,2009-07-15,2010-03-24,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435694,10160.25,RUB,23,RU0001,2009-07,Summer,8.4
2,2009-07-16,2010-02-04,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435741,2992.5,RUB,7,RU0001,2009-07,Summer,6.766667
3,2009-07-17,2010-02-04,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435907,4061.25,RUB,9,RU0001,2009-07,Summer,6.733333
4,2009-07-21,2010-02-01,RU,L10705000,Parka Outdoor Lifestyle STD,VO,3200435963,2208.75,RUB,5,RU0001,2009-07,Summer,6.5


In [55]:
filtered_year_months = ['2011-10', '2011-11', '2012-01', '2012-04', '2012-11']
# Create an empty DataFrame to store the filtered results
filtered_data = pd.DataFrame()

# Loop through each value in filtered_year_months and filter rows
for year_month in filtered_year_months:
    filtered_rows = data[data['year_month'] == year_month]
    filtered_data = pd.concat([filtered_data, filtered_rows], ignore_index=True)

# Get distinct product codes and sum their associated quantities ('items') for the specified 'year_month'
distinct_product_quantities = filtered_data.groupby(['year_month', 'Product Code'], as_index=False)['items'].sum()

In [56]:
distinct_product_quantities

Unnamed: 0,year_month,Product Code,items
0,2011-10,L12916800,10
1,2011-10,L12917000,4
2,2011-10,L12917100,8
3,2011-10,L12917700,1
4,2011-10,L12917800,8
5,2011-10,L12917900,9
6,2011-10,L12918000,1
7,2011-10,L12918400,4
8,2011-10,L12918500,0
9,2011-10,L12918600,5


In [57]:
import pandas as pd
import numpy as np

# Simulated results
sarima_forecast = pd.Series([3.190273, 25.982044, 9.073522, 132.028416, 22.489284])
simulated_quantities = pd.DataFrame({
    'Product Code': ['L10705400', 'L12134400', 'L12135800', 'L12916900', 'L12917900',
        'L12918000', 'L12918400', 'L12918700', 'L12919200'],
    'Month 1': [7.809696, 5.655631, 6.980979, 4.574621, 6.057039,3.956021,6.211332, 5.367128,5.570693]
})


# Actual data (example)
actual_orders = pd.Series([89, 13, 3, 1, 1])  # Example actual values for SARIMA forecast
actual_quantities = pd.DataFrame({
    'Product Code': ['L12917000', 'L12917100', 'L12917100', 'L12917700', 'L12917900',
                    'L12918000',  'L12918400','L12918500', 'L12918600' ],
    'Actual Quantity': [10,4,8,1,8,9,1,4,0]
})


# 1. Calculate MAPE for SARIMA forecast
sarima_ape = np.abs((actual_orders - sarima_forecast) / actual_orders)
sarima_mape = sarima_ape.mean() * 100
print(f"MAPE for SARIMA Forecast: {sarima_mape:.2f}%")

# 2. Merge simulated quantities and actual quantities to calculate MAPE
quantities_merged = pd.merge(simulated_quantities, actual_quantities, on='Product Code')
quantities_merged['APE'] = np.abs((quantities_merged['Actual Quantity'] - quantities_merged['Month 1']) / quantities_merged['Actual Quantity'])
quantities_mape = quantities_merged['APE'].mean() * 100
print(f"MAPE for Simulated Product and Quantities: {quantities_mape:.2f}%")


MAPE for SARIMA Forecast: 3130.10%
MAPE for Simulated Product and Quantities: 200.49%


In [62]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, classification_report

def monte_carlo_simulation(data, n_months=5, n_simulations=100):
    """
    Consolidated Monte Carlo simulation that integrates demand forecasting,
    product classification, quantity simulation, and lead-time analysis.
    
    :param data: Original DataFrame containing order and demand data.
    :param n_months: Number of months for future forecasting and simulations.
    :param n_simulations: Number of samples for lead time simulation per month.
    :return: Consolidated results for all simulations.
    """
    # Step 1: Forecast monthly orders with SARIMA
    data['order_date'] = pd.to_datetime(data['order_date'], format='%d.%m.%Y')
    data['year_month'] = data['order_date'].dt.to_period('M')
    monthly_orders = (
        data.groupby('year_month')['Customer Order Code']
        .nunique()
        .reset_index(name='distinct_orders')
    )
    
    train_size = len(monthly_orders) - n_months
    train_data = monthly_orders.iloc[:train_size]
    test_data = monthly_orders.iloc[train_size:]

    sarima_model = SARIMAX(train_data['distinct_orders'], order=(1, 0, 2), seasonal_order=(1, 1, 1, 12))
    sarima_result = sarima_model.fit(disp=False)
    future_forecast_orders = sarima_result.forecast(steps=n_months)

    # Step 2: Classify product demand
    def get_season(month):
        if month in [12, 1, 2]: return 'Winter'
        elif month in [3, 4, 5]: return 'Spring'
        elif month in [6, 7, 8]: return 'Summer'
        else: return 'Fall'
    
    data['Season'] = data['order_date'].dt.month.apply(get_season)
    encoded_data = pd.get_dummies(data, columns=['Season', 'Customer Country Code','Curr', 'Route', 'order_type'], drop_first=True)
    X = encoded_data.drop(columns=['Product Code', 'year_month', 'order_date', 'requested_delivery_date', 'Customer Order Code', 'Description'])
    y = encoded_data['Product Code']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    logistic_model = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')
    logistic_model.fit(X_train, y_train)
    future_forecast_demand = logistic_model.predict(X_test.sample(n=n_months, random_state=42)).unique()

    # Step 3: Simulate quantity demand
    data['items'] = pd.to_numeric(data['items'], errors='coerce')
    data = data.dropna(subset=['items'])
    quantity_bounds = data.groupby('Product Code')['items'].quantile([0.25, 0.5, 0.75]).unstack()
    quantity_bounds.columns = ['25th Percentile', '50th Percentile', '75th Percentile']

    simulated_demand = []
    for _, row in quantity_bounds.iterrows():
        mean_quantity = row['50th Percentile']
        std_dev_quantity = (row['75th Percentile'] - row['25th Percentile']) / 6
        product_demand = np.random.normal(mean_quantity, std_dev_quantity, n_months).clip(0)
        simulated_demand.append(product_demand)
    
    simulated_demand_data = pd.DataFrame(simulated_demand, index=quantity_bounds.index).reset_index()
    simulated_demand_data.columns = ['Product Code'] + [f'Month {i+1}' for i in range(n_months)]

    # Step 4: Calculate and simulate lead time
    data['lead_time'] = (data['requested_delivery_date'] - data['order_date']).dt.days / 30
    lead_time_bounds = data.groupby('year_month')['lead_time'].quantile([0.05, 0.5, 0.95]).unstack()
    lead_time_bounds.columns = ['5th Percentile', '50th Percentile', '95th Percentile']

    simulated_leadtime = {}
    for _, row in lead_time_bounds.iterrows():
        q05, q50, q95 = row['5th Percentile'], row['50th Percentile'], row['95th Percentile']
        samples = np.random.uniform(q05, q95, n_simulations).clip(0)
        simulated_leadtime[row.name] = samples

    simulated_leadtime_df = pd.DataFrame.from_dict(simulated_leadtime, orient='index')
    simulated_leadtime_df.columns = [f'Simulation {i+1}' for i in range(n_simulations)]

    # Consolidate results
    results = {
        'Future Orders Forecast': future_forecast_orders,
        'Future Demand Forecast': future_forecast_demand,
        'Simulated Quantities': simulated_demand_data,
        'Simulated Lead Times': simulated_leadtime_df.iloc[-5:]
    }
    return results


In [63]:
monte_carlo_simulation(data)

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Too few observations to estimate starting parameters%s.'
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


AttributeError: 'numpy.ndarray' object has no attribute 'unique'