In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Task 6

In [2]:
# read modified_data/
df_demand_log = pd.read_csv('modified_data/df_demand_log_t5.csv')
df_demand_log.rename(columns={'Jar Size  (vol. ounces)': 'jarSize'}, inplace=True)
df_demand_log.head()

Unnamed: 0,DemandID,Demand Request Date,Mix,jarSize,State,Quantity,Earliest Delivery Date,Preferred Delivery Date,Latest Delivery Date,Earliest Acceptable Shipping Date,Preferred Shipping Date,Latest Acceptable Shipping Date,MinOrderToShip,PrefOrderToShip,MaxOrderToShip,Smoothed Daily Demand
0,1,2021-01-01,Apple-Mango,8,Maryland,1,2021-01-01,2021-01-04,2021-01-07,2020-12-27,2020-12-30,2021-01-02,-5,-2,1,0.142857
1,39,2021-01-01,Orange-Passionfruit,32,Maryland,1,2021-01-01,2021-01-03,2021-01-03,2020-12-27,2020-12-29,2020-12-29,-5,-3,-3,0.333333
2,53,2021-01-01,Grape-Pomegranate,8,Maryland,1,2021-01-02,2021-01-07,2021-01-18,2020-12-28,2021-01-02,2021-01-13,-4,1,12,0.058824
3,125,2021-01-02,Cherry-Lime,32,Maryland,1,2021-01-03,2021-01-06,2021-01-08,2020-12-29,2021-01-01,2021-01-03,-4,-1,1,0.166667
4,182,2021-01-02,Strawberry-Kiwi,32,Maryland,1,2021-01-02,2021-01-03,2021-01-06,2020-12-28,2020-12-29,2021-01-01,-5,-4,-1,0.2


In [3]:
# Query where orders are impossible to fulfill
lost_demand = df_demand_log.query('MaxOrderToShip < 0')
acheivable_demand = df_demand_log.query('MaxOrderToShip >= 0')

# Percentage of lost demand
lost_demand_percentage = lost_demand['Quantity'].sum() / df_demand_log['Quantity'].sum() * 100

print(f'Percentage of lost demand: {lost_demand_percentage:.2f}%')

Percentage of lost demand: 25.21%


About 25% of the volume of demand is lost due shipping timelines where the mix that was order would have to be shipped date before the day of request, or earlier. Depending on the responsiveness of the supply chain, this could potentially make day of shipping or shortly after impossible as well.

# Task 7

In [4]:
production_capacity = pd.read_excel('FruitSoul Production and Materials Characteristics and Costs.xlsx', sheet_name='Center Capacity', header=2)
production_capacity.rename(columns={'Unnamed: 1': 'Center', 'Unnamed: 7': "Unit"}, inplace=True)
production_capacity.drop(columns=['Unnamed: 0'], inplace=True)

## Mixing Demand History

Because mixing demand is measured in ounces, it's demand is not exactly in line with packaging and bottling demand.

### Demand History: Ounces

In [6]:
# Create total ounces columns
acheivable_demand['ozOfProduct'] = acheivable_demand['Quantity'] * acheivable_demand['jarSize']

# Preferred Demand History
preferred_ounces_demand = acheivable_demand.groupby(["Preferred Shipping Date"]).agg({"ozOfProduct": "sum"}).reset_index().rename(columns={"jarSize": "Ounces Demand"})

# Earliest Arrival Demand History
early_ounces_demand = acheivable_demand.groupby(["Earliest Acceptable Shipping Date"]).agg({"ozOfProduct": "sum"}).reset_index().rename(columns={"jarSize": "Ounces Demand"})

# Latest Arrival Demand History
late_ounces_demand = acheivable_demand.groupby(["Latest Acceptable Shipping Date"]).agg({"ozOfProduct": "sum"}).reset_index().rename(columns={"jarSize": "Ounces Demand"})

# Smoothed Arrival Demand History

def demand_probability(row, col_name='Quantity'):
    preferred_date = pd.to_datetime(row['Preferred Shipping Date'])
    earliest_date = pd.to_datetime(row['Earliest Acceptable Shipping Date'])
    latest_date = pd.to_datetime(row['Latest Acceptable Shipping Date'])
    
    # Calculate the standard deviation (earliest/latest represent ±2 standard deviations)
    std_dev = (latest_date - earliest_date).days / 4
    
    # Ensure std_dev is at least 1 to avoid division by zero
    std_dev = max(std_dev, 1)
    
    # Initialize empty list for smoothed demand distribution
    demand_distribution = []
    
    # Compute cumulative probabilities for the range of acceptable shipping dates
    for day in pd.date_range(earliest_date, latest_date):
        # CDF at the start and end of the day
        cdf_start = norm.cdf((day - pd.Timedelta(days=0.5) - preferred_date).days, scale=std_dev)
        cdf_end = norm.cdf((day + pd.Timedelta(days=0.5) - preferred_date).days, scale=std_dev)
        
        # Cumulative probability for the 24-hour period (between start and end of the day)
        daily_probability = cdf_end - cdf_start
        
        demand_distribution.append({
            'Date': day,
            'Mix': row['Mix'],
            'Probability': daily_probability
        })
    
    # Convert list to DataFrame
    df_demand_distribution = pd.DataFrame(demand_distribution)

    if len(df_demand_distribution) == 0:
        return pd.DataFrame(columns=['Date', 'Mix', 'Smoothed Daily Demand'])
    
    # Normalize probabilities to sum up to 1, then scale to match total quantity
    df_demand_distribution['smoothedDailyDemand'] = (df_demand_distribution['Probability']) * row[col_name]
    
    return df_demand_distribution[['Date', 'Mix', 'smoothedDailyDemand']]

# Apply the function to each row in the demand log and concatenate the results
smoothed_ounces_demand = pd.concat([demand_probability(row, col_name = 'ozOfProduct') for _, row in acheivable_demand.iterrows()])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acheivable_demand['ozOfProduct'] = acheivable_demand['Quantity'] * acheivable_demand['jarSize']


### Demand History: Jars

Because packaging and bottling demand is measured in jar, the demand will be identical for each order.

In [7]:
# Preferred Demand History
preferred_jars_demand = acheivable_demand.groupby(["Preferred Shipping Date"]).agg({"Quantity": "sum"}).reset_index().rename(columns={"jarSize": "Jars Demand"})
# Earliest Arrival Demand History
early_jars_demand = acheivable_demand.groupby(["Earliest Acceptable Shipping Date"]).agg({"Quantity": "sum"}).reset_index().rename(columns={"jarSize": "Jars Demand"})
# Latest Arrival Demand History
late_jars_demand = acheivable_demand.groupby(["Latest Acceptable Shipping Date"]).agg({"Quantity": "sum"}).reset_index().rename(columns={"jarSize": "Jars Demand"})

# Smoothed Arrival Demand History
smoothed_jars_demand = pd.concat([demand_probability(row, col_name = 'Quantity') for _, row in acheivable_demand.iterrows()])

### Adjust demand dates for off days


In [22]:
# Function to check if a date is Labor Day
def is_labor_day(date):
    if date.strftime('%m') == '09' and date.strftime('%d') <= '07' and date.weekday() == 0:
        return True
    return False

# Define the date for the 4th of July 
july_4th = '07-04'

# Function to adjust dates for earliest or latest shipping dates on Sundays, labor days, or 4th of July
def adjust_date(date, type='e'):
    """
    Adjust the date based on the type:
    'e' for Earliest Acceptable Shipping Date,
    'l' for Latest Acceptable Shipping Date
    """
    if type == 'e':
        if date.weekday() == 6:
            date = date + pd.Timedelta(days=1)
        if is_labor_day(date) or date.strftime('%m-%d') == july_4th:
            date = date + pd.Timedelta(days=1)
            if date.weekday() == 6:
                date = date + pd.Timedelta(days=1)
        return date
    
    if type == 'l':
        if is_labor_day(date) or date.strftime('%m-%d') == july_4th:
            date = date - pd.Timedelta(days=1)
        if date.weekday() == 6:
            date = date - pd.Timedelta(days=1)
            if is_labor_day(date) or date.strftime('%m-%d') == july_4th:
                date = date - pd.Timedelta(days=1)
        return date

    return date

# Adjust the dates
smoothed_jars_demand['Date'] = smoothed_jars_demand['Date'].apply(lambda x: adjust_date(x, 'e'))
smoothed_ounces_demand['Date'] = smoothed_ounces_demand['Date'].apply(lambda x: adjust_date(x, 'e'))

# Function to check if a date is a Sunday, Labor Day, or the 4th of July
def is_special_day(date):
    return date.weekday() == 6 or is_labor_day(date) or date.strftime('%m-%d') == july_4th

# Function to move the date forward by one day if it is a special day
def adjust_preferred_shipping_date(date):
    if is_special_day(date):
        date += pd.Timedelta(days=1)
        if is_special_day(date):
            date -= pd.Timedelta(days=2)
    return date

# Apply the function to the Preferred Shipping Date column
smoothed_jars_demand['Date'] = smoothed_jars_demand['Date'].apply(adjust_preferred_shipping_date)
smoothed_ounces_demand['Date'] = smoothed_ounces_demand['Date'].apply(adjust_preferred_shipping_date)

In [None]:
smoothed_jars_demand_date = smoothed_jars_demand.groupby('Date').agg({'smoothedDailyDemand': 'sum'}).reset_index()
smoothed_ounces_demand_date = smoothed_ounces_demand.groupby('Date').agg({'smoothedDailyDemand': 'sum'}).reset_index()

# Task 8

## Identify production capacity 

In [23]:
def identify_production_demand(demand_vector, s = 5):
    s_pctile_demand  = np.percentile(np.array(demand_vector), 100-s)
    return s_pctile_demand

def identify_max_daily_production(production_capacity, center_name = "Mixing"):
    max_shift_production = production_capacity[production_capacity['Center'] == center_name][5].values[0]
    max_daily_production = max_shift_production * 2
    return max_daily_production

def identify_cells_needed(demand, max_daily_production):
    cells_needed = np.ceil(demand / max_daily_production)
    return cells_needed

In [37]:
jars = smoothed_jars_demand_date.rename(columns={'smoothedDailyDemand': 'jarsDemand'})
ounces = smoothed_ounces_demand_date.rename(columns={'smoothedDailyDemand': 'ouncesDemand'})

# Merge on Date
production_demand = jars.merge(ounces, on='Date', how='left')

# Fill NA values with 0\
production_demand.fillna(0, inplace=True)

Unnamed: 0,Date,smoothedDailyDemand,Unit
0,2020-12-28,0.699472,Jars
1,2020-12-28,6.993890,Jars
2,2020-12-29,13.790215,Jars
3,2020-12-30,23.725394,Jars
4,2020-12-31,37.695940,Jars
...,...,...,...
1116,2024-01-17,0.103070,Jars
1117,2024-01-18,0.036771,Jars
1118,2024-01-19,0.017915,Jars
1119,2024-01-20,0.008219,Jars


## Identify total cells needed

In [34]:
def identify_total_cells_needed(production_demand, production_capacity, center_name, s):
    unit = production_capacity[production_capacity['Center'] == center_name]['Unit'].values[0]
    if unit == "Jars":
        demand_vector = production_demand['jarsDemand']
    else:
        demand_vector = production_demand['ouncesDemand']
    s_pctile_demand = identify_production_demand(demand_vector, s)
    max_daily_production = identify_max_daily_production(production_capacity, center_name)
    cells_needed = identify_cells_needed(s_pctile_demand, max_daily_production)
    return cells_needed

identify_total_cells_needed(production_demand['smoothedDailyDemand'], production_capacity, "Mixing", 5)

## Optimize Daily Set-Up

In [None]:
# Create function to return demand for a given date
def get_demand(date, unit, production_demand, demandColumnName = 'smoothedDailyDemand'):
    return production_demand[(production_demand['Date'] == date) & (production_demand['Unit'] == unit)][demandColumnName].values[0]

235.84775664364847