In [1]:
import pandas as pd
import numpy as np

In [2]:
# import the 'cells.csv' file
WAREHOUSE_DATA = pd.read_csv('./original/cells.csv')
# drop the 'fetch_zone' column
WAREHOUSE_DATA

Unnamed: 0,location,aisle,x_length,y_width,z_height,heightfromfloor,cell_attractiveness,distance_from_io_to_aisle,fetch_tool,putaway_zone,fetch_zone,cell_volume,available_volume,percent_full
0,010101,1,1.275000,-24.79875,0.0,0.25,0.027721,34.79875,PALLET_JACK,OPT-A-01-04,OPT01-04-LOW,0.487687,1.447029e-05,2.967123e-05
1,010102,1,1.366071,-24.79875,0.0,0.55,0.027651,34.79875,PALLET_JACK,OPT-A-01-04,OPT01-04-LOW,0.522522,5.004286e-07,9.577171e-07
2,010103,1,1.275000,-24.79875,0.0,0.85,0.027721,34.79875,PALLET_JACK,OPT-A-01-04,OPT01-04-LOW,0.487687,4.881990e-06,1.001049e-05
3,010104,1,1.275000,-24.79875,0.0,1.15,0.027721,34.79875,PALLET_JACK,OPT-A-01-04,OPT01-04-LOW,0.487687,7.891645e-05,1.618177e-04
4,010105,1,1.366071,-24.79875,0.0,1.45,0.027651,34.79875,PALLET_JACK,OPT-A-01-04,OPT01-04-LOW,0.522522,4.110894e-04,7.867404e-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5664,FLOOR01,100,4.781250,12.78625,0.0,0.00,0.000000,22.78625,PALLET_JACK,PALLET_JACK,DEPOSIT,717.187500,4.408904e+02,6.147491e-01
5665,FLOOR02,100,14.343750,12.78625,0.0,0.00,0.000000,22.78625,PALLET_JACK,PALLET_JACK,DEPOSIT,717.187500,7.171875e+02,1.000000e+00
5666,FLOOR03,100,23.906250,12.78625,0.0,0.00,0.000000,22.78625,PALLET_JACK,PALLET_JACK,DEPOSIT,717.187500,7.171875e+02,1.000000e+00
5667,FLOOR04,100,33.468750,12.78625,0.0,0.00,0.000000,22.78625,PALLET_JACK,PALLET_JACK,DEPOSIT,717.187500,5.903939e+02,8.232071e-01


In [3]:
# create a dataframe of the location and index as the 'location_id' column
cells = pd.DataFrame(WAREHOUSE_DATA['location'])
cells['location_id'] = cells['location'].index
cells

Unnamed: 0,location,location_id
0,010101,0
1,010102,1
2,010103,2
3,010104,3
4,010105,4
...,...,...
5664,FLOOR01,5664
5665,FLOOR02,5665
5666,FLOOR03,5666
5667,FLOOR04,5667


In [4]:
# export the dataframe to a csv file called 'location_ids.csv' under the 'ids' folder
cells.to_csv('./ids/location_ids.csv', index=False)

In [5]:
# do the same process for the 'putaway_zone' column
putaway_zones = pd.DataFrame(WAREHOUSE_DATA['putaway_zone'])
# drop the duplicates, reset the index
putaway_zones = putaway_zones.drop_duplicates().reset_index(drop=True)
putaway_zones['putaway_zone_id'] = putaway_zones.index
putaway_zones.to_csv('./ids/putaway_zone_ids.csv', index=False)
putaway_zones

Unnamed: 0,putaway_zone,putaway_zone_id
0,OPT-A-01-04,0
1,OPT-B-01-04,1
2,OPT-C-01-04,2
3,OPT-MDK-B,3
4,OPT-MDK-14,4
5,CANTI-LIGHT,5
6,OPT-A-05-07,6
7,OPT-B-05-07,7
8,OPT-C-05-07,8
9,CANTI-HEAVY,9


In [38]:
def import_warehouse_data(path: str = './original/cells.csv') -> pd.DataFrame:
    df = pd.read_csv(path)
    # drop the 'fetch_zone' column
    df = df.drop(columns=['fetch_zone'])
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    location_df = pd.read_csv('./ids/location_ids.csv')
    # replace the values under the 'location' column with the corresponding value from the location_df 'location_id' column
    df = df.merge(location_df, on='location').drop(columns=['location'])
    putaway_zone_df = pd.read_csv('./ids/putaway_zone_ids.csv')
    df = df.merge(putaway_zone_df[['putaway_zone', 'putaway_zone_id']], on='putaway_zone').drop(columns=['putaway_zone'])
    tool_df = pd.read_csv('./ids/tool_ids.csv', usecols=['fetch_tool', 'fetch_tool_id'])
    df = df.merge(tool_df, on='fetch_tool').drop(columns=['fetch_tool'])
    # remove all _id from the column names
    df.columns = [col.replace('_id', '') for col in df.columns]
    # change the order of the columns so location will be first
    cols = df.columns.tolist()
    # remove the location column from the list
    cols.remove('location')
    cols = ['location'] + cols
    df = df[cols]
    # sort by location and reset index
    df = df.sort_values(by='location').reset_index(drop=True)
    return df

wh_data = import_warehouse_data()
wh_data

Unnamed: 0,location,aisle,x_length,y_width,z_height,heightfromfloor,cell_attractiveness,distance_from_io_to_aisle,cell_volume,available_volume,percent_full,putaway_zone,fetch_tool
0,0,1,1.275000,-24.79875,0.0,0.25,0.027721,34.79875,0.487687,1.447029e-05,2.967123e-05,0,1
1,1,1,1.366071,-24.79875,0.0,0.55,0.027651,34.79875,0.522522,5.004286e-07,9.577171e-07,0,1
2,2,1,1.275000,-24.79875,0.0,0.85,0.027721,34.79875,0.487687,4.881990e-06,1.001049e-05,0,1
3,3,1,1.275000,-24.79875,0.0,1.15,0.027721,34.79875,0.487687,7.891645e-05,1.618177e-04,0,1
4,4,1,1.366071,-24.79875,0.0,1.45,0.027651,34.79875,0.522522,4.110894e-04,7.867404e-04,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5664,5664,100,4.781250,12.78625,0.0,0.00,0.000000,22.78625,717.187500,4.408904e+02,6.147491e-01,23,1
5665,5665,100,14.343750,12.78625,0.0,0.00,0.000000,22.78625,717.187500,7.171875e+02,1.000000e+00,23,1
5666,5666,100,23.906250,12.78625,0.0,0.00,0.000000,22.78625,717.187500,7.171875e+02,1.000000e+00,23,1
5667,5667,100,33.468750,12.78625,0.0,0.00,0.000000,22.78625,717.187500,5.903939e+02,8.232071e-01,23,1


In [40]:
wh_data[0:0]

Unnamed: 0,location,aisle,x_length,y_width,z_height,heightfromfloor,cell_attractiveness,distance_from_io_to_aisle,cell_volume,available_volume,percent_full,putaway_zone,fetch_tool


In [7]:
# import the items.csv file
ITEMS_DATA = pd.read_csv('./items.csv')
ITEMS_DATA

Unnamed: 0,uuid,item_volume,item_attractiveness,putaway_zone,initial_stock
0,A000001,0.000036,0.000045,CANTI-LIGHT,0
1,A000002,0.000144,0.000000,CANTI-LIGHT,0
2,A000003,0.000729,0.000045,OPT-A-05-07,0
3,A000004,0.000259,0.000045,OPT-C-01-04,0
4,A000005,0.000911,0.000045,MERAKEZET,0
...,...,...,...,...,...
30472,A030473,0.014850,0.000000,OPT-B-07-11,0
30473,A030474,0.015055,0.000136,OPT-C-05-07,1
30474,A030475,0.400000,0.000680,PALLET-C,0
30475,A030476,0.065309,0.000091,OPT-B-05-07,25


In [8]:
# create a dataframe of the uuid and index as the 'item_id' column
items = pd.DataFrame(ITEMS_DATA['uuid'])
items['item_id'] = items.index
items.to_csv('./ids/item_ids.csv', index=False)
items

Unnamed: 0,uuid,item_id
0,A000001,0
1,A000002,1
2,A000003,2
3,A000004,3
4,A000005,4
...,...,...
30472,A030473,30472
30473,A030474,30473
30474,A030475,30474
30475,A030476,30475


In [9]:
# import the 'tool_capacity.csv' file
TOOL_CAPACITY = pd.read_csv('./tool_capacity.csv')
TOOL_CAPACITY

Unnamed: 0,fetch_tool,max_volume
0,REACH_FORK,48.0
1,PALLET_JACK,48.0
2,ORDER_PICKER,24.332
3,CROSS_DOCK,inf


In [10]:
# add a tool index column
tools = TOOL_CAPACITY[['fetch_tool']]
tools['fetch_tool_id'] = tools.index
tools.to_csv('./ids/tool_ids.csv', index=False)
tools

Unnamed: 0,fetch_tool,fetch_tool_id
0,REACH_FORK,0
1,PALLET_JACK,1
2,ORDER_PICKER,2
3,CROSS_DOCK,3


In [11]:
DATA_PATH = '.'
def import_warehouse_positions(path: str = f'{DATA_PATH}/original/positions.csv') -> pd.DataFrame:
    df = pd.read_csv(path)
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    location_df = pd.read_csv(f'{DATA_PATH}/ids/location_ids.csv')
    # replace the values under the 'location' column with the corresponding value from the location_df 'location_id' column
    df = df.merge(location_df, on='location').drop(columns=['location'])
    item_df = pd.read_csv(f'{DATA_PATH}/ids/item_ids.csv')
    df = df.merge(item_df[['uuid', 'item_id']], on='uuid').drop(columns=['uuid'])
    df = df.rename(columns={'location_id': 'location', 'item_id': 'uuid'})
    df.sort_values(by=['location', 'uuid'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # change the order of the columns
    df = df[['location', 'uuid', 'quantity']]
    
    return df

import_warehouse_positions().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21525 entries, 0 to 21524
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   location  21525 non-null  int64  
 1   uuid      21525 non-null  int64  
 2   quantity  21525 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 504.6 KB


In [12]:
from datetime import datetime
MAX_DATETIME = pd.to_datetime('2021-01-14')
def import_dates_data(max_date: datetime or None = None) -> pd.DataFrame:
    df = pd.read_csv(f'{DATA_PATH}/dates.csv')
    # define the date column as datetime
    df['date'] = pd.to_datetime(df['date'])
    if max_date is not None:
        df = df[df['date'] < max_date]
    return df

import_dates_data(MAX_DATETIME)

Unnamed: 0,date,short_day
0,2021-01-01,True
1,2021-01-03,False
2,2021-01-04,False
3,2021-01-05,False
4,2021-01-06,False
5,2021-01-07,False
6,2021-01-08,True
7,2021-01-10,False
8,2021-01-11,False
9,2021-01-12,False


In [13]:
def import_items_data(path: str = f'{DATA_PATH}/items.csv') -> pd.DataFrame:
    df = pd.read_csv(path)
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    item_df = pd.read_csv(f'{DATA_PATH}/ids/item_ids.csv')
    # merge the df with the item_df dataframe on the 'uuid' column, keep only the 'item_id' column
    df = df.merge(item_df[['uuid', 'item_id']], on='uuid').drop(columns=['uuid'])
    putaway_zone_df = pd.read_csv(f'{DATA_PATH}/ids/putaway_zone_ids.csv')
    # do the same for the 'putaway_zone' column
    df = df.merge(putaway_zone_df[['putaway_zone', 'putaway_zone_id']], on='putaway_zone').drop(columns=['putaway_zone'])
    # remove the _id from the column names
    df = df.rename(columns={'putaway_zone_id': 'putaway_zone', 'item_id': 'uuid'})
    df.sort_values(by=['uuid'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # change the order of the columns to match the original dataframe
    df = df[['uuid', 'putaway_zone', 'item_volume', 'item_attractiveness', 'initial_stock']]
    return df

items_df = import_items_data().sort_values(by=['uuid'])
items_df

Unnamed: 0,uuid,putaway_zone,item_volume,item_attractiveness,initial_stock
0,0,5,0.000036,0.000045,0
1,1,5,0.000144,0.000000,0
2,2,6,0.000729,0.000045,0
3,3,2,0.000259,0.000045,0
4,4,10,0.000911,0.000045,0
...,...,...,...,...,...
30472,30472,12,0.014850,0.000000,0
30473,30473,8,0.015055,0.000136,1
30474,30474,22,0.400000,0.000680,0
30475,30475,7,0.065309,0.000091,25


In [31]:
# do the same as the former function only for the shipments.csv file
def import_shipments_data(path: str = f'{DATA_PATH}/shipments.csv', max_date: datetime or None = None) -> pd.DataFrame:
    df = pd.read_csv(path)
    # change the 'date' column to datetime
    df['date'] = pd.to_datetime(df['date'])
    if max_date is not None:
        df = df[df['date'] < max_date]
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    item_df = pd.read_csv(f'{DATA_PATH}/ids/item_ids.csv')
    # merge the df with the item_df dataframe on the 'uuid' column, keep only the 'item_id' column
    df = df.merge(item_df[['uuid', 'item_id']], on='uuid').drop(columns=['uuid'])
    # remove the _id from the column names
    df = df.rename(columns={'item_id': 'uuid'})
    df.sort_values(by=['date', 'uuid'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # change the order of the columns to match the original dataframe
    df = df[['date', 'uuid', 'quantity']]
    return df
MAX_DATETIME = pd.to_datetime('2021-07-02')
shipments_df = import_shipments_data(max_date=MAX_DATETIME)
shipments_df

Unnamed: 0,date,uuid,quantity
0,2021-01-03,7311,1
1,2021-01-03,8565,1
2,2021-01-03,10113,10
3,2021-01-03,14793,1
4,2021-01-03,15421,1
...,...,...,...
21033,2021-07-01,30169,60
21034,2021-07-01,30172,40
21035,2021-07-01,30342,20
21036,2021-07-01,30386,70


In [15]:
first_shipments_df = shipments_df.groupby('uuid').agg({'date': 'min'}).sort_values(by=['date', 'uuid']).reset_index()
first_shipments_df

Unnamed: 0,uuid,date
0,7311,2021-01-03
1,8565,2021-01-03
2,10113,2021-01-03
3,14793,2021-01-03
4,15421,2021-01-03
...,...,...
763,26861,2021-01-13
764,27750,2021-01-13
765,27877,2021-01-13
766,28371,2021-01-13


In [16]:
item_df = pd.read_csv(f'{DATA_PATH}/items.csv')
item_df


Unnamed: 0,uuid,item_volume,item_attractiveness,putaway_zone,initial_stock
0,A000001,0.000036,0.000045,CANTI-LIGHT,0
1,A000002,0.000144,0.000000,CANTI-LIGHT,0
2,A000003,0.000729,0.000045,OPT-A-05-07,0
3,A000004,0.000259,0.000045,OPT-C-01-04,0
4,A000005,0.000911,0.000045,MERAKEZET,0
...,...,...,...,...,...
30472,A030473,0.014850,0.000000,OPT-B-07-11,0
30473,A030474,0.015055,0.000136,OPT-C-05-07,1
30474,A030475,0.400000,0.000680,PALLET-C,0
30475,A030476,0.065309,0.000091,OPT-B-05-07,25


In [17]:
# take the item_df dataframe, keep the 'uuid' and 'initial_stock' columns
# change the 'initial_stock' column to 'quantity'
# add a 'date' column with the value of '2021-01-01'
first_day_shipments_df = items_df[['uuid', 'initial_stock']].rename(columns={'initial_stock': 'quantity'})
first_day_shipments_df['date'] = pd.to_datetime('2021-01-01')
first_day_shipments_df

Unnamed: 0,uuid,quantity,date
0,0,0,2021-01-01
1,1,0,2021-01-01
2,2,0,2021-01-01
3,3,0,2021-01-01
4,4,0,2021-01-01
...,...,...,...
30472,30472,0,2021-01-01
30473,30473,1,2021-01-01
30474,30474,0,2021-01-01
30475,30475,25,2021-01-01


In [18]:
# merge with the shipments_df dataframe
complete_shipments_df = pd.concat([first_day_shipments_df, shipments_df])
complete_shipments_df

Unnamed: 0,uuid,quantity,date
0,0,0,2021-01-01
1,1,0,2021-01-01
2,2,0,2021-01-01
3,3,0,2021-01-01
4,4,0,2021-01-01
...,...,...,...
795,26861,1,2021-01-13
796,27750,1,2021-01-13
797,27877,1,2021-01-13
798,28371,1,2021-01-13


In [19]:
# for each uuid, calculate the cumulative sum of the 'quantity' column
complete_shipments_df['cumulative_shipments'] = complete_shipments_df.groupby('uuid')['quantity'].cumsum()
# rename the 'quantity' column to 'shipments'
complete_shipments_df = complete_shipments_df.rename(columns={'quantity': 'shipments'})
complete_shipments_df

Unnamed: 0,uuid,shipments,date,cumulative_shipments
0,0,0,2021-01-01,0
1,1,0,2021-01-01,0
2,2,0,2021-01-01,0
3,3,0,2021-01-01,0
4,4,0,2021-01-01,0
...,...,...,...,...
795,26861,1,2021-01-13,4
796,27750,1,2021-01-13,2
797,27877,1,2021-01-13,6
798,28371,1,2021-01-13,1


In [20]:
orders_df = pd.read_csv(f'{DATA_PATH}/orders.csv')
item_ids_df = pd.read_csv(f'{DATA_PATH}/ids/item_ids.csv')
orders_df = orders_df.merge(item_ids_df[['uuid', 'item_id']], on='uuid').drop(columns=['uuid'])
orders_df = orders_df.rename(columns={'item_id': 'uuid', 'quantity': 'orders'})
orders_df

Unnamed: 0,order_id,timestamp,orders,uuid
0,1,2021-01-01 08:00:00,5,16075
1,139,2021-01-03 08:11:23,20,16075
2,248,2021-01-03 09:02:04,21,16075
3,374,2021-01-03 10:18:01,7,16075
4,703,2021-01-03 14:39:09,3,16075
...,...,...,...,...
473295,473002,2023-05-29 09:44:23,1,19838
473296,473012,2023-05-29 09:53:28,1,15498
473297,473020,2023-05-29 09:59:32,1,21252
473298,473173,2023-05-29 13:03:05,1,29992


In [21]:
# sort by timestamp, group by uuid and calculate the cumulative sum of the 'quantity' column
orders_df = orders_df.sort_values(by=['timestamp', 'uuid']).reset_index(drop=True)
orders_df['cumulative_orders'] = orders_df.groupby('uuid')['orders'].cumsum()
# add a 'date' column using the 'timestamp' column, drop the hours and minutes
orders_df['date'] = pd.to_datetime(pd.to_datetime(orders_df['timestamp']).dt.date)
orders_df

Unnamed: 0,order_id,timestamp,orders,uuid,cumulative_orders,date
0,1,2021-01-01 08:00:00,5,16075,5,2021-01-01
1,2,2021-01-01 08:03:15,1,29535,1,2021-01-01
2,3,2021-01-01 08:03:49,2,16585,2,2021-01-01
3,4,2021-01-01 08:04:04,1,19984,1,2021-01-01
4,5,2021-01-01 08:05:17,1,15418,1,2021-01-01
...,...,...,...,...,...,...
473295,473296,2023-05-29 14:32:15,1,16787,439,2023-05-29
473296,473297,2023-05-29 14:32:39,1,16587,44301,2023-05-29
473297,473298,2023-05-29 14:33:24,2,16757,3172,2023-05-29
473298,473299,2023-05-29 14:33:25,1,25182,6,2023-05-29


In [22]:
# create the comparison dataframe by merging the orders_df and complete_shipments_df dataframes on the 'uuid' and 'date' columns
comparison_df = orders_df.merge(complete_shipments_df, on=['uuid', 'date'], how='left')
# fill empty shipments with 0
comparison_df['shipments'] = comparison_df['shipments'].fillna(0)
comparison_df['cumulative_shipments'] = comparison_df.groupby('uuid')['shipments'].cumsum()
comparison_df


Unnamed: 0,order_id,timestamp,orders,uuid,cumulative_orders,date,shipments,cumulative_shipments
0,1,2021-01-01 08:00:00,5,16075,5,2021-01-01,513.0,513.0
1,2,2021-01-01 08:03:15,1,29535,1,2021-01-01,2.0,2.0
2,3,2021-01-01 08:03:49,2,16585,2,2021-01-01,0.0,0.0
3,4,2021-01-01 08:04:04,1,19984,1,2021-01-01,29.0,29.0
4,5,2021-01-01 08:05:17,1,15418,1,2021-01-01,20.0,20.0
...,...,...,...,...,...,...,...,...
473295,473296,2023-05-29 14:32:15,1,16787,439,2023-05-29,0.0,0.0
473296,473297,2023-05-29 14:32:39,1,16587,44301,2023-05-29,0.0,0.0
473297,473298,2023-05-29 14:33:24,2,16757,3172,2023-05-29,0.0,0.0
473298,473299,2023-05-29 14:33:25,1,25182,6,2023-05-29,0.0,0.0


In [23]:
# add a stock column by subtracting the cumulative_ordersfrom the  cumulative_shipments
comparison_df['stock'] = comparison_df['cumulative_shipments'] - comparison_df['cumulative_orders']
comparison_df

Unnamed: 0,order_id,timestamp,orders,uuid,cumulative_orders,date,shipments,cumulative_shipments,stock
0,1,2021-01-01 08:00:00,5,16075,5,2021-01-01,513.0,513.0,508.0
1,2,2021-01-01 08:03:15,1,29535,1,2021-01-01,2.0,2.0,1.0
2,3,2021-01-01 08:03:49,2,16585,2,2021-01-01,0.0,0.0,-2.0
3,4,2021-01-01 08:04:04,1,19984,1,2021-01-01,29.0,29.0,28.0
4,5,2021-01-01 08:05:17,1,15418,1,2021-01-01,20.0,20.0,19.0
...,...,...,...,...,...,...,...,...,...
473295,473296,2023-05-29 14:32:15,1,16787,439,2023-05-29,0.0,0.0,-439.0
473296,473297,2023-05-29 14:32:39,1,16587,44301,2023-05-29,0.0,0.0,-44301.0
473297,473298,2023-05-29 14:33:24,2,16757,3172,2023-05-29,0.0,0.0,-3172.0
473298,473299,2023-05-29 14:33:25,1,25182,6,2023-05-29,0.0,0.0,-6.0


In [24]:
# drop every row where the stock is negative, then recalculating the cumulative_shipments and stock

In [25]:
# starting_stock_items will be the items that are not in the first_shipments_df dataframe and have initial_stock > 0, rename the 'item_id' column to 'uuid' and keep only the 'uuid' column
starting_stock_items = items_df[~items_df['uuid'].isin(first_shipments_df['uuid']) & (items_df['initial_stock'] > 0)].rename(columns={'item_id': 'uuid'})[['uuid']]
starting_stock_items['date'] = pd.to_datetime('2021-01-01')
starting_stock_items

Unnamed: 0,uuid,date
8,8,2021-01-01
11,11,2021-01-01
12,12,2021-01-01
14,14,2021-01-01
15,15,2021-01-01
...,...,...
30469,30469,2021-01-01
30471,30471,2021-01-01
30473,30473,2021-01-01
30475,30475,2021-01-01


In [26]:
existing_items = pd.concat([first_shipments_df, starting_stock_items]).sort_values(by=['date', 'uuid']).drop_duplicates(subset=['uuid'], keep='first').reset_index(drop=True)
existing_items

Unnamed: 0,uuid,date
0,8,2021-01-01
1,11,2021-01-01
2,12,2021-01-01
3,14,2021-01-01
4,15,2021-01-01
...,...,...
21021,26861,2021-01-13
21022,27750,2021-01-13
21023,27877,2021-01-13
21024,28371,2021-01-13


In [41]:
def import_orders_data(path: str = f'{DATA_PATH}/orders.csv', shipments_df: pd.DataFrame = pd.DataFrame(), items_df: pd.DataFrame = pd.DataFrame(), max_date: datetime or None = None) -> pd.DataFrame:
    df = pd.read_csv(path)
    # change the 'date' column to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    if max_date is not None:
        df = df[df['timestamp'] < max_date]
    
    # first_shipments_df = shipments_df.groupby('uuid').agg({'date': 'min'}).sort_values(by=['date', 'uuid']).reset_index()
    
    # # starting_stock_items will be the items that are not in the first_shipments_df dataframe and have initial_stock > 0, rename the 'item_id' column to 'uuid' and keep only the 'uuid' column
    # starting_stock_items = items_df[~items_df['uuid'].isin(first_shipments_df['uuid']) & (items_df['initial_stock'] > 0)].rename(columns={'item_id': 'uuid'})[['uuid']]
    # # add a date column with the value of 2021-01-01
    # starting_stock_items['date'] = pd.to_datetime('2021-01-01')
    # # concat the in_stock_items to the first_shipments_df, sort by date and uuid, drop duplicates and reset the index
    # existing_items = pd.concat([first_shipments_df, starting_stock_items]).sort_values(by=['date', 'uuid']).drop_duplicates(subset=['uuid'], keep='first').reset_index(drop=True)
    
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    item_ids_df = pd.read_csv(f'{DATA_PATH}/ids/item_ids.csv')
    # merge the df with the item_df dataframe on the 'uuid' column, keep only the 'item_id' column
    df = df.merge(item_ids_df[['uuid', 'item_id']], on='uuid').drop(columns=['uuid'])
    df = df.rename(columns={'item_id': 'uuid'})
    # remove each row which has a 'uuid' which isn't in the existing_items dataframe
    # df = df[df['uuid'].isin(existing_items['uuid'])]
    # remove each row which has a corresponding 'date' which is before the 'date' in the existing_items dataframe
    # df = df.merge(existing_items, on='uuid')
    # df = df[df['timestamp'] >= df['date']]
    # remove the date column
    # df.drop(columns=['date'], inplace=True)
    df.sort_values(by=['timestamp', 'uuid'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # make order_id equal to the index
    df['order_id'] = df.index
    # change the order of the columns to match the original dataframe
    df = df[['order_id', 'timestamp', 'uuid', 'quantity']]
    return df
MAX_DATETIME = pd.to_datetime('2021-02-02')
import_orders_data(shipments_df=shipments_df, items_df=items_df, max_date=MAX_DATETIME).head(40)

Unnamed: 0,order_id,timestamp,uuid,quantity
0,0,2021-01-01 08:00:00,16075,5
1,1,2021-01-01 08:03:15,29535,1
2,2,2021-01-01 08:03:49,16585,2
3,3,2021-01-01 08:04:04,19984,1
4,4,2021-01-01 08:05:17,15418,1
5,5,2021-01-01 08:07:30,16073,1
6,6,2021-01-01 08:08:01,14626,10
7,7,2021-01-01 08:13:26,22668,2
8,8,2021-01-01 08:14:28,6959,1
9,9,2021-01-01 08:15:54,22722,2


In [56]:
# do the same for 'fetch_tools_speeds_mean_and_std.csv' file
def import_tools_data(path: str = f'{DATA_PATH}/fetch_tools_speeds_mean_and_std.csv') -> pd.DataFrame:
    df = pd.read_csv(path)
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    tool_df = pd.read_csv(f'{DATA_PATH}/ids/tool_ids.csv')
    # merge the df with the tool_df dataframe on the 'uuid' column, keep only the 'fetch_tool_id' column
    df = df.merge(tool_df, on='fetch_tool').drop(columns=['fetch_tool'])
    # remove the _id from the column names
    df = df.rename(columns={'fetch_tool_id': 'fetch_tool'})
    df.sort_values(by=['fetch_tool'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # put the last column first in order
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]
    
    return df

import_tools_data()

Unnamed: 0,fetch_tool,horizontal_speed_mean,horizontal_speed_std,vertical_speed_mean,vertical_speed_std,remove_from_shelf_time_mean,remove_from_shelf_time_std
0,0,2.603184,0.285816,0.41655,0.10769,25.703594,7.192422
1,1,2.32678,0.474466,,,25.478281,6.698394
2,2,2.051716,0.273328,0.301558,0.033932,26.034063,6.916285


In [57]:
# do the same for the 'tool_capacity.csv' file
def import_tool_capacity_data(path: str = f'{DATA_PATH}/tool_capacity.csv') -> pd.DataFrame:
    df = pd.read_csv(path)
    # replace all column which are objects with the relevant ids from 'src/data/ids/
    tool_df = pd.read_csv(f'{DATA_PATH}/ids/tool_ids.csv')
    # merge the df with the tool_df dataframe on the 'uuid' column, keep only the 'fetch_tool_id' column
    df = df.merge(tool_df, on='fetch_tool').drop(columns=['fetch_tool'])
    # remove the _id from the column names
    df = df.rename(columns={'fetch_tool_id': 'fetch_tool'})
    df.sort_values(by=['fetch_tool'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # put the last column first in order
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]
    
    return df

import_tool_capacity_data()

Unnamed: 0,fetch_tool,max_volume
0,0,48.0
1,1,48.0
2,2,24.332
3,3,inf
