In [None]:
import datetime
import os
import random

import numpy as np
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

In [None]:
# CourierShifts table

In [None]:
# Candidate shift start times: 1070 slots spaced 8 hours apart, starting 2020-01-01.
# The lowercase "h" alias is used because the uppercase "H" spelling is deprecated
# in recent pandas releases; lowercase is accepted by older releases as well.
dti = pd.date_range("2020-01-01", periods=1070, freq="8h")
# Draw 300 start times with replacement, so the same slot may be picked more than once.
rand_dti = random.choices(population=dti, k=300)

In [None]:
# Empty frame that fixes the column layout of the courier-shift data.
shift_columns = ['id', 'Courier', 'TransportUsed', 'StartTime', 'EndTime']
shifts = pd.DataFrame(columns=shift_columns)

In [None]:
# Fill the shift frame: one row per sampled start time.
n_shifts = len(rand_dti)
# A shift lasts 12, 8 or 6 hours; the options are built once and sampled per row.
shift_lengths = [pd.to_timedelta(text) for text in ('12:00:00', '08:00:00', '06:00:00')]

shifts['id'] = range(1, n_shifts + 1)
# randint's upper bound is exclusive, so both columns hold values from 1 to 6.
shifts['Courier'] = np.random.randint(1, 7, size=n_shifts)
shifts['TransportUsed'] = np.random.randint(1, 7, size=n_shifts)
shifts['StartTime'] = rand_dti
shifts['EndTime'] = [start + random.choice(shift_lengths) for start in rand_dti]

In [None]:
# Use the generated id as the row label; it is written out with the frame by to_sql.
shifts = shifts.set_index('id')

In [None]:
# Show the shifts frame as the cell output for a visual check before it is written out.
shifts

In [None]:
# Take the connection URL from the COURIER_DB_URL environment variable so that real
# credentials never have to be stored in the notebook. The fallback is the original
# placeholder URL, so behavior is unchanged when the variable is not set.
db_url = os.environ.get('COURIER_DB_URL', 'mysql+mysqlconnector://user:password@localhost:port/DB')
engine = create_engine(db_url)

In [None]:
# Append the generated shifts to the existing `couriersshifts` table.
shifts.to_sql('couriersshifts', engine, if_exists='append')

In [None]:
# Customers table

In [None]:
# Column names applied to the customer spreadsheet; its first row is skipped
# (skiprows=1) and these names are used instead (header=None, names=columns).
columns = ['id', 'FirstName', 'LastName', 'Phone', 'BirthDate', 'Login', 'Hash', 'Email', 'RegDate', 'IsHuskyOwner']
# Raw string: "\c" is not a valid escape sequence, and newer Python versions warn about it.
# The resulting path is exactly the same as before.
customers = pd.read_excel(r"D:\cust.xlsx", header=None, names=columns, skiprows=1)

In [None]:
# Possible birth dates: every calendar day from 1940-01-01 through 2006-01-01.
birth = pd.date_range(start="1940-01-01", end="2006-01-01", freq="D")
# One birth date per customer row, sampled with replacement.
rand_birth = random.choices(birth, k=len(customers))

In [None]:
# Registration dates range from 2020-01-01 up to the latest generated shift start.
last_shift_start = shifts['StartTime'].max()
reg = pd.date_range(start="2020-01-01", end=last_shift_start, freq="D")
# One registration date per customer row, sampled with replacement.
rand_reg = random.choices(reg, k=len(customers))

In [None]:
# Overwrite the generated columns in one step; assign() keeps existing column positions.
n_customers = len(customers)
customers = customers.assign(
    id=range(1, n_customers + 1),
    BirthDate=rand_birth,
    RegDate=rand_reg,
    # Value 2 is drawn with weight 5 against weight 20 for value 1.
    IsHuskyOwner=random.choices([1, 2], weights=[20, 5], k=n_customers),
)

In [None]:
# Use the generated id as the row label of the customers frame.
customers = customers.set_index('id')

In [None]:
# Sample 40 row labels and 40 column names (both with replacement) that mark
# which customer cells are to be blanked out.
n_blank = 40
blankable_columns = ['LastName', 'BirthDate', 'IsHuskyOwner']
rand_ind_del = random.choices(customers.index, k=n_blank)
rand_col_del = random.choices(blankable_columns, k=n_blank)

In [None]:
# Blank out exactly one cell per sampled (row, column) pair.
# `.at` only addresses a single scalar cell: given two lists it either raises or falls
# back to blanking every sampled column in every sampled row, depending on the pandas
# version. The pairs are therefore grouped by column and masked one column at a time.
# Series.mask also takes care of the dtype (e.g. integers become floats to hold NaN).
for col_label in dict.fromkeys(rand_col_del):
    rows_to_blank = [row for row, col in zip(rand_ind_del, rand_col_del) if col == col_label]
    customers[col_label] = customers[col_label].mask(customers.index.isin(rows_to_blank))

In [None]:
# Show the customers frame as the cell output for a visual check before it is written out.
customers

In [None]:
# Append the generated customers to the existing `customers` table.
customers.to_sql('customers', engine, if_exists='append')

In [None]:
# DeliveryAddresses table

In [None]:
# Load the address spreadsheet; its first row is skipped and the three names below are used.
# Raw string: "\A" is not a valid escape sequence, and newer Python versions warn about it.
# The resulting path is exactly the same as before.
address = pd.read_excel(r"D:\Adr.xlsx", header=None, names=['id', 'CityDistrict', 'Address'], skiprows=1)

In [None]:
# Number the addresses from 1 and give each one a randomly chosen district.
n_addresses = len(address)
district_names = ['Central', 'Fest', 'RW', 'Volotova']
address['id'] = range(1, n_addresses + 1)
address['CityDistrict'] = random.choices(district_names, k=n_addresses)
address = address.set_index('id')

In [None]:
# Show the address frame as the cell output for a visual check before it is written out.
address

In [None]:
# Append the generated addresses to the existing `deliveryaddresses` table.
address.to_sql('deliveryaddresses', engine, if_exists='append')

In [None]:
# Orders table

In [None]:
# Category values and the number of orders to generate.
orderedvia = ['App', 'Phone', 'Website']
paymeth = ['Card', 'Cash', 'Online']
length = 1000

# Customers flagged with IsHuskyOwner == 2 get a sampling weight of 1.5 instead of 1.
is_husky_owner = customers['IsHuskyOwner'] == 2
huskies_ind = customers.index[is_husky_owner]
huskies_weights = np.ones(len(customers.index))
# Ids start at 1, so id - 1 is used as the position in the weight array.
huskies_weights[huskies_ind - 1] *= 1.5

In [None]:
# Empty frame that fixes the column layout of the orders data.
order_columns = [
    'id', 'Customer', 'OrderedVia', 'PayMethPlan',
    'PayMethFact', 'DeliveryAddress', 'DeliverySession',
]
orders = pd.DataFrame(columns=order_columns)

In [None]:
# Fill the columns that can be generated up front; the remaining ones are set later.
order_ids = range(1, length + 1)
orders['id'] = order_ids
# Customers are drawn with the husky-owner weights; channels are weighted 6:2:3.
orders['Customer'] = random.choices(customers.index, weights=huskies_weights, k=length)
orders['OrderedVia'] = random.choices(orderedvia, weights=[6, 2, 3], k=length)
orders['PayMethPlan'] = random.choices(paymeth, k=length)

orders = orders.set_index('id')

In [None]:
# leave 'PayMethFact', 'DeliveryAddress', 'DeliverySession' for later, after OrderStatusHistory table is done

In [None]:
# GoodsInOrder table

In [None]:
# Empty frame that fixes the column layout of the goods-in-order data.
goods_columns = ['id', 'ForOrder', 'AssortmentID']
goods = pd.DataFrame(columns=goods_columns)

In [None]:
# Ids of the orders whose customer is one of the husky owners, as a plain list.
placed_by_husky_owner = orders['Customer'].isin(huskies_ind)
orders_for_huskies = orders.index[placed_by_husky_owner].tolist()

In [None]:
# Generate three goods rows per order on average: order ids are sampled with
# replacement and then sorted so rows of the same order sit next to each other.
n_goods = 3 * len(orders)
fororder = sorted(random.choices(orders.index, k=n_goods))

goods['id'] = range(1, n_goods + 1)
goods['ForOrder'] = fororder
goods = goods.set_index('id')

# Orders of husky owners draw from a different set of assortment ids than the rest.
husky_assortment = [1, 2, 4, 5, 9]
regular_assortment = [1, 3, 5, 6, 7, 8, 9]
for row_id in goods.index:
    is_husky_order = goods.at[row_id, 'ForOrder'] in orders_for_huskies
    pool = husky_assortment if is_husky_order else regular_assortment
    goods.at[row_id, 'AssortmentID'] = random.choice(pool)

In [None]:
# Show the goods frame as the cell output for a visual check.
goods

In [None]:
# DeliverySessions table

In [None]:
# Empty frame that fixes the column layout of the delivery-session data.
session_columns = ['id', 'Restaurant', 'CouriersShift', 'StartTime', 'EndTime', 'TotalDistance']
sessions = pd.DataFrame(columns=session_columns)

In [None]:
# Generate 500 delivery sessions; the time window of each is filled in separately.
n_sessions = 500
sessions['id'] = range(1, n_sessions + 1)
# Restaurant 1 is drawn with weight 15, restaurant 2 with weight 5.
sessions['Restaurant'] = random.choices([1, 2], weights=[15, 5], k=n_sessions)
sessions['CouriersShift'] = random.choices(shifts.index, k=n_sessions)
# Distance is a uniform draw between 1.8 and 27, rounded to two decimals.
sessions['TotalDistance'] = [round(random.uniform(1.8, 27), 2) for _ in range(n_sessions)]

In [None]:
# Pick a random time window for every delivery session inside its courier shift.
# Possible session lengths: 20 minutes to 2 hours in 5-minute steps (built once, not per retry).
session_lengths = pd.timedelta_range("00:20:00", "02:00:00", freq="5min")

session_starts, session_ends = [], []
for shift in sessions['CouriersShift']:
    shift_start = shifts.at[shift, 'StartTime']
    shift_end = shifts.at[shift, 'EndTime']
    # Candidate start times on a 10-minute grid across the shift (built once per session).
    start_grid = pd.date_range(shift_start, shift_end, freq='10min')
    # Rejection sampling: redraw until the session ends strictly before the shift does.
    while True:
        start = random.choice(start_grid)
        end = start + random.choice(session_lengths)
        if end < shift_end:
            break
    session_starts.append(start)
    session_ends.append(end)

# Assign whole columns instead of writing cell by cell, so both columns get a proper
# datetime dtype and datetime accessors such as `.dt` can be used on them afterwards.
sessions['StartTime'] = session_starts
sessions['EndTime'] = session_ends

In [None]:
# Use the generated id as the row label of the sessions frame.
sessions = sessions.set_index('id')

In [None]:
# Show the sessions frame as the cell output for a visual check before it is written out.
sessions

In [None]:
# Append the generated sessions to the existing `deliverysessions` table.
sessions.to_sql('deliverysessions', engine, if_exists='append')

In [None]:
# OrderStatusHist table

In [None]:
# Empty frame that fixes the column layout of the order-status history.
history_columns = ['id', 'Order', 'Status', 'Time']
history = pd.DataFrame(columns=history_columns)

In [None]:
# Every order gets three consecutive history rows.
n_orders = len(orders.index)
history['id'] = range(1, 3 * n_orders + 1)
history['Order'] = sorted(order_id for order_id in orders.index for _ in range(3))
# Within each triple the statuses are 1, then 2, then a final status that is
# 3 with weight 20 or 4 with weight 1.
for first in range(0, len(history), 3):
    history.at[first, 'Status'] = 1
    history.at[first + 1, 'Status'] = 2
    (final_status,) = random.choices([3, 4], weights=[20, 1], k=1)
    history.at[first + 2, 'Status'] = final_status

In [None]:
# Use the generated id as the row label of the history frame.
history = history.set_index('id')

In [None]:
# Show the history frame as the cell output; its Time column has not been filled in yet.
history

In [None]:
# Leave the Time column of the OrderStatusHist table for later, so that it can be made consistent with the delivery sessions (which are sparse).

In [None]:
# Finalizing the Orders table: add the PayMethFact column and the payment-mismatch cases, based on the OrderStatusHist table.

In [None]:
# Order ids whose history contains status 3 (fulfilled) or status 4 (canceled).
status_column = history['Status']
fulfilled_ind = history.loc[status_column == 3, 'Order']
canceled_ind = history.loc[status_column == 4, 'Order']

In [None]:
# For fulfilled orders the actual payment method starts out equal to the planned one.
# `.loc` is used because `.at` only addresses a single scalar cell and is not meant
# for a whole collection of row labels.
orders.loc[fulfilled_ind, 'PayMethFact'] = orders.loc[fulfilled_ind, 'PayMethPlan']

In [None]:
# Draw 150 fulfilled order ids with replacement, so repeats are possible.
fulfilled_orders = fulfilled_ind.tolist()
misspay_ind = random.choices(fulfilled_orders, k=150)

In [None]:
# Force the actual payment method of the sampled orders to 'Cash'.
# `.loc` is used because `.at` only addresses a single scalar cell; the sampled ids
# may repeat, so they are de-duplicated before being used as row labels.
orders.loc[list(set(misspay_ind)), 'PayMethFact'] = 'Cash'

In [None]:
# Sessions that end on a Saturday or Sunday get a sampling weight of 1.5 instead of 1.
# EndTime is coerced to datetime first, so `.dt` works even if the column is stored
# with a generic object dtype.
session_end = pd.to_datetime(sessions['EndTime'])
weekend_sessions = sessions.index[session_end.dt.day_name().isin(['Saturday', 'Sunday'])]
weekend_weights = np.ones(len(sessions.index))
# Session ids start at 1, so id - 1 is used as the position in the weight array.
weekend_weights[weekend_sessions - 1] *= 1.5

In [None]:
# Give every order a random address and a delivery session drawn with the weekend weights.
n_orders_total = len(orders)
orders['DeliveryAddress'] = random.choices(address.index, k=n_orders_total)
orders['DeliverySession'] = random.choices(sessions.index, weights=weekend_weights, k=n_orders_total)

In [None]:
# Canceled orders have neither a delivery address nor a delivery session.
# The two columns are cast to float explicitly so they can hold NaN without relying on
# implicit upcasting during assignment, and `.loc` is used because `.at` only
# addresses a single scalar cell.
delivery_columns = ['DeliveryAddress', 'DeliverySession']
orders = orders.astype({column: 'float64' for column in delivery_columns})
orders.loc[canceled_ind, delivery_columns] = np.nan

In [None]:
# Show the completed orders frame as the cell output for a visual check before it is written out.
orders

In [None]:
# Append the generated orders to the existing `orders` table.
orders.to_sql('orders', engine, if_exists='append')

In [None]:
# insert into GoodsInOrder table

In [None]:
# Append the generated goods rows to the existing `goodsinorder` table.
goods.to_sql('goodsinorder', engine, if_exists='append')

In [None]:
# finalizing OrderStatusHist table by adding Time

In [None]:
# For every fulfilled order, remember the start and end time of its delivery session.
timedict_start, timedict_end = {}, {}
for order_id in fulfilled_ind:
    session_id = orders.loc[order_id, 'DeliverySession']
    timedict_start[order_id] = sessions.loc[session_id, 'StartTime']
    timedict_end[order_id] = sessions.loc[session_id, 'EndTime']

In [None]:
# Fill the Time column. History rows come in consecutive triples (ids k, k+1, k+2)
# that belong to the same order; the last row of each triple holds the final status.
# The sampling pools do not change between iterations, so they are built once.
# Lowercase "4h" is used because the uppercase "H" alias is deprecated in recent pandas.
cancel_times = pd.date_range(shifts.StartTime.min(), shifts.EndTime.max(), freq="4h")
lead_times = pd.timedelta_range('00:20:00', '02:00:00', freq="7min")
first_step_gap = pd.to_timedelta('00:05:00')

for k in range(1, len(history) + 1, 3):
    order_id = history.at[k, 'Order']
    if history.at[k + 2, 'Status'] == 3:
        # Status 3: a random moment inside the order's delivery session window.
        window_start = timedict_start[order_id]
        window_end = timedict_end[order_id]
        final_time = window_start + (window_end - window_start) * random.random()
    else:
        # Any other final status: a random point on a 4-hour grid over the whole shift range.
        final_time = random.choice(cancel_times)
    history.at[k + 2, 'Time'] = final_time
    # The middle row precedes the final one by 20 minutes to 2 hours (7-minute steps),
    # and the first row precedes the middle one by 5 minutes.
    history.at[k + 1, 'Time'] = final_time - random.choice(lead_times)
    history.at[k, 'Time'] = history.at[k + 1, 'Time'] - first_step_gap

In [None]:
# Show the history frame, now including the Time column, as the cell output before it is written out.
history

In [None]:
# Append the generated status history to the existing `orderstatushist` table.
history.to_sql('orderstatushist', engine, if_exists='append')

In [None]:
# engine.dispose()  # uncomment to close the engine's pooled connections once all tables are written