## Generate Data

#### Store Data
Generate store front data

In [6]:
import folium
import numpy as np
import pandas as pd

In [196]:
class BaseEntity:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __repr__(self):
        attrs = ', '.join(f"{key}={value}" for key, value in self.__dict__.items())
        return f"{self.__class__.__name__}({attrs})"
    
    def showDataFrame(self):
        return pd.DataFrame([self.__dict__])


class Store(BaseEntity):
    all_stores = []  # Class-level list to store all instances of Store

    def __init__(self, store_id, latitude, longitude, store_name, opendt):
        super().__init__(STORE_ID=store_id, 
                         LATITUDE=latitude, 
                         LONGITUDE=longitude, 
                         STORE_NAME=store_name, 
                         OPENDT=opendt)
        Store.all_stores.append(self)  # Add each instance to the class-level list

    def __repr__(self):
        return f"Store(store_id={self.STORE_ID}, lat={self.LATITUDE}, lon={self.LONGITUDE}, name={self.STORE_NAME}, opendt={self.OPENDT})"

    @classmethod
    def display_all_stores(cls):
        """
        Display a DataFrame of all store instances.
        """
        return pd.DataFrame([store.__dict__ for store in cls.all_stores])
    

class Customer(BaseEntity):
    all_customers = []  # Class-level list to store all instances of Customers
    def __init__(self, customer_id, lat, lon, firstname, lastname, homestoreID):
        super().__init__(customer_id=customer_id, lat=lat, lon=lon, firstname=firstname, lastname=lastname, homestoreID=homestoreID)
        Customer.all_customers.append(self)  # Add each instance to the class-level list

    def __repr__(self):
        return f"Customer(customer_id={self.customer_id}, lat={self.lat}, lon={self.lon}, name={self.firstname} {self.lastname}, homestoreID={self.homestoreID})"
    
    @classmethod
    def display_all_customers(cls):
        """
        Display a DataFrame of all customer instances.
        """
        return pd.DataFrame([cust.__dict__ for cust in cls.all_customers])
    
class Order(BaseEntity):
    all_orders = []  # Class-level list to store all instances of Orders
    def __init__(self, order_id, customer_id, store_id, order_date, order_total):
        super().__init__(order_id=order_id, customer_id=customer_id, store_id=store_id, order_date=order_date, order_total=order_total)

    @classmethod
    def display_all_orders(cls):
        """
        Display a DataFrame of all order instances.
        """
        return pd.DataFrame([order.__dict__ for order in cls.all_orders])

#### Generate random store locations

In [197]:
min_lat, max_lat = 33.0633, 33.2182
min_lon, max_lon = -96.9162, -96.6718

num_stores = 10

import random
random.seed(1033)  # For reproducibility

In [205]:
# Generate random store locations within the bounding box


Store.all_stores = []  # Reset the class-level list to avoid duplicates
name_prefix = "Panucci's Pizza - "
store_ids = [random.randint(0, 9999) for _ in range(num_stores)]
store_locations = [
    Store(
        store_id=store_id,
        latitude=random.uniform(min_lat, max_lat),
        longitude=random.uniform(min_lon, max_lon),
        store_name=f"{name_prefix}{store_id}",
        opendt=f"{random.randint(2010, 2023)}-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}"
    ).__dict__ for store_id in store_ids
]

Store.display_all_stores()

Unnamed: 0,STORE_ID,LATITUDE,LONGITUDE,STORE_NAME,OPENDT
0,6032,33.207601,-96.704979,Panucci's Pizza - 6032,2023-03-24
1,9748,33.125917,-96.706469,Panucci's Pizza - 9748,2011-05-26
2,424,33.148394,-96.820777,Panucci's Pizza - 424,2013-01-04
3,5562,33.171431,-96.725935,Panucci's Pizza - 5562,2017-10-28
4,9693,33.160777,-96.83704,Panucci's Pizza - 9693,2017-06-05
5,1356,33.160343,-96.825862,Panucci's Pizza - 1356,2014-08-09
6,3303,33.16819,-96.786738,Panucci's Pizza - 3303,2014-06-01
7,2089,33.108848,-96.820273,Panucci's Pizza - 2089,2012-08-06
8,848,33.187996,-96.7257,Panucci's Pizza - 848,2014-12-24
9,777,33.211547,-96.831201,Panucci's Pizza - 777,2018-12-05


In [199]:
# # Create a map centered around Frisco, TX
# map_center = [np.mean([min_lat,max_lat]), np.mean([min_lon,max_lon])]
# m = folium.Map(location=map_center, zoom_start=12, tiles='cartodbdark_matter')

# # Add markers for each store location
# for location in Store.all_stores:
#     # Create a marker for each store location
#     # Add a popup with the store name and open date
#     folium.Marker(
#         [location.LATITUDE, location.LONGITUDE],
#         popup=folium.Popup(f"{location.STORE_NAME}", max_width=1000),
#         tooltip=location.STORE_NAME,
#     ).add_to(m)

# m

#### Generate customer locations for each store

In [200]:
num_customers_per_store = (20,50)

radius_range = (0.5, 2.0)  # in km

In [201]:
from scipy.stats import gamma
import numpy as np

# Predefined list of first names
first_names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Hannah", "Ivy", "Jack", "Kathy", "Liam", "Mona", "Nathan", "Olivia", "Paul", "Quincy", "Rachel", "Steve", "Tina"]
# Predefined list of Pokémon names
pokemon_last_names = ["Pikachu", "Charmander", "Bulbasaur", "Squirtle", "Jigglypuff", "Meowth", "Psyduck", "Snorlax", "Eevee", "Mewtwo"]


def generate_customers(store_location, search_radius, num_customers, store_id=None):
    """
    Generate customers around a store location using a gamma-gamma distribution.

    :param store_location: Tuple of (latitude, longitude) for the store location.
    :param search_radius: Maximum search radius in kilometers.
    :param num_customers: Number of customers to generate.
    :param alpha: Shape parameter for the gamma distribution (default: 2.0).
    :param beta: Scale parameter for the gamma distribution (default: 2.0).
    :param store_id: ID of the store to associate customers with.
    :return: List of Customer objects.
    """
    customers = []
    for i in range(num_customers):
        # Generate a random distance using the exponential distribution
        distance = np.random.exponential(scale=search_radius / 2)
        # Generate a random bearing (angle in radians)
        bearing = np.random.uniform(0, 2 * np.pi)
        # Calculate the new latitude and longitude
        delta_lat = distance * np.cos(bearing) / 111  # Approx. conversion of km to degrees latitude
        delta_lon = distance * np.sin(bearing) / (111 * np.cos(np.radians(store_location[0])))  # Adjust for longitude
        customer_lat = store_location[0] + delta_lat
        customer_lon = store_location[1] + delta_lon
        # Create a Customer object
        customer_id = f"{random.randint(0, 9999)}"
        first_name = random.choice(first_names)
        last_name = random.choice(pokemon_last_names)
        customers.append(Customer(customer_id, customer_lat, customer_lon, first_name, last_name, store_id))
    return customers

In [206]:
# Generate customers for each store
Customer.all_customers = []  # Reset the class-level list to avoid duplicates


for store in Store.all_stores:
    store_location = (store.LATITUDE, store.LONGITUDE)
    num_customers = random.randint(*num_customers_per_store)  # Randomly choose the number of customers within the range
    search_radius_km = random.uniform(*radius_range)  # Randomly choose the search radius within the range

    generate_customers(store_location, search_radius_km, num_customers, store_id=store.STORE_ID)


In [207]:
Customer.display_all_customers()

Unnamed: 0,customer_id,lat,lon,firstname,lastname,homestoreID
0,1551,33.208645,-96.704015,Nathan,Eevee,6032
1,8031,33.211886,-96.705498,Tina,Pikachu,6032
2,1074,33.206774,-96.703557,Charlie,Bulbasaur,6032
3,1235,33.206243,-96.708854,Steve,Psyduck,6032
4,104,33.208338,-96.700365,Jack,Mewtwo,6032
...,...,...,...,...,...,...
320,1707,33.211456,-96.831179,Eve,Squirtle,777
321,2679,33.211386,-96.833646,Eve,Eevee,777
322,4385,33.215273,-96.821671,Jack,Eevee,777
323,2120,33.204620,-96.818327,Paul,Eevee,777


#### Transactional data
Generate random transactional data for each store

In [211]:
def generate_gaussian_value(mean, std, floor):
    """
    Generate a random value based on a Gaussian distribution.

    :param mean: Mean of the Gaussian distribution.
    :param std: Standard deviation of the Gaussian distribution.
    :param floor: Minimum value (floor) for the generated value.
    :return: A random value from the Gaussian distribution, floored at the specified minimum value.
    """
    value = np.random.normal(loc=mean, scale=std)
    return round(max(value, floor), 2)  # Round to 2 decimal places and ensure it's not below the floor value

In [217]:
num_orders_per_customer = (3, 25)

Order.all_orders = []  # Reset the class-level list to avoid duplicates

for customer in Customer.all_customers:
    num_orders = random.randint(*num_orders_per_customer)  # Randomly choose the number of orders for each customer
    for _ in range(num_orders):
        order_id = f"{random.randint(0, 99999)}"
        order_date = f"{random.randint(2020, 2023)}-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}"
        order_total = generate_gaussian_value(mean=30, std=10, floor=5)  # Generate a random order total
        Order.all_orders.append(Order(order_id, customer.customer_id, customer.homestoreID, order_date, order_total))

In [218]:
Order.display_all_orders()

Unnamed: 0,order_id,customer_id,store_id,order_date,order_total
0,11682,1551,6032,2022-12-16,42.71
1,8493,1551,6032,2022-07-28,25.84
2,67739,1551,6032,2020-05-27,11.05
3,97015,1551,6032,2022-01-15,35.93
4,13986,1551,6032,2021-02-09,36.13
...,...,...,...,...,...
4657,99009,1966,777,2023-01-28,35.74
4658,87536,1966,777,2021-09-20,26.06
4659,27853,1966,777,2020-08-22,33.66
4660,44782,1966,777,2022-10-20,37.62
