In [43]:
# # Importing libraries for file handling 
!pip install pandas



In [44]:
# Installing the Faker library to generate fake data for testing and simulations  
!pip install faker



In [45]:
# Importing necessary libraries for data handling, fake data generation, randomization, and date manipulation  
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

# Initializing Faker to Generate Synthetic Data

In [46]:
# Initialize Faker for generating synthetic data
faker = Faker("en_IN")

# Number of records for each table
NUM_CUSTOMERS = 200
NUM_RESTAURANTS = 80
NUM_ORDERS = 400
NUM_DELIVERIES = 400
NUM_DELIVERY_PERSONS = 40


# Data Generation for Customers Dataset

In [47]:
# Function to generate Customers dataset
def generate_customers(num_records):
    customers = []
    for i in range(1, num_records + 1):
        customers.append({
            "customer_id": f"C{i:03}",
            "name": (name := faker.first_name_male() if random.choice([True, False]) else faker.first_name_female()),
            "email": f"{name.lower()}@gmail.com",
            "phone": random.choice([6, 7, 8, 9]) * 1000000000 + random.randint(10000000, 99999999),
            "location": f"{faker.street_address()}, {faker.city()}, {faker.state().upper()}",
            "signup_date": faker.date_this_decade(),
            "is_premium": faker.boolean(),
            "preferred_cuisine": random.choice(["indian", "chinese", "italian", "mexican"]),
            "total_orders": random.randint(1, 10),
            "average_rating": round(random.uniform(1, 5), 1)
        })
    return pd.DataFrame(customers)


# Data Generation for Restaurants Dataset

In [48]:
# Function to generate Restaurants dataset
def generate_restaurants(num=80):
    restaurants = []
    first_words = [
        "Saffron", "Annapurna", "Rasa", "Paneer", "Golden", "Naan", "Chaat",
        "Dosa", "Tandoori", "Biryani", "Spice", "Curry", "Mithai", "Sizzler",
        "Flavors", "Garam", "Masala", "Tandoor", "Roti", "Chai", "Kebab"
    ]
    second_words = [
        "Spice", "Palace", "Kitchen", "Bistro", "Delight", "Royale", "Corner",
        "Bliss", "Haven", "House", "Co", "Tale", "Treats", "World", "Stop",
        "Spot", "Station", "Express", "Kingdom", "Place", "Mahal"
    ]
    for i in range(1, num + 1):
        name = f"{random.choice(first_words)} {random.choice(second_words)}"
        restaurants.append({
            "restaurant_id": f"R{i:03}",
            "name": name,
            "cuisine_type": random.choice(["Indian", "Chinese", "Italian", "Mexican", "Thai", "Continental"]),
            "location": f"{faker.city()}, {faker.state().upper()}",  # Only city, state
            "owner_name": faker.name(),
            "average_delivery_time(min)": random.randint(20, 60),
            "contact_number": random.choice([6, 7, 8, 9]) * 1000000000 + random.randint(10000000, 99999999),
            "rating": round(random.uniform(1, 5), 1),
            "total_orders": random.randint(5, 10),
            "is_active": faker.boolean()
        })
    return pd.DataFrame(restaurants)


# Data Generation for Orders Dataset

In [49]:
# Function to generate Orders dataset
def generate_orders(customers_df, restaurants_df, num=400):
    orders = []
    for i in range(1, num + 1):
        customer = customers_df.sample().iloc[0]
        restaurant = restaurants_df.sample().iloc[0]
        order_date = faker.date_this_year()
        random_hour = random.randint(1, 12)
        random_minute = random.randint(0, 59)
        period = random.choice(["AM", "PM"])
        delivery_time = f"{random_hour:02}:{random_minute:02} {period}"

        orders.append({
            "order_id": f"O{i:03}",
            "customer_id": customer["customer_id"],
            "restaurant_id": restaurant["restaurant_id"],
            "order_date": order_date,
            "delivery_time": delivery_time,
            "status": random.choice(["Pending", "Delivered", "Cancelled"]),
            "total_amount": round(random.uniform(100, 1000), 2),
            "payment_mode": random.choice(["Credit Card", "Cash", "UPI"]),
            "discount_applied": random.randint(50, 150),
            "feedback_rating": round(random.uniform(1, 5), 1)
        })
    return pd.DataFrame(orders)


# Data Generation for delivery_person Dataset

In [50]:
# Function to generate delivery_person dataset
def generate_delivery_persons(num=40):
    delivery_persons = []
    for i in range(1, num + 1):
        delivery_persons.append({
            "delivery_person_id": f"D{i:03}",
            "name": faker.first_name_male(),
            "contact_number": random.choice([6, 7, 8, 9]) * 1000000000 + random.randint(10000000, 99999999),
            "vehicle_type": random.choice(["Bike", "Car"]),
            "total_deliveries": random.randint(5, 15),
            "average_rating": round(random.uniform(1, 5), 1),
            "location": f"{faker.city()}, {faker.state().upper()}"  
        })
    return pd.DataFrame(delivery_persons)


# Data Generation for deliveries  Dataset

In [51]:
# Function to generate deliveries dataset
def generate_deliveries(orders_df, delivery_persons_df, num=400):
    deliveries = []
    for i in range(1, num + 1):
        order = orders_df.sample().iloc[0]
        delivery_person = delivery_persons_df.sample().iloc[0]
        distance = random.randint(2, 15)
        delivery_time = random.randint(15, 60)
        estimated_time = random.randint(20, 60)

        deliveries.append({
            "delivery_id": f"DLY{i:03}",
            "order_id": order["order_id"],
            "delivery_person_id": delivery_person["delivery_person_id"],
            "delivery_status": random.choice(["On the way", "Delivered"]),
            "distance(km)": distance,
            "delivery_time(min)": delivery_time,
            "estimated_time(min)": estimated_time,
            "delivery_fee": random.randint(30, 150),
            "vehicle_type": delivery_person["vehicle_type"]
        })
    return pd.DataFrame(deliveries)


# Generate and Save Datasets to CSV Files

In [52]:
# Generate datasets
customers_df = generate_customers(NUM_CUSTOMERS)
restaurants_df = generate_restaurants(NUM_RESTAURANTS)
orders_df = generate_orders(customers_df, restaurants_df, NUM_ORDERS)
delivery_persons_df = generate_delivery_persons(NUM_DELIVERY_PERSONS)
deliveries_df = generate_deliveries(orders_df, delivery_persons_df, NUM_DELIVERIES)

# Save datasets into CSV files
customers_df.to_csv('customers.csv', index=False)
restaurants_df.to_csv("restaurants.csv", index=False)
orders_df.to_csv("orders.csv", index=False)
delivery_persons_df.to_csv("delivery_persons.csv", index=False)
deliveries_df.to_csv("deliveries.csv", index=False)


In [53]:
customers_df.head()

Unnamed: 0,customer_id,name,email,phone,location,signup_date,is_premium,preferred_cuisine,total_orders,average_rating
0,C001,Timothy,timothy@gmail.com,7077107836,"H.No. 591, Gour Marg, Cuttack, ODISHA",2023-01-02,False,italian,6,3.6
1,C002,Gunbir,gunbir@gmail.com,9088685024,"88/002, Morar Circle, Sultan Pur Majra, GOA",2021-02-16,True,chinese,4,3.4
2,C003,Qadim,qadim@gmail.com,9088487610,"778, Choudhary Nagar, Gurgaon, SIKKIM",2023-04-25,True,mexican,7,2.3
3,C004,Leela,leela@gmail.com,7094534770,"03\nWason Ganj, Bhubaneswar, HARYANA",2020-11-04,False,mexican,8,2.5
4,C005,Hemangini,hemangini@gmail.com,9084382786,"52/91\nBorra Nagar, Proddatur, GOA",2020-02-27,True,mexican,2,3.3


In [54]:
customers_df

Unnamed: 0,customer_id,name,email,phone,location,signup_date,is_premium,preferred_cuisine,total_orders,average_rating
0,C001,Timothy,timothy@gmail.com,7077107836,"H.No. 591, Gour Marg, Cuttack, ODISHA",2023-01-02,False,italian,6,3.6
1,C002,Gunbir,gunbir@gmail.com,9088685024,"88/002, Morar Circle, Sultan Pur Majra, GOA",2021-02-16,True,chinese,4,3.4
2,C003,Qadim,qadim@gmail.com,9088487610,"778, Choudhary Nagar, Gurgaon, SIKKIM",2023-04-25,True,mexican,7,2.3
3,C004,Leela,leela@gmail.com,7094534770,"03\nWason Ganj, Bhubaneswar, HARYANA",2020-11-04,False,mexican,8,2.5
4,C005,Hemangini,hemangini@gmail.com,9084382786,"52/91\nBorra Nagar, Proddatur, GOA",2020-02-27,True,mexican,2,3.3
...,...,...,...,...,...,...,...,...,...,...
195,C196,Manan,manan@gmail.com,9084312287,"H.No. 882, Bhatnagar Chowk, Gulbarga, MANIPUR",2022-12-10,True,mexican,9,4.9
196,C197,Upkaar,upkaar@gmail.com,7062984042,"43/61, Kapur Street, Bhiwani, GUJARAT",2021-10-29,False,chinese,8,4.9
197,C198,Alexander,alexander@gmail.com,8019316128,"205, Baral, Thoothukudi, KERALA",2022-11-04,True,italian,1,4.5
198,C199,Samaksh,samaksh@gmail.com,9030319779,"06/89\nWagle, Nellore, ANDHRA PRADESH",2022-01-22,False,italian,10,1.9


In [55]:
restaurants_df

Unnamed: 0,restaurant_id,name,cuisine_type,location,owner_name,average_delivery_time(min),contact_number,rating,total_orders,is_active
0,R001,Garam Palace,Thai,"Sikar, TRIPURA",Tanay Bhargava,35,9082703425,4.9,5,False
1,R002,Tandoor Place,Chinese,"Adoni, GOA",Wazir Kale,36,7031727188,2.3,6,False
2,R003,Mithai Corner,Mexican,"Imphal, GUJARAT",Yadavi Lal,23,7095149150,3.4,5,False
3,R004,Sizzler Royale,Continental,"Baranagar, ANDHRA PRADESH",Ekta Desai,35,7025638466,2.7,10,True
4,R005,Flavors Express,Chinese,"Asansol, NAGALAND",Gaurang Issac,35,7013689628,4.6,5,False
...,...,...,...,...,...,...,...,...,...,...
75,R076,Dosa Kitchen,Chinese,"Tezpur, BIHAR",Vedant Bora,29,7096620767,4.4,9,True
76,R077,Tandoori Bistro,Indian,"Gandhinagar, MIZORAM",Anay D’Alia,44,9017542224,3.5,8,False
77,R078,Mithai Spice,Chinese,"Silchar, UTTARAKHAND",Yashawini Bhatti,40,7067639289,3.9,5,False
78,R079,Biryani Royale,Italian,"Faridabad, HARYANA",Ekalinga Khalsa,20,6039510994,3.9,9,False


In [56]:
restaurants_df.iloc[0:50]

Unnamed: 0,restaurant_id,name,cuisine_type,location,owner_name,average_delivery_time(min),contact_number,rating,total_orders,is_active
0,R001,Garam Palace,Thai,"Sikar, TRIPURA",Tanay Bhargava,35,9082703425,4.9,5,False
1,R002,Tandoor Place,Chinese,"Adoni, GOA",Wazir Kale,36,7031727188,2.3,6,False
2,R003,Mithai Corner,Mexican,"Imphal, GUJARAT",Yadavi Lal,23,7095149150,3.4,5,False
3,R004,Sizzler Royale,Continental,"Baranagar, ANDHRA PRADESH",Ekta Desai,35,7025638466,2.7,10,True
4,R005,Flavors Express,Chinese,"Asansol, NAGALAND",Gaurang Issac,35,7013689628,4.6,5,False
5,R006,Annapurna Kitchen,Thai,"Jhansi, PUNJAB",Saumya Bora,47,8070623985,2.2,7,True
6,R007,Flavors Tale,Continental,"Dehradun, PUNJAB",Janaki Bhargava,38,6063976605,2.4,10,False
7,R008,Dosa Co,Thai,"Khammam, BIHAR",Widisha Kapur,60,9044719028,1.5,10,True
8,R009,Sizzler Kitchen,Italian,"Anantapur, KARNATAKA",Chandresh Hegde,30,6097068652,2.6,6,True
9,R010,Flavors Spice,Indian,"Akola, GOA",Advik Bhatt,41,9094887630,4.0,7,True


In [57]:
orders_df.iloc[34:50]

Unnamed: 0,order_id,customer_id,restaurant_id,order_date,delivery_time,status,total_amount,payment_mode,discount_applied,feedback_rating
34,O035,C176,R060,2025-01-17,08:46 AM,Delivered,424.67,UPI,65,1.7
35,O036,C116,R072,2025-01-11,04:12 AM,Cancelled,556.22,Credit Card,112,3.9
36,O037,C088,R051,2025-01-22,11:47 PM,Cancelled,949.01,Credit Card,113,1.0
37,O038,C188,R020,2025-01-10,07:16 PM,Delivered,380.27,Cash,71,2.9
38,O039,C133,R060,2025-01-08,03:19 PM,Pending,343.43,UPI,129,3.2
39,O040,C020,R001,2025-01-19,10:34 PM,Pending,854.15,UPI,143,2.8
40,O041,C101,R026,2025-01-15,12:55 PM,Cancelled,330.86,Cash,89,1.3
41,O042,C005,R051,2025-01-31,09:00 PM,Pending,326.34,UPI,113,3.4
42,O043,C105,R032,2025-01-27,11:49 AM,Pending,131.62,UPI,85,2.3
43,O044,C050,R063,2025-01-01,09:26 PM,Pending,648.6,Cash,81,1.8


In [58]:
orders_df.iloc[340:370]

Unnamed: 0,order_id,customer_id,restaurant_id,order_date,delivery_time,status,total_amount,payment_mode,discount_applied,feedback_rating
340,O341,C152,R051,2025-01-14,03:25 PM,Delivered,548.76,UPI,108,4.6
341,O342,C175,R066,2025-01-11,08:38 PM,Cancelled,513.36,Credit Card,114,2.7
342,O343,C049,R062,2025-01-20,10:01 AM,Cancelled,330.44,Cash,134,4.2
343,O344,C119,R033,2025-02-01,11:50 PM,Delivered,377.24,UPI,101,3.8
344,O345,C043,R069,2025-01-13,09:30 PM,Pending,228.69,Credit Card,80,2.6
345,O346,C030,R044,2025-01-29,11:33 PM,Pending,215.2,UPI,119,1.3
346,O347,C046,R010,2025-01-13,03:30 AM,Delivered,799.74,Cash,71,1.6
347,O348,C105,R022,2025-01-20,04:46 PM,Delivered,440.19,UPI,122,3.9
348,O349,C152,R005,2025-02-04,02:17 AM,Delivered,375.52,UPI,89,1.0
349,O350,C107,R079,2025-01-11,07:12 PM,Pending,291.19,Credit Card,57,4.0


In [59]:
delivery_persons_df

Unnamed: 0,delivery_person_id,name,contact_number,vehicle_type,total_deliveries,average_rating,location
0,D001,Ayaan,8014794818,Bike,8,4.4,"Bilaspur, UTTARAKHAND"
1,D002,Udarsh,6063479590,Car,7,4.4,"Gulbarga, MIZORAM"
2,D003,Nachiket,8022813378,Bike,14,3.8,"Kakinada, PUNJAB"
3,D004,Rushil,7041895700,Car,6,4.4,"Naihati, MEGHALAYA"
4,D005,Teerth,6053687095,Car,6,3.5,"Bhalswa Jahangir Pur, ARUNACHAL PRADESH"
5,D006,Harrison,8063919269,Bike,9,1.6,"Kharagpur, MIZORAM"
6,D007,Jai,9093333241,Bike,13,3.9,"Muzaffarnagar, MIZORAM"
7,D008,Atharv,6086982044,Bike,10,1.4,"Gangtok, TAMIL NADU"
8,D009,Jatin,8043708210,Bike,11,4.5,"Jaipur, MIZORAM"
9,D010,Pranit,9022759401,Car,10,4.1,"Nellore, ODISHA"


In [60]:
deliveries_df

Unnamed: 0,delivery_id,order_id,delivery_person_id,delivery_status,distance(km),delivery_time(min),estimated_time(min),delivery_fee,vehicle_type
0,DLY001,O059,D006,Delivered,4,50,47,134,Bike
1,DLY002,O044,D020,Delivered,3,38,53,105,Bike
2,DLY003,O128,D018,On the way,14,30,50,64,Car
3,DLY004,O040,D024,On the way,14,43,59,79,Bike
4,DLY005,O056,D019,On the way,2,41,37,130,Bike
...,...,...,...,...,...,...,...,...,...
395,DLY396,O159,D023,Delivered,9,43,57,42,Car
396,DLY397,O212,D024,Delivered,5,21,42,141,Bike
397,DLY398,O040,D031,Delivered,3,16,25,91,Bike
398,DLY399,O109,D015,Delivered,9,41,52,104,Car
