In [1]:
import os
import random
from datetime import datetime, timedelta

import pandas as pd

In [3]:
# Fix the seed of Python's global RNG so the generated rentals are repeatable.
random.seed(a=42)

In [23]:
# Location of the car table workbook.
# Set the CAR_TABLE_PATH environment variable to point at the file on another
# machine; when it is unset, the original local path is used unchanged.
file_path = os.environ.get(
    "CAR_TABLE_PATH",
    "C:\\Users\\Tisha\\OneDrive\\Desktop\\models\\data\\Car-----Table.xlsx",
)

In [25]:
# Load the car table from the Excel workbook using the openpyxl engine.
# Later cells read the "City", "Car_Id", "Price per Hour (INR)" and "Base_Fare" columns.
cars_df = pd.read_excel(
    file_path,
    engine="openpyxl",
)

In [27]:
# Number of rental IDs to attempt; attempts that find no free car are skipped.
num_rentals = 5_000


In [29]:
# Date range from which rental start times are drawn.
# Return dates are start + duration, so they may fall after end_date.
start_date = datetime(year=2025, month=3, day=26)
end_date = datetime(year=2025, month=4, day=30, hour=23, minute=59, second=59)

In [31]:
# Per-car booking ledger used to reject overlapping rentals:
# maps a car ID to the list of intervals already booked for that car.
car_bookings = dict()


In [53]:
# Generate rental data
#
# Makes one attempt per rental ID in 1..num_rentals. Each attempt picks a random
# city, then tries that city's cars in random order, drawing a fresh random start
# time and duration for every candidate, and books the first candidate whose
# interval does not overlap any of its existing bookings. Attempts that find no
# free car are skipped, which leaves a gap in Rental_ID.
#
# Reads:  cars_df ("City", "Car_Id", "Price per Hour (INR)", "Base_Fare"),
#         num_rentals, start_date, end_date
# Writes: rental_data, rented_car_ids, car_bookings, rentals_df
rental_data = []
rented_car_ids = set()  # Car IDs that received at least one rental

# Reset the booking ledger together with the other per-run state. Without this,
# re-running the cell would check new rentals against bookings left over from
# the previous (discarded) run.
car_bookings = {}

# Loop-invariant lookups, computed once instead of on every iteration.
cities = cars_df["City"].unique().tolist()
cars_by_city = {
    city: cars_df.loc[cars_df["City"] == city, "Car_Id"].tolist()
    for city in cities
}
max_day_offset = (end_date - start_date).days

for rental_id in range(1, num_rentals + 1):
    # Generate a random Pickup Location (City)
    pickup_location = random.choice(cities)

    # Copy before shuffling so the cached per-city list keeps its original order
    available_cars = list(cars_by_city[pickup_location])
    if not available_cars:
        continue  # Skip if no cars are available for this city

    # Shuffle available cars for randomness
    random.shuffle(available_cars)

    # Try to find an available car
    car_id = None
    rental_start_date = None
    return_date = None
    rental_duration_hours = None

    for candidate_car_id in available_cars:
        # Random start: whole-day offset within the range plus a random hour and minute
        random_days = random.randint(0, max_day_offset)
        random_hours = random.randint(0, 23)
        random_minutes = random.randint(0, 59)
        rental_start_date = start_date + timedelta(
            days=random_days, hours=random_hours, minutes=random_minutes
        )

        # Random rental duration between 1 hour and 7 days (168 hours)
        rental_duration_hours = random.randint(1, 168)
        return_date = rental_start_date + timedelta(hours=rental_duration_hours)

        # Two intervals overlap when each one starts before the other ends;
        # back-to-back rentals (return == next start) are allowed.
        overlapping = any(
            rental_start_date < booked["Return_Date"] and return_date > booked["Rental_Date"]
            for booked in car_bookings.get(candidate_car_id, ())
        )
        if overlapping:
            continue  # Skip this car as it is already booked

        # Car is available, assign it
        car_id = candidate_car_id
        break

    # Compare against None explicitly: a truthiness test would wrongly discard
    # a valid car whose ID is 0.
    if car_id is None:
        continue  # If no available car found, skip this rental

    # Pricing comes from the first cars_df row matching the chosen car
    price_per_hour = cars_df.loc[cars_df["Car_Id"] == car_id, "Price per Hour (INR)"].values[0]
    base_fare = cars_df.loc[cars_df["Car_Id"] == car_id, "Base_Fare"].values[0]

    # Calculate total amount
    total_amount = round(base_fare + (price_per_hour * rental_duration_hours), 2)

    # Store rental details (User_ID is a random 4-digit number)
    rental_data.append([
        rental_id, car_id, random.randint(1000, 9999), pickup_location,
        rental_start_date.strftime("%Y-%m-%d %H:%M:%S"), return_date.strftime("%Y-%m-%d %H:%M:%S"),
        rental_duration_hours, total_amount
    ])

    # Record the interval so later rentals of this car cannot overlap it
    car_bookings.setdefault(car_id, []).append(
        {"Rental_Date": rental_start_date, "Return_Date": return_date}
    )

    # Track rented car
    rented_car_ids.add(car_id)

# Create DataFrame
rentals_df = pd.DataFrame(rental_data, columns=[
    "Rental_ID", "Car_ID", "User_ID", "Pickup_Location",
    "Rental_Date", "Return_Date", "Duration_Hours", "Total_Amount"
])

In [63]:
# Display the generated rentals table as the cell output.
rentals_df

Unnamed: 0,Rental_ID,Car_ID,User_ID,Pickup_Location,Rental_Date,Return_Date,Duration_Hours,Total_Amount
0,1,269,4941,Ranchi,2025-04-07 10:42:00,2025-04-12 05:42:00,115,38985
1,2,7544,8730,Kullu-Manali,2025-04-06 16:29:00,2025-04-10 23:29:00,103,22470
2,3,4683,9382,Puri,2025-04-14 19:52:00,2025-04-21 16:52:00,165,44485
3,4,358,7050,Ranchi,2025-04-13 00:48:00,2025-04-18 08:48:00,128,73684
4,5,5283,5821,Srinagar,2025-04-24 09:58:00,2025-04-24 15:58:00,6,3304
...,...,...,...,...,...,...,...,...
4995,4996,10755,9976,Mumbai,2025-04-08 05:54:00,2025-04-11 02:54:00,69,20181
4996,4997,8823,2495,Diu,2025-04-02 17:03:00,2025-04-07 23:03:00,126,23580
4997,4998,10191,9186,Indore,2025-03-29 02:36:00,2025-03-30 14:36:00,36,9828
4998,4999,7152,8319,Bhopal,2025-04-02 10:03:00,2025-04-04 19:03:00,57,12262


In [59]:
# Count how many rentals have each Duration_Hours value (most frequent first).
# NOTE(review): the name `car_counts` suggests per-car counts, but the column
# counted here is "Duration_Hours" — confirm which was intended.
car_counts = rentals_df["Duration_Hours"].value_counts()

# Display the result
print(car_counts)


Duration_Hours
46     47
2      46
44     43
25     40
1      40
       ..
150    18
131    18
167    16
162    16
139    16
Name: count, Length: 168, dtype: int64


In [65]:
# Flag every row whose (Pickup_Location, Rental_Date, Car_ID) combination occurs
# more than once. This only catches the same car being rented at an identical
# start timestamp; it does not detect partially overlapping bookings.
key_columns = ["Pickup_Location", "Rental_Date", "Car_ID"]
duplicates = rentals_df.duplicated(subset=key_columns, keep=False)

# Keep only the flagged rows
duplicate_entries = rentals_df.loc[duplicates]

# Report how many were found and preview the first few
duplicate_count = len(duplicate_entries)
print(f"Number of duplicate (Pickup_Location, Rental_Date, Car_ID) entries: {duplicate_count}")
print(duplicate_entries.head())

Number of duplicate (Pickup_Location, Rental_Date, Car_ID) entries: 0
Empty DataFrame
Columns: [Rental_ID, Car_ID, User_ID, Pickup_Location, Rental_Date, Return_Date, Duration_Hours, Total_Amount]
Index: []
