In [2]:

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# -----------------------------
# Step 1: Define base parameters
#
# This script builds three related synthetic tables for a library system
# (Members, Books, Bookings) and writes each one to a CSV file in the
# current working directory: Members.csv, Books.csv and Bookings.csv.

n_bookings = 1000
n_members = 200
n_books = 500

membership_levels = ['Bronze', 'Silver', 'Gold', 'Platinum']
# Sampling weights for membership_levels, listed in the same order.
membership_probs = [0.4, 0.3, 0.2, 0.1]
genres = ['Fiction', 'Non-fiction', 'Sci-Fi', 'Romance', 'Mystery', 'Biography']

# Due_Date is always this many days after Start_Date.
loan_period_days = 14
# Per-cell probability of blanking a value in the deliberately noisy columns.
missing_rate = 0.05

# Set to an integer to regenerate exactly the same files on every run;
# None draws fresh entropy each time.
random_seed = None
rng = np.random.default_rng(random_seed)

# -----------------------------
# Step 2: Generate Members table

member_ids = [f"M{i:04d}" for i in range(1, n_members + 1)]

# Join dates fall within the 1000 days starting 2020-01-01.
join_offsets = rng.integers(0, 1000, n_members)

df_members = pd.DataFrame({
    "Member_ID": member_ids,
    "Name": [f"Member_{i}" for i in range(1, n_members + 1)],
    "Age": rng.integers(16, 70, n_members),  # upper bound is exclusive: 16..69
    "Membership_Level": rng.choice(membership_levels, n_members, p=membership_probs),
    "Join_Date": pd.Timestamp(2020, 1, 1) + pd.to_timedelta(join_offsets, unit="D"),
})

# Save to CSV
df_members.to_csv("Members.csv", index=False)
print(" 'Members.csv' created successfully.")

# -----------------------------
# Step 3: Generate Books table

book_ids = [f"B{i:04d}" for i in range(1, n_books + 1)]

df_books = pd.DataFrame({
    "Book_ID": book_ids,
    "Title": [f"Book Title {i}" for i in range(1, n_books + 1)],
    "Author": [f"Author {i}" for i in range(1, n_books + 1)],
    "Genre": rng.choice(genres, n_books),
    "Publication_Year": rng.integers(1980, 2023, n_books),  # 1980..2022
})

# Save to CSV
df_books.to_csv("Books.csv", index=False)
print(" 'Books.csv' created successfully.")

# -----------------------------
# Step 4: Generate Bookings table

n = n_bookings
member_data = rng.choice(member_ids, n)
book_data = rng.choice(book_ids, n)

# Take each booking's membership level from the Members table instead of
# sampling it again: an independent draw would give the same member different
# levels across bookings and contradict Members.csv.
level_by_member = df_members.set_index("Member_ID")["Membership_Level"]
membership_data = pd.Series(member_data).map(level_by_member).to_numpy()

# Loans start on a random day of 2023 and last 1..29 days.
start_dates = pd.Timestamp(2023, 1, 1) + pd.to_timedelta(rng.integers(0, 365, n), unit="D")
durations = rng.integers(1, 30, n)
due_dates = start_dates + pd.Timedelta(days=loan_period_days)
end_dates = start_dates + pd.to_timedelta(durations, unit="D")

# A fee is charged only for days beyond the loan period, at a random
# per-booking daily rate between 0.1 and 0.5, rounded to cents.
days_late = np.clip(durations - loan_period_days, 0, None)
late_fees = np.round(days_late * rng.uniform(0.1, 0.5, n), 2)
ratings = np.round(rng.uniform(1, 5, n), 1)

df_bookings = pd.DataFrame({
    "Booking_ID": range(1, n + 1),
    "Member_ID": member_data,
    "Book_ID": book_data,
    "Membership_Level": membership_data,
    "Start_Date": start_dates,
    "Due_Date": due_dates,
    "End_Date": end_dates,
    "Duration_Days": durations,
    "Late_Fee": late_fees,
    "User_Rating": ratings,
})

# Introduce some missing values
# NOTE(review): each column is blanked independently, so a row whose End_Date
# is removed (and is therefore labelled 'Ongoing' below) can still carry a
# Late_Fee and a Duration_Days value.
for col in ['End_Date', 'Late_Fee', 'User_Rating']:
    mask = rng.random(n) < missing_rate
    df_bookings.loc[mask, col] = np.nan

# A booking without an End_Date is treated as not yet returned.
is_ongoing = df_bookings['End_Date'].isna()
df_bookings['Status'] = np.where(is_ongoing, 'Ongoing', 'Returned')

# Overdue means either still out past the due date (relative to the moment the
# script runs) or returned after the due date. Comparisons against a missing
# End_Date evaluate to False, so ongoing rows are decided by the first term.
still_out_past_due = is_ongoing & (pd.Timestamp.now() > df_bookings['Due_Date'])
returned_late = df_bookings['End_Date'] > df_bookings['Due_Date']
df_bookings['Is_Overdue'] = (still_out_past_due | returned_late).astype(int)

# Save to CSV
df_bookings.to_csv("Bookings.csv", index=False)
print(" 'Bookings.csv' created successfully.")

print("\nAll CSV files created successfully!")


 'Members.csv' created successfully.
 'Books.csv' created successfully.
 'Bookings.csv' created successfully.

All CSV files created successfully!
