In [132]:
import pandas as pd
import warnings
# Silence pandas' SettingWithCopyWarning for the whole notebook session.
# pandas emits it when a frame that may be a view/copy of another frame is
# modified in place; with this filter such writes pass without any notice.
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)


*Define tables*

In [133]:
# Column layout of every table in the library schema, keyed by table name.
_TABLE_COLUMNS = {
    "description": ["description_id", "description"],
    "author": ["author_id", "name"],
    "category": ["category_id", "category_name"],
    "book": ["ISBN", "title", "publication_year", "description_id"],
    "book_author": ["ISBN", "author_id"],
    "book_category": ["ISBN", "category_id"],
    "book_copy": ["copy_id", "status", "condition", "price", "ISBN"],
    "member": ["member_id", "fname", "lname", "email", "phone", "city", "street", "bdate"],
    "staff": ["staff_id", "fname", "lname", "email", "phone", "role", "password", "salary"],
    "reservation": [
        "reservation_id", "member_id", "staff_id",
        "reservation_date", "expiration_date", "returned_at",
    ],
    "reservation_details": ["reservation_id", "copy_id", "position_in_queue"],
}


def _empty_table(table_name):
    """Return an empty DataFrame carrying the columns declared for `table_name`."""
    return pd.DataFrame(columns=list(_TABLE_COLUMNS[table_name]))


# Empty placeholders, one per table.
description = _empty_table("description")
author = _empty_table("author")
category = _empty_table("category")
book = _empty_table("book")
book_author = _empty_table("book_author")
book_category = _empty_table("book_category")
book_copy = _empty_table("book_copy")
member = _empty_table("member")
staff = _empty_table("staff")
reservation = _empty_table("reservation")
reservation_details = _empty_table("reservation_details")

*Read books1 data*

In [134]:
# Load the first Kaggle export: semicolon-separated, cp1252-encoded, with
# malformed rows skipped rather than aborting the read.
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# on the machine running the notebook.
books1_raw = pd.read_csv(
    "C:/Users/mekod/Desktop/Database_project/Data/Kaggle datasets/books1.csv",
    sep=";",
    quotechar='"',
    encoding="cp1252",
    on_bad_lines="skip"
)

# Keep rows that have an ISBN, then the first 2500 of them and the first
# three columns, renumbered from 0.
books_data1 = (
    books1_raw[books1_raw['ISBN'].notna()]
    .iloc[:2500, :3]
    .reset_index(drop=True)
)

  books_data1 = pd.read_csv(


*Read books2 data*

In [135]:
import pandas as pd

# Load the second Kaggle export: comma-separated, UTF-8, with malformed rows
# skipped rather than aborting the read.
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# on the machine running the notebook.
books2_raw = pd.read_csv(
    "C:/Users/mekod/Desktop/Database_project/Data/Kaggle datasets/books2.csv",
    sep=",",
    quotechar='"',
    encoding="utf-8",
    on_bad_lines="skip"
)

# Rows without a description are dropped, exact duplicate rows are removed,
# and the first 2500 survivors are kept with a fresh 0-based index.
books_data2 = (
    books2_raw
    .dropna(subset=['Description'])
    .drop_duplicates()
    .head(2500)
    .reset_index(drop=True)
)


In [136]:
# Borrow ISBNs from books_data1. Assigning a Series aligns on the row index,
# so row i of books_data2 receives the ISBN of row i of books_data1.
isbn_by_row = books_data1['ISBN']
books_data2['ISBN'] = isbn_by_row

# Number the rows 1..N; this number is used as the description key.
books_data2['description_id'] = list(range(1, books_data2.shape[0] + 1))

# Description and Book tables

In [137]:
# Take independent copies of the selected columns. Both frames are edited
# later in the notebook (column renames); copying here means those edits never
# act on an object pandas still tracks as derived from books_data2, so no
# SettingWithCopyWarning is involved.
description = books_data2[["description_id", "Description"]].copy()
book = books_data2[["ISBN", "Title", "Publish Date (Year)", "description_id"]].copy()

In [138]:
# Remove a leading "By " (plus following whitespace) from the author string.
authors_clean = books_data2['Authors'].str.replace(r'^By\s+', '', regex=True)
books_data2['Authors'] = authors_clean

# Comma-separated strings become Python lists, one item per author / category.
books_data2['Authors_list'] = authors_clean.str.split(",")
category_text = books_data2['Category']
books_data2['Category_list'] = category_text.str.split(",")

In [139]:
# Fan the list columns out to one row per (book, author, category) combination.
books_expanded = (
    books_data2
    .explode("Authors_list")
    .explode("Category_list")
)

# After exploding, every cell is a single string (or NaN), so .str.strip() applies.
books_expanded['Authors_list'] = books_expanded['Authors_list'].str.strip()
books_expanded['Category_list'] = books_expanded['Category_list'].str.strip()


def _strip_each(items):
    """Return the list with every string stripped; non-list cells (e.g. NaN) pass through."""
    if isinstance(items, list):
        return [item.strip() for item in items]
    return items


# books_data2 still holds Python lists in these columns. Series.str.strip()
# returns NaN for list cells, which would wipe both columns (and break a
# re-run of this cell), so the items are stripped element by element instead.
books_data2["Authors_list"] = books_data2["Authors_list"].apply(_strip_each)
books_data2["Category_list"] = books_data2["Category_list"].apply(_strip_each)

# Junction candidates: distinct (ISBN, author name) and (ISBN, category name)
# pairs, with rows lacking either value removed.
book_author = books_expanded[["ISBN", "Authors_list"]].dropna().drop_duplicates().reset_index(drop=True)
book_category = books_expanded[["ISBN", "Category_list"]].dropna().drop_duplicates(subset=['ISBN', 'Category_list']).reset_index(drop=True)

# Author table

In [140]:
# One row per distinct author name; ids are 1-based in first-seen order.
author = (
    book_author[['Authors_list']]
    .drop_duplicates()
    .reset_index(drop=True)
)
author['author_id'] = author.index + 1

# Resolve each author name to its id, then keep only the junction-table
# columns (ISBN, author_id). Without this trim the exported book_author table
# would still carry the author name, unlike the declared two-column layout and
# unlike book_category, which is reduced to its id columns the same way.
book_author = (
    book_author
    .merge(author, on='Authors_list', how='left')
    [['ISBN', 'author_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)


# Category table

In [141]:
# One row per distinct category name; ids are 1-based in first-seen order.
category = (
    book_category[['Category_list']]
    .drop_duplicates()
    .reset_index(drop=True)
)
category['category_id'] = category.index + 1

# Look up the id for every (ISBN, category name) pair ...
book_category_with_ids = book_category.merge(category, on='Category_list', how='left')

# ... and reduce the junction table to distinct (ISBN, category_id) pairs.
book_category = (
    book_category_with_ids[['ISBN', 'category_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)

**Rename columns**

In [142]:
# Map source column names onto the schema's column names. rename() returns a
# new frame that is bound back to the same name; this avoids in-place edits on
# frames that were sliced out of another frame, which is the pattern that
# triggers SettingWithCopyWarning.
description = description.rename(columns={"Description": "description"})
book = book.rename(columns={"Title": "title", "Publish Date (Year)": "publication_year"})
author = author.rename(columns={"Authors_list": "name"})
category = category.rename(columns={"Category_list": "category_name"})


# Member Table

This code generates **3,000 library members** with randomized details:

- **Fields Generated**:
  - `member_id`: Unique identifier
  - `fname` / `lname`: Random first and last names
  - `email`: Unique email
  - `phone`: Unique phone number
  - `city` / `street`: Random Egyptian city and street
  - `bdate`: Random date of birth (ages 18–80)

**Output**:

- `member`


In [143]:
from faker import Faker

# Seed Faker's shared random generator so a fresh "Restart & Run All"
# regenerates the same synthetic members.
Faker.seed(42)
fake = Faker('ar_EG')

# Build plain records first and create the DataFrame once at the end; the list
# gets its own name so `member` only ever refers to the finished table.
member_records = []
for member_id in range(1, 3001):
    # fake.unique never repeats a value for this Faker instance, so e-mails
    # and phone numbers are distinct across the 3000 members.
    email = fake.unique.email()
    phone = fake.unique.phone_number()
    member_records.append({
        "member_id": member_id,
        "fname": fake.first_name(),
        "lname": fake.last_name(),
        "email": email,
        "phone": phone,
        "city": fake.city(),
        "street": fake.street_address(),
        "bdate": fake.date_of_birth(minimum_age=18, maximum_age=80)
    })

member = pd.DataFrame(member_records)


# Staff Table

This code generates **100 staff members** with randomized details:

- **Manager Assignment**: First 3 staff are assigned the role `Manager`.
- **Other Roles**: Remaining staff randomly assigned `Librarian`, `Assistant`, or `Technician`.
- **Fields Generated**:
  - `staff_id`: Unique identifier
  - `fname` / `lname`: Random first and last names
  - `email`: Unique email
  - `phone`: Unique phone number
  - `role`: Staff role (Manager or other roles)
  - `password`: Random 8-character alphanumeric string
  - `salary`: Random float between 5000 and 15000

**Output**:

- `staff` 


In [144]:
import random
import string

# Seed both random sources used in this cell so the generated staff (and the
# later cells that keep drawing from `random`) are reproducible on a fresh
# "Restart & Run All".
random.seed(43)
Faker.seed(43)
fake = Faker('ar_EG')

other_roles = [ 'Librarian', 'Assistant', 'Technician']

# Characters a generated password may contain (letters and digits).
password_alphabet = string.ascii_letters + string.digits

staff_records = []
for i in range(100):
    # fake.unique never repeats a value for this Faker instance.
    email = fake.unique.email()
    phone = fake.unique.phone_number()
    password = ''.join(random.choices(password_alphabet, k=8))
    salary = round(random.uniform(5000, 15000), 2)

    # The first three staff members are managers; everyone else gets a random
    # non-manager role.
    role = 'Manager' if i < 3 else random.choice(other_roles)

    staff_records.append({
        "staff_id": i + 1,
        "fname": fake.first_name(),
        "lname": fake.last_name(),
        "email": email,
        "phone": phone,
        "role": role,
        "password": password,
        "salary": salary
    })

staff = pd.DataFrame(staff_records)

# Book_Copy Table

## Generate Book Copies

This code creates 5,000 book copies with randomized attributes:

- **ISBN**: Randomly selected from the `book` table.
- **Status**: One of `Available`, `Reserved`, or `Checked Out`.
- **Condition**: One of `New`, `Good`, `Fair`, or `Poor`.
- **Price**: Random value between 20 and 150.
- **copy_id**: Unique identifier for each copy.

**Output**:

- `book_copy` 


In [145]:
# Pools the random attributes are drawn from.
isbn_list = book['ISBN'].tolist()
statuses = ['Available', 'Reserved', 'Checked Out']
conditions = ['New', 'Good', 'Fair', 'Poor']

num_copies = 5000


def _random_copy(copy_id):
    """Build one book-copy record with randomly drawn ISBN, status, condition and price."""
    # The draws happen in a fixed order: ISBN, status, condition, price.
    isbn = random.choice(isbn_list)
    status = random.choice(statuses)
    condition = random.choice(conditions)
    price = round(random.uniform(20, 150), 2)
    return {
        "copy_id": copy_id,
        "status": status,
        "condition": condition,
        "price": price,
        "ISBN": isbn
    }


# copy_id runs from 1 to num_copies.
book_copies = [_random_copy(copy_number) for copy_number in range(1, num_copies + 1)]

book_copy = pd.DataFrame(book_copies)


# Reservation Table

This script generates reservations and reservation details for library members:

- Each member makes 1–2 reservations, handled only by Assistants.
- Each reservation has a start date, expiration date, and returned_at (80% chance returned).
- Each reservation gets 1–3 book copies, with a max queue of 5 per copy.
- Returned books free up copies for new reservations.
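- `book_copy` status is recomputed after all reservations are generated: a copy is `Checked Out` if its most recently queued reservation has no `returned_at`, and `Available` otherwise (including copies that were never reserved).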

**Output:**

- `df_reservation` → reservation info
- `df_reservation_details` → copy assignments and queue positions
- `df_book_copy_updated` → updated copy status


In [146]:
import pandas as pd
import random
from datetime import datetime, timedelta

# `member`, `staff` and `book_copy` are DataFrames produced by earlier cells;
# pd.DataFrame(...) is kept so list-of-dict input would work too.
df_member = pd.DataFrame(member)
df_staff = pd.DataFrame(staff)
# pd.DataFrame(df) does not copy by default, so take an explicit copy: the
# status rewrite at the end of this cell must not leak back into `book_copy`.
df_book_copy_updated = pd.DataFrame(book_copy).copy()

reservation_list = []
reservation_details_list = []

# Per-copy queue: one entry per reservation holding that copy, in the order
# the reservations were created (so the last entry has the highest id).
copy_queues = {copy_id: [] for copy_id in df_book_copy_updated['copy_id']}

# Loop-invariant lookups, computed once instead of once per reservation.
assistant_ids = df_staff.loc[df_staff['role'] == 'Assistant', 'staff_id'].tolist()
all_copy_ids = df_book_copy_updated['copy_id'].tolist()
if not assistant_ids:
    raise ValueError("No staff member has the role 'Assistant'; cannot assign reservations.")

reservation_id_counter = 1

for member_id in df_member['member_id'].tolist():
    # Each member makes 1-2 reservations.
    num_reservations = random.randint(1, 2)

    for _ in range(num_reservations):
        # Reservations are handled by a randomly chosen Assistant.
        staff_id = random.choice(assistant_ids)

        # Reservation starts within two years of 2025-01-01 and expires 7-21 days later.
        reservation_date = datetime(2025, 1, 1) + timedelta(days=random.randint(0, 365*2))
        expiration_date = reservation_date + timedelta(days=random.randint(7, 21))

        # 80% of reservations are returned, on a day between the day after the
        # reservation and the expiration date; the rest stay open (None).
        if random.random() <= 0.8:
            loan_days = (expiration_date - reservation_date).days
            returned_at = reservation_date + timedelta(days=random.randint(1, loan_days))
        else:
            returned_at = None

        reservation_list.append({
            "reservation_id": reservation_id_counter,
            "member_id": member_id,
            "staff_id": staff_id,
            "reservation_date": reservation_date,
            "expiration_date": expiration_date,
            "returned_at": returned_at
        })

        # Try copies in random order until 1-3 of them have been assigned.
        # (Named copies_wanted so the book-copy cell's `num_copies` is not overwritten.)
        copies_wanted = random.randint(1, 3)
        available_copies = list(all_copy_ids)
        random.shuffle(available_copies)

        assigned = 0
        for copy_id in available_copies:
            queue = copy_queues[copy_id]

            # A copy can be taken when nobody has queued for it yet, or when its
            # queue holds fewer than 5 entries and the last holder returned it.
            if not queue or (len(queue) < 5 and queue[-1]['returned_at'] is not None):
                position_in_queue = len(queue) + 1
                queue.append({"reservation_id": reservation_id_counter, "returned_at": returned_at})

                reservation_details_list.append({
                    "reservation_id": reservation_id_counter,
                    "copy_id": copy_id,
                    "position_in_queue": position_in_queue
                })
                assigned += 1
            if assigned >= copies_wanted:
                break
        reservation_id_counter += 1

# Convert lists to DataFrames
df_reservation = pd.DataFrame(reservation_list)
df_reservation_details = pd.DataFrame(reservation_details_list)


def _status_for(copy_id):
    """Return 'Checked Out' if the copy's last queued reservation is unreturned, else 'Available'."""
    queue = copy_queues[copy_id]
    if queue and queue[-1]['returned_at'] is None:
        return 'Checked Out'
    return 'Available'


# Each queue mirrors that copy's reservation_details rows, and its last entry is
# the reservation with the highest id, so the status can be read straight from
# the queue instead of rescanning every detail row for every copy.
df_book_copy_updated['status'] = [_status_for(copy_id) for copy_id in df_book_copy_updated['copy_id']]

*Save as CSV*

In [147]:
from pathlib import Path

# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing anything.
output_dir = Path("data")
output_dir.mkdir(parents=True, exist_ok=True)

# File stem -> table. book_copy is exported from the frame whose status column
# was recomputed from the reservations.
tables_to_save = {
    "author": author,
    "book": book,
    "book_author": book_author,
    "book_category": book_category,
    "category": category,
    "description": description,
    "staff": staff,
    "member": member,
    "book_copy": df_book_copy_updated,
    "reservation": df_reservation,
    "reservation_details": df_reservation_details,
}

for file_stem, table in tables_to_save.items():
    table.to_csv(output_dir / f"{file_stem}.csv", index=False)