In [12]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Setup
np.random.seed(42)
student_names = ['Rahul Sharma', 'Ananya Singh', 'Kunal Mehta', 'Priya Desai', 'Aman Verma',
                 'Sneha Kapoor', 'Rohit Das', 'Megha Jain', 'Vivek Agarwal', 'Isha Khanna']
book_titles = ['Introduction to ML', 'Python Basics', 'Data Science with Python', 'Database Systems',
               'Operating Systems', 'Modern Web Dev', 'Linear Algebra', 'Digital Circuits',
               'Artificial Intelligence', 'Data Structures']
genres = ['CS', 'Math', 'Electronics', 'AI', 'Web Dev']
librarians = ['Mrs. Batra', 'Mr. Khurana', 'Ms. Ahuja']

# Settings
n = 500
fine_per_day = 1
start_date = datetime(2024, 1, 1)

# 17 offsets from -2 to 14
return_offsets = list(range(-2, 15))
# 17 probabilities matching above
probabilities = np.array([0.03, 0.03, 0.06, 0.06, 0.06, 0.06, 0.06,
                          0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
                          0.05, 0.05, 0.05])
# Normalize to sum to 1.0
probabilities = probabilities / probabilities.sum()

# Generate data
data = []
for i in range(n):
    slip_id = f"SLIP{i+1:05d}"
    student = random.choice(student_names)
    book = random.choice(book_titles)
    genre = random.choice(genres)
    librarian = random.choice(librarians)

    issue_date = start_date + timedelta(days=np.random.randint(0, 240))
    due_date = issue_date + timedelta(days=14)
    return_offset = np.random.choice(return_offsets, p=probabilities)
    return_date = due_date + timedelta(days=int(return_offset))

    delay_days = max(0, (return_date - due_date).days)
    fine = delay_days * fine_per_day

    data.append([slip_id, student, book, genre, issue_date.date(), due_date.date(),
                 return_date.date(), librarian, delay_days, fine])

# Create and save dataframe
columns = ['Slip_ID', 'Student_Name', 'Book_Title', 'Genre', 'Issue_Date', 'Due_Date',
           'Return_Date', 'Librarian_Issued_By', 'Delay_Days', 'Fine']
df = pd.DataFrame(data, columns=columns)
df.to_csv("library_slip_tracker_realistic.csv", index=False)

print("✅ Dataset saved as 'library_slip_tracker_realistic.csv'")

✅ Dataset saved as 'library_slip_tracker_realistic.csv'


In [13]:
# Optional: Display first 10 rows
print("\n🔍 Sample Preview:")
print(df.head(500))


🔍 Sample Preview:
       Slip_ID   Student_Name               Book_Title        Genre  \
0    SLIP00001      Rohit Das         Database Systems  Electronics   
1    SLIP00002  Vivek Agarwal  Artificial Intelligence  Electronics   
2    SLIP00003    Isha Khanna           Modern Web Dev           CS   
3    SLIP00004   Sneha Kapoor       Introduction to ML      Web Dev   
4    SLIP00005   Rahul Sharma           Linear Algebra           CS   
..         ...            ...                      ...          ...   
495  SLIP00496      Rohit Das        Operating Systems      Web Dev   
496  SLIP00497    Isha Khanna        Operating Systems  Electronics   
497  SLIP00498     Megha Jain            Python Basics  Electronics   
498  SLIP00499  Vivek Agarwal         Database Systems           AI   
499  SLIP00500    Isha Khanna          Data Structures         Math   

     Issue_Date    Due_Date Return_Date Librarian_Issued_By  Delay_Days  Fine  
0    2024-04-12  2024-04-26  2024-05-07         

In [14]:
df = pd.read_csv("library_slip_tracker_realistic.csv")



In [15]:
df.to_excel("library_slip_tracker_realistic.xlsx", index=False)
