In [4]:
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Synthetic "smart study planner" dataset
#
# Draws six random feature columns, derives a noisy `required_study_hours`
# target from them, bounds that target, and writes the table to a CSV file.
# ---------------------------------------------------------------------------

# Fix the legacy global NumPy random stream so reruns give identical data.
np.random.seed(42)

# Row count of the generated table.
n_samples = 1000

# --- Feature sampling ------------------------------------------------------
# All draws consume the same global stream, so the ORDER of these statements
# determines the values produced. Do not reorder them.
# `randint` excludes `high`, hence the "+1" upper bounds.
course_difficulty = np.random.randint(low=1, high=6, size=n_samples)    # integers 1..5
credit_hours = np.random.randint(low=2, high=7, size=n_samples)         # integers 2..6
days_until_exam = np.random.randint(low=1, high=121, size=n_samples)    # integers 1..120
previous_grade = np.random.randint(low=50, high=101, size=n_samples)    # integers 50..100
daily_available_hours = np.random.uniform(1, 6, n_samples).round(1)     # floats, one decimal
number_of_courses = np.random.randint(low=3, high=9, size=n_samples)    # integers 3..8

# --- Target construction ---------------------------------------------------
# Constant offset added to every row.
base_workload = 5

# Gaussian noise (mean 0, std 1.5); this is the last draw from the stream.
noise = np.random.normal(0, 1.5, n_samples)

# Individual contributions to the target.
difficulty_term = course_difficulty * 2.0
credit_term = credit_hours * 1.5
urgency_term = (30 / days_until_exam) * 5      # grows as the exam gets closer
grade_gap_term = (100 - previous_grade) * 0.1  # larger for lower previous grades
course_load_term = number_of_courses * 0.5     # subtracted below

# Summed left to right in a fixed order so the floating-point result is stable.
required_study_hours = (
    base_workload
    + difficulty_term
    + credit_term
    + urgency_term
    + grade_gap_term
    - course_load_term
    + noise
)

# Upper bound: seven times the daily available hours.
max_weekly_hours = daily_available_hours * 7

# Apply the upper bound first, then enforce a floor of 2 hours.
required_study_hours = np.maximum(
    np.minimum(required_study_hours, max_weekly_hours),
    2,
)

# --- Assemble the table ----------------------------------------------------
feature_columns = {
    "course_difficulty": course_difficulty,
    "credit_hours": credit_hours,
    "days_until_exam": days_until_exam,
    "previous_grade": previous_grade,
    "daily_available_hours": daily_available_hours,
    "number_of_courses": number_of_courses,
}
df = pd.DataFrame(feature_columns)
# Target goes last, rounded to two decimals for storage.
df["required_study_hours"] = required_study_hours.round(2)

# --- Persist and report ----------------------------------------------------
# Written to the current working directory, without the index column.
df.to_csv("smart_study_planner_dataset.csv", index=False)

print("Dataset generated successfully!")
print(df.head())

Dataset generated successfully!
   course_difficulty  credit_hours  days_until_exam  previous_grade  \
0                  4             5               92              57   
1                  5             5               71              86   
2                  3             5               32              81   
3                  5             5               11              97   
4                  5             6               10              92   

   daily_available_hours  number_of_courses  required_study_hours  
0                    4.9                  5                 25.40  
1                    2.0                  5                 14.00  
2                    4.6                  7                 23.32  
3                    4.1                  5                 28.70  
4                    5.5                  4                 37.35  
