In [1]:
import pandas as pd
import random
from faker import Faker

fake = Faker() # Initialize Faker
Faker.seed(0)   # For reproducibility
random.seed(0) # For reproducibility

# Sample options
grades = list(range(1, 13)) #` Grades 1 to 12`
sports = ["Soccer", "Basketball", "Swimming", "Tennis", "Volleyball", "Cricket", "Hockey", "Track & Field", "Gymnastics"] # List of sports
interest_levels = ["Interested", "Enrolled", "Competitive"] # Interest levels in sports
enrollment_statuses = ["Not Enrolled", "Enrolled", "Waitlisted"] # Enrollment statuses
positions = ["Goalkeeper", "Forward", "Midfielder", "Defender", "Runner", "Swimmer", "Striker", "Winger", "Libero"] # Positions in sports
skill_levels = ["Beginner", "Intermediate", "Advanced"] # Skill levels in sports
training_schedules = ["Weekly", "Bi-weekly", "Monthly"] # Training schedules
participation_types = ["Seasonal", "Year-round"] # Participation types
genders = ["Male", "Female", "Other", "Prefer not to say"] #genders
communication_preferences = ["Email", "SMS", "WhatsApp"] # Communication preferences
photo_consent_options = ["Yes", "No"] # Photo consent options

# Generate student data
def generate_student_record(student_id):
    first_name = fake.first_name()
    last_name = fake.last_name()
    dob = fake.date_of_birth(minimum_age=6, maximum_age=18)
    grade_level = random.choice(grades)
    school_name = f"North York Public School {random.randint(1, 10)}"
    gender = random.choice(genders)
    
    parent_name = fake.name()
    contact_number = fake.phone_number()
    email = fake.email()
    address = "North York, Toronto, ON"

    sport = random.choice(sports)
    interest_level = random.choice(interest_levels)
    enrollment_status = random.choice(enrollment_statuses)
    club_name = f"North York {sport} Club"
    position = random.choice(positions)
    experience_years = random.randint(0, 8)
    training_schedule = random.choice(training_schedules)
    participation = random.choice(participation_types)
    skill_level = random.choice(skill_levels)
    
    medical_conditions = random.choice(["None", "Asthma", "Allergies", "None"])
    accessibility_needs = random.choice(["None", "Wheelchair Access", "Visual Aid", "None"])
    emergency_contact = fake.name() + " - " + fake.phone_number()
    
    photo_consent = random.choice(photo_consent_options)
    volunteer_interest = random.choice(["Yes", "No"])
    communication_preference = random.choice(communication_preferences)
    
    achievements = random.choice(["None", "City Level Medal", "School Recognition", "None"])
    feedback_notes = fake.sentence()
    updated_at = fake.date_this_year()

    return [
        student_id, first_name, last_name, dob, grade_level, school_name, gender,
        parent_name, contact_number, email, address,
        sport, interest_level, enrollment_status, club_name, position, experience_years,
        training_schedule, participation, skill_level,
        medical_conditions, accessibility_needs, emergency_contact,
        photo_consent, volunteer_interest, communication_preference,
        achievements, feedback_notes, updated_at
    ]

# Column names
columns = [
    "student_id", "first_name", "last_name", "date_of_birth", "grade_level", "school_name", "gender",
    "parent_name", "contact_number", "email", "address",
    "sport_name", "interest_level", "enrollment_status", "club_or_team_name", "position_or_role", "experience_years",
    "training_schedule", "seasonal_or_year_round", "skill_level",
    "medical_conditions", "accessibility_needs", "emergency_contact",
    "photo_consent", "volunteer_interest", "communication_preference",
    "achievements", "feedback_notes", "updated_at"
]

# Create dataset
data = [generate_student_record(i+1) for i in range(1000)]
df = pd.DataFrame(data, columns=columns)



In [7]:
df.to_csv('neighbourhood_sports.csv', index=False)

In [2]:
df.head(5)

Unnamed: 0,student_id,first_name,last_name,date_of_birth,grade_level,school_name,gender,parent_name,contact_number,email,...,skill_level,medical_conditions,accessibility_needs,emergency_contact,photo_consent,volunteer_interest,communication_preference,achievements,feedback_notes,updated_at
0,1,Megan,Chang,2011-12-19,7,North York Public School 7,Male,Jorge Sullivan,475-693-8242x1948,blairrachel@example.net,...,Advanced,Asthma,Wheelchair Access,Justin Gomez - 993-787-7840x80160,No,Yes,Email,School Recognition,Room region as true develop.,2025-05-27
1,2,Douglas,Daniels,2007-09-08,9,North York Public School 10,Female,Sheri Bolton DDS,384-818-5839x8947,barbara42@example.org,...,Intermediate,,Visual Aid,Amanda Zavala - 571-512-2018x684,Yes,No,SMS,School Recognition,Stay information every manage political record...,2025-03-01
2,3,Darrell,Matthews,2010-11-09,1,North York Public School 9,Male,Alan Nelson,001-335-225-6012x309,reidcatherine@example.net,...,Advanced,Allergies,,Brandon Bass - 209-203-2173,Yes,Yes,Email,City Level Medal,International big employee determine positive ...,2025-04-02
3,4,Benjamin,Garcia,2019-05-07,9,North York Public School 8,Male,Ian Dennis,001-323-902-2584,alexandermaldonado@example.net,...,Advanced,,Visual Aid,Sylvia Banks - (942)780-7150x8423,Yes,No,SMS,,Explain grow water plant perform resource hold...,2025-04-29
4,5,Aaron,Lee,2010-11-05,10,North York Public School 7,Other,Mark Gray,+1-996-206-9602x71427,simpsonlarry@example.com,...,Intermediate,,,Rodney Mccann - (338)512-0665x03008,Yes,Yes,Email,,Part cup few read.,2025-02-03


In [3]:
# Function to search for students enrolled in a specific sport
def search_students_by_sport(dataframe, sport_name):
    enrolled_students = dataframe[
        (dataframe['sport_name'].str.lower() == sport_name.lower()) &
        (dataframe['enrollment_status'].str.lower() == 'enrolled')
    ][['first_name', 'last_name', 'grade_level', 'school_name', 'parent_name', 'contact_number', 'email']]
    
    return enrolled_students.reset_index(drop=True)




In [6]:
# Example usage: search for students enrolled in Soccer
soccer_students = search_students_by_sport(df, 'Tennis')
soccer_students.head(10)

Unnamed: 0,first_name,last_name,grade_level,school_name,parent_name,contact_number,email
0,Glenn,Alexander,4,North York Public School 6,Laura Grimes,+1-728-966-3564x3343,carolhanson@example.org
1,Justin,Bradford,3,North York Public School 3,John Phillips,+1-793-317-3564,stevenburton@example.net
2,William,Nelson,6,North York Public School 2,Francisco Randall,414.743.1368,robertmarshall@example.org
3,Carrie,Savage,2,North York Public School 9,Justin Chandler MD,(320)787-1652,tharvey@example.com
4,Rachael,Robinson,12,North York Public School 9,James Bailey,001-518-397-0386x0669,daltonmatthew@example.net
5,Robert,Cherry,7,North York Public School 7,Miss Christine Callahan,+1-987-679-6387x5139,collierandrew@example.org
6,Steven,Hartman,9,North York Public School 1,Jeffrey Ramirez,554.272.6676,ashley07@example.net
7,Julie,Case,3,North York Public School 1,Garrett Murphy,+1-965-630-6251x2382,wallacejennifer@example.net
8,David,Moore,5,North York Public School 8,Sherry Neal,(201)307-2359,linda45@example.com
9,Lindsey,Woods,4,North York Public School 3,Jack Hoover,946-964-4203x2973,carrandrea@example.org
