## GENERATING DATA 

In [1]:
import pandas as pd
import numpy as np 
import random 
from faker import Faker

#### STUDENT TABLE

In [2]:
# Faker generator using the Indian-English ("en_IN") locale; create_student_table reads it.
fake = Faker("en_IN")

# Define course durations
# Maps course name -> number added to enrollment_year to get graduation_year
# (presumably a duration in years).
course_durations = {"Data Science": 1}

def create_student_table(num_students):
    """Generate a DataFrame of synthetic student records.

    Every row receives a distinct ``student_id`` from 1..500, a name whose
    first name matches the randomly chosen gender, contact details, a course
    taken from ``course_durations`` and enrollment/graduation years.

    Args:
        num_students: Number of rows to generate (0 to 500 inclusive).

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based index.
        All columns are present even when ``num_students`` is 0.

    Raises:
        ValueError: If ``num_students`` is negative or larger than the number
            of available student ids.
    """
    columns = [
        "student_id",
        "name",
        "age",
        "gender",
        "email",
        "phone_num",
        "city",
        "course_batch",
        "enrollment_year",
        "graduation_year",
    ]
    id_pool = range(1, 501)

    if not 0 <= num_students <= len(id_pool):
        raise ValueError(
            f"num_students must be between 0 and {len(id_pool)}, got {num_students}"
        )

    # Sample ids without replacement instead of fake.unique.random_int:
    # the unique proxy retries blindly and can raise UniquenessException once
    # the range is nearly used up, and it remembers ids from earlier calls.
    student_ids = random.sample(id_pool, num_students)

    student_table = []
    for student_id in student_ids:
        # The first name must agree with the gender, so pick the gender first.
        gender = random.choice(["Male", "Female"])
        if gender == "Male":
            first_name = fake.first_name_male()
        else:
            first_name = fake.first_name_female()

        # Choose a course and look up its duration.
        course_batch = fake.random_element(elements=list(course_durations))
        course_duration = course_durations[course_batch]

        # Randomly choose the enrollment year (only 2024 is offered for now).
        enrollment_year = fake.random_element(elements=[2024])

        student_table.append(
            {
                "student_id": student_id,
                "name": f"{first_name} {fake.last_name()}",
                "age": fake.random_int(min=18, max=25),
                "gender": gender,
                "email": fake.email(),
                "phone_num": fake.phone_number(),
                "city": fake.city(),
                "course_batch": course_batch,
                "enrollment_year": enrollment_year,
                "graduation_year": enrollment_year + course_duration,
            }
        )

    # Explicit columns keep the schema (and the sort key) present for 0 rows.
    df = pd.DataFrame(student_table, columns=columns)

    # Sort by student_id in ascending order
    return df.sort_values(by="student_id").reset_index(drop=True)


In [11]:
#create_student_table(500)

In [None]:
#create_student_table(500)

In [23]:
# Generate 500 student rows and write them to Student_Table.csv without the index column.
df_student_table = create_student_table(500)
df_student_table.to_csv("Student_Table.csv", index = False)

#### PROGRAMMING TABLE

In [5]:
# Re-create the Faker generator ("en_IN" locale) for this section; a new
# instance starts with an empty `fake.unique` history.
fake = Faker("en_IN")

# Languages recorded for every student in the programming table.
language = ['Python', 'SQL']


def create_programming_table(num_values):
    """Generate a DataFrame of synthetic programming-performance records.

    Each row pairs a distinct ``programming_id`` (1000..1500) with a distinct
    ``student_id`` (1..500) and random activity counts and scores.

    Args:
        num_values: Number of rows to generate (0 to 500 inclusive).

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based index.
        All columns are present even when ``num_values`` is 0.

    Raises:
        ValueError: If ``num_values`` is negative or larger than the number
            of available student ids.
    """
    columns = [
        'programming_id',
        'student_id',
        'language',
        'problems_solved',
        'assessments_completed',
        'mini_projects',
        'certifications_earned',
        'latest_project_score',
    ]
    programming_id_pool = range(1000, 1501)
    student_id_pool = range(1, 501)

    max_rows = min(len(programming_id_pool), len(student_id_pool))
    if not 0 <= num_values <= max_rows:
        raise ValueError(
            f"num_values must be between 0 and {max_rows}, got {num_values}"
        )

    # Sample without replacement: unlike a retry-until-unique draw, this
    # cannot run out of attempts when almost the whole id range is consumed,
    # and it keeps no state between calls.
    programming_ids = random.sample(programming_id_pool, num_values)
    student_ids = random.sample(student_id_pool, num_values)

    programming_table = [
        {
            'programming_id': programming_id,
            'student_id': student_id,
            # Both languages are always listed; only their order is random.
            'language': ','.join(random.sample(language, 2)),
            'problems_solved': random.randint(40, 80),
            'assessments_completed': random.randint(2, 10),
            'mini_projects': random.randint(1, 5),
            'certifications_earned': random.randint(1, 7),
            'latest_project_score': random.randint(30, 90),
        }
        for programming_id, student_id in zip(programming_ids, student_ids)
    ]

    # Explicit columns keep the schema (and the sort key) present for 0 rows.
    df2 = pd.DataFrame(programming_table, columns=columns)
    return df2.sort_values(by='student_id').reset_index(drop=True)


In [4]:
#create_programming_table(500)

In [6]:
# Generate 500 programming rows and write them to programming_table.csv without the index column.
df_programming_table = create_programming_table(500)
df_programming_table.to_csv("programming_table.csv", index = False)

#### SOFT SKILLS TABLE

In [6]:
# Re-create the Faker generator ("en_IN" locale) for this section; a new
# instance starts with an empty `fake.unique` history.
fake = Faker("en_IN")
def create_softskills_table(skill_score):
    """Generate a DataFrame of synthetic soft-skill scores.

    Each row pairs a distinct ``soft_skill_id`` (2000..2500) with a distinct
    ``student_id`` (1..500) and six skill scores, each a random integer in
    50..100.

    Args:
        skill_score: Number of rows to generate (0 to 500 inclusive). The
            name is kept for backward compatibility; it is a row count, not
            a score.

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based index.
        All columns are present even when ``skill_score`` is 0.

    Raises:
        ValueError: If ``skill_score`` is negative or larger than the number
            of available student ids.
    """
    skill_columns = (
        'communication',
        'teamwork',
        'presentation',
        'leadership',
        'critical_thinking',
        'interpersonal_skills',
    )
    soft_skill_id_pool = range(2000, 2501)
    student_id_pool = range(1, 501)

    max_rows = min(len(soft_skill_id_pool), len(student_id_pool))
    if not 0 <= skill_score <= max_rows:
        raise ValueError(
            f"skill_score must be between 0 and {max_rows}, got {skill_score}"
        )

    # Sample without replacement: unlike a retry-until-unique draw, this
    # cannot run out of attempts when almost the whole id range is consumed,
    # and it keeps no state between calls.
    soft_skill_ids = random.sample(soft_skill_id_pool, skill_score)
    student_ids = random.sample(student_id_pool, skill_score)

    soft_skills_table = []
    for soft_skill_id, student_id in zip(soft_skill_ids, student_ids):
        skills = {'soft_skill_id': soft_skill_id, 'student_id': student_id}
        for column in skill_columns:
            skills[column] = random.randint(50, 100)
        soft_skills_table.append(skills)

    # Explicit columns keep the schema (and the sort key) present for 0 rows.
    df3 = pd.DataFrame(
        soft_skills_table,
        columns=['soft_skill_id', 'student_id', *skill_columns],
    )
    return df3.sort_values(by='student_id').reset_index(drop=True)
        

In [9]:
#create_softskills_table(500)

In [11]:
# Generate 500 soft-skill rows and write them to softskills_table.csv without the index column.
df_softskills_table = create_softskills_table(500)
df_softskills_table.to_csv("softskills_table.csv", index = False)

#### PLACEMENT TABLE

In [9]:

# Re-create the Faker generator ("en_IN" locale) for this section; a new
# instance starts with an empty `fake.unique` history.
fake = Faker("en_IN")

def create_placement_table(num_values):
    """Generate a DataFrame of synthetic placement records.

    Each row pairs a distinct ``placement_id`` (3000..3500) with a distinct
    ``student_id`` (1..500). A mock interview score of 60 or more marks the
    student "Ready"; a Ready student is placed when ``random.random() > 0.3``
    and then gets a company, package, rounds cleared and placement date.
    Everyone else gets ``None``/0 placeholders in those columns.

    Args:
        num_values: Number of rows to generate (0 to 500 inclusive).

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based index.
        All columns are present even when ``num_values`` is 0.

    Raises:
        ValueError: If ``num_values`` is negative or larger than the number
            of available student ids.
    """
    columns = [
        'placement_id',
        'student_id',
        'mock_interview_score',
        'internships_completed',
        'placement_status',
        'company_name',
        'placement_package',
        'interview_rounds_cleared',
        'placement_date',
    ]
    placement_id_pool = range(3000, 3501)
    student_id_pool = range(1, 501)

    max_rows = min(len(placement_id_pool), len(student_id_pool))
    if not 0 <= num_values <= max_rows:
        raise ValueError(
            f"num_values must be between 0 and {max_rows}, got {num_values}"
        )

    # Sample ids without replacement instead of fake.unique.random_int:
    # the unique proxy retries blindly and can raise UniquenessException once
    # the range is nearly used up, and it remembers ids from earlier calls.
    placement_ids = random.sample(placement_id_pool, num_values)
    student_ids = random.sample(student_id_pool, num_values)

    placement_table = []
    for placement_id, student_id in zip(placement_ids, student_ids):
        # The same score is both recorded and used to derive the status.
        mock_score = random.randint(40, 100)
        placement_status = "Ready" if mock_score >= 60 else "Not Ready"

        placement = {
            'placement_id': placement_id,
            'student_id': student_id,
            'mock_interview_score': mock_score,
            'internships_completed': random.randint(1, 3),
            'placement_status': placement_status,
        }

        # Randomly decide if a Ready student is actually placed.
        if placement_status == "Ready" and random.random() > 0.3:
            placement['company_name'] = fake.company()
            placement['placement_package'] = random.randint(50000, 150000)
            placement['interview_rounds_cleared'] = random.randint(4, 5)
            placement['placement_date'] = fake.date_this_month()
        else:
            placement['company_name'] = None
            placement['placement_package'] = 0
            placement['interview_rounds_cleared'] = 0
            placement['placement_date'] = None

        placement_table.append(placement)

    # Explicit columns keep the schema (and the sort key) present for 0 rows.
    df4 = pd.DataFrame(placement_table, columns=columns)
    return df4.sort_values(by='student_id').reset_index(drop=True)


In [12]:
#create_placement_table(500)

In [10]:
# Generate 500 placement rows and write them to placement_table.csv without the index column.
df_placement_table = create_placement_table(500)
df_placement_table.to_csv("placement_table.csv", index = False)