In [None]:
#DI index
import pandas as pd

# Load the dataset
file_path = "/mnt/data/Context1(H:B:A:W).csv"
df = pd.read_csv(file_path)

# Define racial groups
racial_groups = ["Asian", "Black", "Hispanic", "White"]

# Ensure no division by zero issues: blank out zero denominators so the
# ratios below come out missing instead of dividing by zero. The result is
# assigned back explicitly (a chained `inplace=True` on a column selection is
# not guaranteed to reach `df`), and `mask` inserts NaN rather than `pd.NA`
# so the columns stay numeric for the arithmetic and rounding below.
for denominator_col in ("Total Student Count", "COURSE_TAKER_CS"):
    df[denominator_col] = df[denominator_col].mask(df[denominator_col] == 0)

# Compute DI_X for each group:
#   (<group>_CS / COURSE_TAKER_CS) - (<group> / Total Student Count)
# rounded to three decimal places.
for group in racial_groups:
    cs_share = df[f"{group}_CS"] / df["COURSE_TAKER_CS"]
    school_share = df[group] / df["Total Student Count"]
    df[f"DI_{group}"] = (cs_share - school_share).round(3)

# Save the updated dataset (optional)
output_path = "/mnt/data/Disproportion_Index_Results.csv"
df.to_csv(output_path, index=False)

print(f"Updated dataset saved at: {output_path}")


In [5]:
#Classification
import pandas as pd

# Define the classification logic for schools
def classify_school(teacher_last_names):
    """Classify a school's delivery mode from its teacher-name entries.

    Args:
        teacher_last_names: Iterable of teacher-name values for one school.
            Entries that are not strings (e.g. NaN/None for a missing name)
            are ignored instead of raising.

    Returns:
        "Both" when virtual and non-virtual entries are both present,
        "Virtual" when only virtual entries are present,
        "In Person" when only non-virtual entries are present,
        "Unknown" when no usable (string) entry is present.
    """
    virtual_keywords = {"GAVS Virtual Teacher", "Virtual School (Non-GAVS)", "Software-Based Instruction"}
    in_person_keywords = {"Short-term Substitute"}

    # A missing name carries no evidence either way, and calling
    # `.startswith` on it would raise, so keep only real strings.
    names = [name for name in teacher_last_names if isinstance(name, str)]

    def is_virtual(name):
        # Entries prefixed with "DE:" are treated like the virtual keywords.
        return name.startswith("DE:") or name in virtual_keywords

    contains_virtual = any(is_virtual(name) for name in names)
    contains_in_person = any(name in in_person_keywords for name in names)
    # Anything that is neither a virtual marker nor an in-person keyword is
    # taken to be an actual teacher name.
    contains_human_names = any(
        not is_virtual(name) and name not in in_person_keywords
        for name in names
    )

    on_site = contains_in_person or contains_human_names
    if contains_virtual and on_site:
        return "Both"
    if contains_virtual:
        return "Virtual"
    if on_site:
        return "In Person"
    return "Unknown"

# Load the datasets
# NOTE(review): both paths below point at the same file, so the two results
# are identical -- confirm whether the second path was meant to differ.
file_path_1 = '/Users/sepehrsalem/Filtered_Course_Data.csv'
file_path_2 = '/Users/sepehrsalem/Filtered_Course_Data.csv'

data_1 = pd.read_csv(file_path_1)
data_2 = pd.read_csv(file_path_2)


def _classify_by_school(course_rows):
    """Return one row per 'Unique School ID' with its 'Classification'.

    Each school's distinct TEACHER_LAST_NAME values are handed to
    `classify_school`, and the resulting label is stored in a column named
    'Classification'.
    """
    names_per_school = course_rows.groupby('Unique School ID')['TEACHER_LAST_NAME']
    labels = names_per_school.apply(lambda names: classify_school(names.unique()))
    return labels.reset_index(name='Classification')


# Apply the classification logic to each dataset
classified_schools_1_2024 = _classify_by_school(data_1)
classified_schools_2_2024 = _classify_by_school(data_2)

# Write each result to its own CSV file
classified_schools_1_2024.to_csv("Classified_Schools_1_2024.csv", index=False)
classified_schools_2_2024.to_csv("Classified_Schools_2_2024.csv", index=False)

# Display the classified results (replace these lines with saving/exporting if needed)
print(classified_schools_1_2024)
print(classified_schools_2_2024)


     Unique School ID Classification
0             6010103      In Person
1             6050189           Both
2             6070101           Both
3             6070300      In Person
4             6073052           Both
..                ...            ...
381       78206160616      In Person
382       78206180618        Virtual
383       78301030103      In Person
384       78306120612      In Person
385       78306300630      In Person

[386 rows x 2 columns]
     Unique School ID Classification
0             6010103      In Person
1             6050189           Both
2             6070101           Both
3             6070300      In Person
4             6073052           Both
..                ...            ...
381       78206160616      In Person
382       78206180618        Virtual
383       78301030103      In Person
384       78306120612      In Person
385       78306300630      In Person

[386 rows x 2 columns]


In [None]:
import pandas as pd

# Load the data (replace 'file_path' with your actual file path)
file_path = "your_first_file.csv"
df = pd.read_csv(file_path)

# Course titles that count as approved; COURSE_TITLE is compared against
# these strings exactly.
approved_courses = [
    "Advanced Placement, Computer Science A",
    "Advanced Placement Computer Science Principles",
    "Computer Science Principles",
    "Introduction to Cybersecurity",
    "Advanced Cybersecurity",
    "IB Computer Science, Year One",
    "IB Computer Science, Year Two",
    "Embedded Computing",
    "Game Design: Animation and Simulation",
    "Programming, Games, Apps and Society",
    "Web Development",
    "Introduction to Python",
    "Coding for FinTech",
]

# Boolean flag per row: True when the row's course title is in the list above
is_approved_title = df['COURSE_TITLE'].isin(approved_courses)
df['Valid Course'] = is_approved_title

# Function to determine if a teacher is extra
def check_teacher_extra(row, data):
    """Return 1 if the row's teacher teaches no approved course at the row's school, else 0.

    Args:
        row: One record providing 'Unique School ID', 'CERTIFICATE_ID' and
            'Valid Course'.
        data: DataFrame with the same three columns; it is searched for the
            other courses taught under the same certificate at the same school.

    Returns:
        0 when this row's course is approved, or when any row in `data` with
        the same school and certificate has an approved course; 1 otherwise.
    """
    # The row's own course already settles the question. Checking it first
    # also covers rows whose school or certificate ID is missing: NaN never
    # compares equal, so the lookup below would not even match the row itself
    # and an approved course would otherwise be reported as extra.
    if row['Valid Course']:
        return 0  # Not an extra teacher

    # Find all courses taught by this teacher in the same school
    teacher_courses = data[
        (data['Unique School ID'] == row['Unique School ID']) &
        (data['CERTIFICATE_ID'] == row['CERTIFICATE_ID'])
    ]

    # If the teacher teaches at least one approved course, they are not extra
    if teacher_courses['Valid Course'].any():
        return 0  # Not an extra teacher
    return 1  # Extra teacher if only non-approved courses are taught

# Evaluate every row with check_teacher_extra, passing the full frame as the
# lookup table (second positional argument)
df['Extra Teacher Count'] = df.apply(check_teacher_extra, axis=1, args=(df,))

# Summarize at the school level: the per-school maximum is 1 as soon as any
# row in that school was flagged
school_extra_status = (
    df.groupby('Unique School ID')['Extra Teacher Count']
    .max()
    .reset_index()
)

# Translate the 0/1 flag into a 'Yes'/'No' label
school_extra_status['Has Extra Teacher'] = [
    'Yes' if flag == 1 else 'No'
    for flag in school_extra_status['Extra Teacher Count']
]

# Save or display the final result
school_extra_status.to_csv("final_school_extra_teacher_summary.csv", index=False)
print("Processed data saved to 'final_school_extra_teacher_summary.csv'.")
