# In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import urllib.request
import zipfile
import os

# --- Dataset acquisition ---
data_url = "https://github.com/shaswat373/Student-Performance-Predictor/raw/main/data2.zip"
data_zip_path = "data2.zip"

# Fetch the archive only when no local copy exists yet.
archive_present = os.path.exists(data_zip_path)
if not archive_present:
    print("Downloading dataset...")
    urllib.request.urlretrieve(data_url, data_zip_path)
    print("Dataset downloaded.")

# Unpack into the current working directory; this happens on every run.
with zipfile.ZipFile(data_zip_path, 'r') as zip_ref:
    zip_ref.extractall()

# --- Data preparation ---
score_columns = ['math score', 'reading score', 'writing score']
categorical_columns = ['gender', 'parental level of education', 'test preparation course']

# The 'lunch' column is not used by the model, so it is discarded right away.
df = pd.read_csv("StudentsPerformance.csv").drop(columns=['lunch'])
# Regression target: row-wise mean of the three subject scores.
df['average_score'] = df[score_columns].mean(axis=1)

# One encoder per categorical column, kept so that user input can later be
# mapped to the same integer codes the model was trained on.
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Feature order matters: the prediction code builds its input row in this order.
# NOTE: the three score columns are model inputs and also define the target
# (their mean), so the regression is fitted on a directly derivable quantity.
features = categorical_columns + score_columns
X = df[features]
y = df['average_score']

# --- Model training ---
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model = LinearRegression().fit(X_train, y_train)

# Prediction
def _read_score(prompt):
    """Prompt for one subject score and return it as a float within 0 to 100.

    Args:
        prompt: Text shown to the user before the reply is read.

    Returns:
        The parsed score as a float.

    Raises:
        ValueError: If the reply is not numeric or lies outside 0 to 100.
    """
    raw = input(prompt).strip()
    try:
        score = float(raw)
    except ValueError:
        raise ValueError(f"score must be a number, got {raw!r}") from None
    # Written as a positive range test so that NaN (which fails every
    # comparison) is rejected along with negative, oversized and infinite values.
    if not 0 <= score <= 100:
        raise ValueError(f"score must be between 0 and 100, got {raw!r}")
    return score


def predict_student_performance():
    """Interactively collect one student's details and print a predicted average.

    Reads gender, parental education level, test-preparation status and three
    subject scores from standard input, encodes the categorical answers with
    the module-level ``label_encoders``, and feeds a single-row frame (columns
    ordered as in the module-level ``features``) to the module-level ``model``.

    Any invalid answer (a non-numeric or out-of-range score, or a category the
    encoders do not accept) is reported on standard output and the function
    returns without making a prediction.

    Returns:
        None. The prediction is printed, not returned.
    """
    print("\n Student Performance Predictor\n")
    print("Please answer the following questions:\n")

    # Gender
    gender_input = input("1. What is the student's gender? (male/female): ").strip().lower()

    # Parental Education
    print("\n2. What is the highest level of education completed by the student's parent?")
    print("Options: some high school, high school, some college, associate's degree, bachelor's degree, master's degree")
    parent_edu_input = input("Enter exactly as shown above: ").strip().lower()

    # Test Prep
    prep_course_input = input("\n3. Did the student complete a test preparation course? (none/completed): ").strip().lower()

    # Subject Scores
    # The en dash is written as an escape so the prompt survives re-encoding
    # of the source file (it had previously been garbled into mojibake).
    print("\n4. Please enter the student's scores (0\u2013100):")
    try:
        math_score = _read_score("   Math score: ")
        reading_score = _read_score("   Reading score: ")
        writing_score = _read_score("   Writing score: ")
    except ValueError as e:
        print(f"\n Invalid input: {e}")
        return

    # Encode inputs; the encoders raise ValueError for a value they cannot map.
    try:
        gender_encoded = label_encoders['gender'].transform([gender_input])[0]
        parent_edu_encoded = label_encoders['parental level of education'].transform([parent_edu_input])[0]
        prep_course_encoded = label_encoders['test preparation course'].transform([prep_course_input])[0]
    except ValueError as e:
        print(f"\n Invalid input: {e}")
        return

    # Build input frame: a single row whose column order matches `features`.
    input_df = pd.DataFrame([[
        gender_encoded,
        parent_edu_encoded,
        prep_course_encoded,
        math_score,
        reading_score,
        writing_score,
    ]], columns=features)

    # Predict
    predicted_score = model.predict(input_df)[0]
    print(f"\n Predicted final average score: **{round(predicted_score, 2)} / 100**")

# Run the interactive predictor only when this file is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    predict_student_performance()