In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle

# Train an SVC that predicts the label-encoded 'race/ethnicity' group from the
# math, reading and writing scores, report its test accuracy, and persist the
# fitted model, scaler and label encoder as pickle files.

# Load the dataset, shorten the column names and keep only the columns used below.
# The chain ends with an explicit .copy() so `students` is an independent frame:
# writing to a column subset of another frame is what triggers pandas'
# SettingWithCopyWarning.
file_path = 'StudentsPerformance 2.csv'
students = (
    pd.read_csv(file_path)
    .rename(columns={
        'race/ethnicity': 'race',
        'math score': 'math',
        'reading score': 'read',
        'writing score': 'write',
    })
    [['race', 'math', 'read', 'write']]
    .copy()
)

# Encode the 'race' column as integer class labels.
# The fitted encoder is pickled below so predictions can be mapped back to the
# original labels with label_encoder.inverse_transform.
label_encoder = LabelEncoder()
students = students.assign(race=label_encoder.fit_transform(students['race']))

# Define features and target variable
X = students[['math', 'read', 'write']]
y = students['race']

# Split the dataset into training (75%) and test (25%) sets.
# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Standardize the features.
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVC model (default hyper-parameters apart from the seed)
svc_model = SVC(random_state=42)
svc_model.fit(X_train, y_train)

# Predict on the test set
y_pred = svc_model.predict(X_test)

# Evaluate the model's accuracy on the test set
# NOTE(review): the recorded run of this cell printed 0.34 - compare against a
# majority-class baseline before relying on this model.
accuracy = accuracy_score(y_test, y_pred)
print(f"SVC Model Test Accuracy: {accuracy:.2f}")

# Save the model, scaler and label encoder as pickle files.
# All three are needed at inference time: scale the inputs, predict, decode.
# Only unpickle these files from a trusted location.
with open('svc_model.pkl', 'wb') as model_file:
    pickle.dump(svc_model, model_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoder, encoder_file)


SVC Model Test Accuracy: 0.34
