In [13]:
#import necessary libraries for ML
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder , StandardScaler
from sklearn.linear_model import LogisticRegression , LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error

In [14]:
# Load the dataset from SP.csv (path is resolved against the current working
# directory) and preview its first five rows as plain text.
df = pd.read_csv("SP.csv")
print(df.head(5))

   gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3    male        group A          associate's degree  free/reduced   
4    male        group C                some college      standard   

  test preparation course  math score  reading score  writing score  
0                    none          72             72             74  
1               completed          69             90             88  
2                    none          90             95             93  
3                    none          47             57             44  
4                    none          76             78             75  


In [15]:
# The target is the last column of the dataset; every column before it is
# used as a feature.
TARGET, FEATURES = df.columns[-1], df.columns[:-1]

# Report which columns were picked for each role.
print("Target Column:", TARGET)
print("Feature Columns:", FEATURES)

Target Column: writing score
Feature Columns: Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score'],
      dtype='object')


In [16]:
# Encode categorical columns
#
# Fit one LabelEncoder per object-dtype column and keep it in `label_encoders`
# (keyed by column name), then overwrite each of those columns in `df` with
# its integer codes.
object_columns = df.select_dtypes(include=['object']).columns
label_encoders = {col: LabelEncoder().fit(df[col]) for col in object_columns}

for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

In [17]:
# Split data into the feature matrix (X) and the target vector (y)
X = df[FEATURES]
y = df[TARGET]

# Check Classification vs Regression (numeric or labels)
#
# The target is treated as continuous only when all of the following hold:
#   * its dtype is numeric -- any numeric dtype counts (int32, float32,
#     unsigned, nullable Int64/Float64, ...), not just int64/float64;
#   * it was not produced by label-encoding a categorical column (integer
#     codes are class ids, not quantities, however many of them there are);
#   * it has more distinct values than MAX_CLASS_LABELS.
# Anything else is treated as a set of class labels.
MAX_CLASS_LABELS = 15
target_is_numeric = pd.api.types.is_numeric_dtype(y) and TARGET not in label_encoders
problem_type = "Regression" if target_is_numeric and y.nunique() > MAX_CLASS_LABELS else "Classification"
print(f"\n Detected Problem Type: {problem_type}")

# Train/Test Split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale Data
# The scaler is fitted on the training split only; the test split is
# transformed with the training statistics so no test information leaks in.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


 Detected Problem Type: Regression


In [18]:
# Pick the estimator that matches the detected problem type, then train it on
# the scaled training split and predict on the scaled test split.
is_classification = problem_type == "Classification"
model = LogisticRegression(max_iter=300) if is_classification else LinearRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Report the metric that belongs to the problem type: accuracy for
# classification, mean squared error for regression.
if is_classification:
    print("\n --- CLASSIFICATION RESULTS ---")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
else:
    print("\n --- REGRESSION RESULTS ---")
    print(f"MSE: {mean_squared_error(y_test, y_pred)}")

print("\n Day 2 Completed: Model Trained and Evaluated.")


 --- REGRESSION RESULTS ---
MSE: 16.58798114027707

 Day 2 Completed: Model Trained and Evaluated.
