In [23]:
## Step-1 : Importing required Libraries 
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.pipeline import Pipeline
import numpy as np


## Steps 2-4 : Load the dataset, drop unnecessary features, encode categoricals
## The whole preparation is one method chain, so re-running this cell always
## starts again from the CSV on disk.
df = (
    pd.read_csv("students_dataset.csv")
    ## Identifier-like columns are removed; they are not used as model inputs
    .drop(columns=["rollno", "student_id", "name"])
    ## Encode the Yes/No flag as 1/0 (any other value becomes NaN via .map)
    .assign(
        internet_access=lambda frame: frame["internet_access"].map({"Yes": 1, "No": 0})
    )
)

## Step - 5 : Split the frame into independent (input) and dependent (output) features
feature_columns = [
    "study_hours",
    "attendance",
    "internet_access",
    "play_hours",
    "assignments_completed",
    "sleep_hours",
]
X = df[feature_columns]
y = df["marks"]

def prediction_model(X, y, test_size=0.33, random_state=42, alpha=0.05, l1_ratio=0.5):
    """Train and evaluate a scaled ElasticNet regression pipeline.

    The data is split into train and test partitions, a
    ``StandardScaler`` -> ``ElasticNet`` pipeline is fitted on the training
    partition, and the MSE and R^2 score measured on the held-out partition
    are printed.

    Parameters
    ----------
    X : input features, in any form accepted by ``train_test_split`` and
        ``Pipeline.fit``.
    y : target values aligned with ``X``.
    test_size : forwarded to ``train_test_split`` (default 0.33).
    random_state : seed forwarded to ``train_test_split`` so the split is
        reproducible (default 42).
    alpha : ``alpha`` argument of ``ElasticNet`` (default 0.05).
    l1_ratio : ``l1_ratio`` argument of ``ElasticNet`` (default 0.5).

    Returns
    -------
    The fitted ``Pipeline`` (scaler followed by the regression model).
    """
    ## Hold out part of the data so evaluation uses rows not seen during fitting
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    ## The scaler lives inside the pipeline, so it is fitted on the training
    ## partition only and then re-applied automatically at predict time.
    model_pipeline = Pipeline([
        ("scaler", StandardScaler()),  ## Step 1 : Scale features
        ("model", ElasticNet(alpha=alpha, l1_ratio=l1_ratio)),  ## Step 2 : Train model
    ])

    ## Training the model
    model_pipeline.fit(X_train, y_train)

    ## Prediction on the held-out partition
    y_pred = model_pipeline.predict(X_test)

    ## Evaluation
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    ## Print results (rounded to two decimals)
    print("MSE = ",np.round(mse,2)," r2 score = ",np.round(r2,2))

    return model_pipeline
## Fit the pipeline on X / y (the function performs its own train/test split
## and prints the evaluation metrics) and keep the fitted result for later use.
marks_prediction_model = prediction_model(X=X, y=y)

MSE =  11.07  r2 score =  0.83


In [25]:
import joblib

## Saving our model: a single path variable is shared by dump and load
model_path = "marks_prediction_model.pkl"
joblib.dump(marks_prediction_model, model_path)

## Loading our model back from the file written above
## NOTE: joblib files are pickle-based; only load files from a trusted source.
load_model = joblib.load(model_path)

## Bare last expression so the notebook displays the reloaded pipeline
load_model

In [33]:
## Prediction on new test data ( custom input )
## One record, described as a plain dict keyed by feature name
custom_record = {
    "study_hours": 2,
    "attendance": 70,
    "internet_access": 1,
    "play_hours": 4,
    "assignments_completed": 9,
    "sleep_hours": 6,
}

## Wrap the record in a list so pandas builds a one-row frame
new_data = pd.DataFrame([custom_record])

## Predict with the reloaded model and report the first (only) value
predicted_marks = load_model.predict(new_data)
print("Predicted Marks:", np.round(predicted_marks[0],2))

Predicted Marks: 37.81
