In [14]:
# import the libraries

import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import joblib #for saving the model

In [5]:
# Read the student pass/fail records from the CSV file into a DataFrame
# and preview the first rows as the cell output.
dataset_path = "Student Pass Fail Dataset.csv"
data = pd.read_csv(dataset_path)
data.head()

Unnamed: 0,Exam_Marks,Pass_Fail
0,51,Pass
1,92,Pass
2,14,Fail
3,71,Pass
4,60,Pass


In [6]:
# Replace the text labels in the "Pass_Fail" column with integer codes.
# The displayed output of this cell shows "Fail" encoded as 0 and "Pass" as 1.
# Note that this overwrites the column in `data` in place.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data["Pass_Fail"])
data["Pass_Fail"] = encoded_labels
print(data)

    Exam_Marks  Pass_Fail
0           51          1
1           92          1
2           14          0
3           71          1
4           60          1
..         ...        ...
95          39          0
96          84          1
97          79          1
98          81          1
99          52          1

[100 rows x 2 columns]


In [7]:
# Split the frame into the feature matrix and the target vector.
# X stays a one-column DataFrame (double brackets); y is the encoded label Series.
feature_columns = ["Exam_Marks"]
X = data[feature_columns]
y = data["Pass_Fail"]

# Build a logistic regression classifier with default settings and fit it
# on every row of the dataset (no hold-out split is made here).
model = LogisticRegression()
model.fit(X, y)

In [10]:
# Predict a class for every row of X (the same rows the model was fitted on)
# and keep the result in the `y_pred` variable.
y_pred = model.predict(X)

In [12]:
# Evaluation metrics
# Calculate accuracy, precision, recall, and F1 score.
#
# The predictions are read from the `y_pred` variable returned by
# model.predict(X). The notebook never adds a 'y_pred' column to `data`,
# so indexing data['y_pred'] raises KeyError on a fresh Restart & Run All.
#
# y_pred was computed on the same X the model was fitted on, so these are
# training-set metrics, not an estimate of performance on unseen data.
y_true = data['Pass_Fail']

accuracy = accuracy_score(y_true, y_pred)
# average='weighted' averages the per-class scores weighted by class support.
precision_value = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision_value:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

# Rows are true classes, columns are predicted classes (label order 0, 1).
conf_matrix = confusion_matrix(y_true, y_pred)
print(conf_matrix)

Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
[[34  0]
 [ 0 66]]


In [15]:
# Persist the fitted classifier to disk with joblib.
# NOTE(review): the file name is only the extension ".pkl" (a dotfile with
# no stem) -- confirm this is intended before renaming; anything that loads
# the model must use the same path.
model_file = ".pkl"
joblib.dump(model, model_file)
print("model save successfully")

model save successfully


In [20]:
# load the test data

new_data = pd.read_csv("Test_data.csv")
X_new = new_data[["Exam_Marks"]]

# load the trained model
# Reuse the classifier that was saved with joblib.dump(model, ".pkl") instead
# of fitting a second LogisticRegression on the training data, so this cell
# no longer depends on X and y still being in memory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that this notebook itself produced.
logestic_model = joblib.load(".pkl")

# Make the predictions on the new data, then translate the numeric classes
# back to their text labels (1 -> "Pass", 0 -> "Fail").
predicted_classes = logestic_model.predict(X_new)
new_data['predictions'] = pd.Series(predicted_classes, index=new_data.index).map({1: "Pass", 0: "Fail"})

print(new_data)


    Exam_Marks predictions
0           78        Pass
1           40        Fail
2           88        Pass
3           68        Pass
4           36        Fail
5           58        Pass
6           37        Fail
7           45        Pass
8           84        Pass
9           61        Pass
10          50        Pass
11          82        Pass
12          31        Fail
13          36        Fail
14          43        Pass
15          52        Pass
16          33        Fail
17          69        Pass
18          89        Pass
19          44        Pass
20          43        Pass
21          88        Pass
22          82        Pass
23          55        Pass
24          59        Pass
25          33        Fail
26          55        Pass
27          84        Pass
28          81        Pass
29          68        Pass
30          81        Pass
31          20        Fail
32          44        Pass
33          31        Fail
34          61        Pass
35          30        Fail
3

In [21]:
# Write the test rows together with their predicted labels to a CSV file,
# leaving out the DataFrame index column.
predictions_file = "Updated_Predictions.csv"
new_data.to_csv(predictions_file, index=False)