In [98]:
#importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [99]:
#reading data
df=pd.read_csv("student_dataset.csv")

In [100]:
#checking columns
df.columns

Index(['Student_Names', 'Phone_No.', 'Math', 'Physics', 'Chemistry', 'Grade',
       'Comment', 'Roll No.', 'School Name', 'Student Address'],
      dtype='object')

In [101]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9000 entries, 0 to 8999
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Student_Names    9000 non-null   object
 1   Phone_No.        9000 non-null   int64 
 2   Math             9000 non-null   int64 
 3   Physics          9000 non-null   int64 
 4   Chemistry        9000 non-null   int64 
 5   Grade            9000 non-null   object
 6   Comment          9000 non-null   object
 7   Roll No.         9000 non-null   int64 
 8   School Name      9000 non-null   object
 9   Student Address  9000 non-null   object
dtypes: int64(5), object(5)
memory usage: 703.3+ KB


In [102]:
#checking null values
df.isnull().sum()

Student_Names      0
Phone_No.          0
Math               0
Physics            0
Chemistry          0
Grade              0
Comment            0
Roll No.           0
School Name        0
Student Address    0
dtype: int64

In [103]:
df

Unnamed: 0,Student_Names,Phone_No.,Math,Physics,Chemistry,Grade,Comment,Roll No.,School Name,Student Address
0,Donald Contreras,9208625450,76,84,54,B+,Good Pursuance,524613,Martin Luther School,"478 Mooney Park, New Valerie, VI 28836"
1,Joseph Horton,9886408555,91,75,78,A,Very Good Achivement,561635,Martin Luther School,"037 Matthew Shores, Greeneton, CA 98399"
2,Savannah Burns MD,9047592659,64,98,20,C,Below Average Achivement,560985,Martin Luther School,"96124 Lloyd Streets, Edwardmouth, DC 61677"
3,William Carter,9048473864,15,95,32,D,Poor Pursuance,535126,Martin Luther School,"11959 Clark Village, Ivanview, NH 43940"
4,John Rodriguez,9685225730,86,86,66,B+,Good Pursuance,559410,Martin Luther School,"051 Weaver Glen Apt. 724, West Davidborough, M..."
...,...,...,...,...,...,...,...,...,...,...
8995,Kimberly Stevens,9129352703,40,87,65,B,Average Performance,569342,Martin Luther School,"27054 Adrian Streets, Diazmouth, OH 81346"
8996,Kelsey Bonilla,9649715711,56,84,75,B+,Good Pursuance,530124,Martin Luther School,"570 Christopher Run, Williammouth, ND 11535"
8997,Kelly Dunn,9825362271,80,70,16,C,Below Average Achivement,592266,Martin Luther School,"32283 Carpenter Summit, North Patricia, PR 51483"
8998,Joseph Nichols,9363540473,24,95,59,C,Below Average Achivement,583028,Martin Luther School,"2336 Blackburn Fall Apt. 905, South Shelby, ND..."


### Preprocessing

In [104]:
# Drop irrelevant columns
df2= df.drop(columns=['Student_Names', 'Phone_No.', 'Roll No.', 'Comment', 'Student Address'])

In [105]:
#checking columns after drop
df2.columns

Index(['Math', 'Physics', 'Chemistry', 'Grade', 'School Name'], dtype='object')

In [106]:
#encoding target variable
label_encoder = LabelEncoder()
df2['Grade'] = label_encoder.fit_transform(df2['Grade'])

### Define feature

In [107]:
#feature and target
X = df2[['Math', 'Physics', 'Chemistry']]  
y = df2['Grade']  

In [108]:
df2

Unnamed: 0,Math,Physics,Chemistry,Grade,School Name
0,76,84,54,3,Martin Luther School
1,91,75,78,0,Martin Luther School
2,64,98,20,4,Martin Luther School
3,15,95,32,5,Martin Luther School
4,86,86,66,3,Martin Luther School
...,...,...,...,...,...
8995,40,87,65,2,Martin Luther School
8996,56,84,75,3,Martin Luther School
8997,80,70,16,4,Martin Luther School
8998,24,95,59,4,Martin Luther School


In [109]:
#train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Classification

In [110]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [111]:
#prediction
y_pred = model.predict(X_test)

In [112]:
#model evaluation
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=label_encoder.classes_)

In [113]:
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", classification_rep)

Accuracy: 0.9211
Classification Report:
               precision    recall  f1-score   support

           A       0.94      0.86      0.90        59
          A+       1.00      1.00      1.00         5
           B       0.91      0.90      0.90       368
          B+       0.93      0.89      0.91       201
           C       0.88      0.90      0.89       425
           D       0.94      0.96      0.95       593
           F       0.99      0.93      0.96       149

    accuracy                           0.92      1800
   macro avg       0.94      0.92      0.93      1800
weighted avg       0.92      0.92      0.92      1800



##  Regression

In [114]:
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train, y_train)

In [115]:
#prediction
y_pred = regressor.predict(X_test)

In [116]:
#model evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [117]:
#printing results
print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared: {r2:.4f}")

Mean Squared Error: 0.1297
R-squared: 0.9386


In [123]:
y

0       3
1       0
2       4
3       5
4       3
       ..
8995    2
8996    3
8997    4
8998    4
8999    5
Name: Grade, Length: 9000, dtype: int32

In [120]:
X

Unnamed: 0,Math,Physics,Chemistry
0,76,84,54
1,91,75,78
2,64,98,20
3,15,95,32
4,86,86,66
...,...,...,...
8995,40,87,65
8996,56,84,75
8997,80,70,16
8998,24,95,59
