In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import pickle
import numpy as np
import sklearn

# Print the version of each library to ensure compatibility
print("numpy version:", np.__version__)  # Expected: 1.26.4
print("pandas version:", pd.__version__)  # Expected: 2.2.1
print("sklearn version:", sklearn.__version__)  # Expected: 1.5.1

# Check if the installed versions match the expected versions
assert np.__version__ == '1.26.4', f"Expected numpy version 1.26.4, but got {np.__version__}"
assert pd.__version__ == '2.2.1', f"Expected pandas version 2.2.1, but got {pd.__version__}"
assert sklearn.__version__ == '1.5.1', f"Expected sklearn version 1.5.1, but got {sklearn.__version__}"

# Display message if all versions are correct
print("All library versions are as expected. Ready to proceed!")


numpy version: 1.26.4
pandas version: 2.2.1
sklearn version: 1.5.1
All library versions are as expected. Ready to proceed!


In [7]:
#Load the data 
data=pd.read_csv(f'student_placement_data.csv')
data.head()

Unnamed: 0,IQ,CGPA,10th_Marks,12th_Marks,Communication_Skills,Placed
0,138,5.96,85,97,5.19,1
1,128,5.76,68,68,5.63,0
2,114,5.05,75,53,6.88,1
3,142,3.1,52,84,4.58,0
4,107,4.39,68,98,3.61,0


In [8]:
#Split the data into features and target variable 
x = data.drop('Placed', axis=1)

y = data['Placed']

#Split the data into training and testing sets

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

 # Define the parameter grid for hyperparameter tuning

param_grid = {

'n_estimators': [100, 200, 300],

'max_depth': [None, 10, 20, 30],

'min_samples_split': [2, 5, 10],

'min_samples_leaf':[1,2,4]
}

In [9]:
#Create a Random Forest classifier


rf = RandomForestClassifier(random_state=42)

#Perform grid search with cross-validation

grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

grid_search.fit(x_train, y_train) 

#Get the best model

best_rf = grid_search.best_estimator_

#Fitting 5 folds for each of 108 candidates, totalling 540 fits

#Evaluate the model on the test set

y_pred = best_rf.predict(x_test)

print(classification_report(y_test,y_pred))

Fitting 5 folds for each of 108 candidates, totalling 540 fits
              precision    recall  f1-score   support

           0       0.56      0.69      0.62        13
           1       0.00      0.00      0.00         7

    accuracy                           0.45        20
   macro avg       0.28      0.35      0.31        20
weighted avg       0.37      0.45      0.40        20



In [10]:
with open('model.pkl','wb') as file:
    pickle.dump(best_rf, file)