In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the raw student records; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='student_dataset.csv')

In [3]:
def _split_skills(raw):
    """Turn one raw ``Skills`` cell into a clean list of skill names.

    Parameters
    ----------
    raw : str | list | tuple | set | other
        A comma-separated string such as ``"Python, Java"``, an
        already-split collection, or a missing value (e.g. NaN).

    Returns
    -------
    list[str]
        Skill names with surrounding whitespace removed and empty entries
        dropped. Non-string, non-collection values yield an empty list.
    """
    # Already split (the cell was re-run): hand back a list instead of
    # failing on a missing ``.split`` attribute.
    if isinstance(raw, (list, tuple, set)):
        return list(raw)
    # NaN / None / numbers carry no skills.
    if not isinstance(raw, str):
        return []
    # Split on the bare comma and strip each piece so "A,B" and "A,  B"
    # produce the same labels as "A, B".
    return [skill.strip() for skill in raw.split(',') if skill.strip()]


df['Skills'] = df['Skills'].apply(_split_skills)

In [4]:
# One indicator column per distinct skill, aligned to df's row index.
mlb_accenture = MultiLabelBinarizer()
skills_encoded = pd.DataFrame(
    # `data` is evaluated first, so the binarizer is fitted before
    # `classes_` is read for the column labels below.
    data=mlb_accenture.fit_transform(df['Skills']),
    index=df.index,
    columns=mlb_accenture.classes_,
)

In [5]:
# Select the non-skill predictor columns, then append the skill indicator columns.
features = df.loc[:, [
    'Gender',
    'CGPA',
    'Student_Department',
    'Prior_Internship_Experience(Integer)',
    'Aptitude_Score',
    'Communication_Skill_Score(out_of_10)',
]]
X = pd.concat(objs=[features, skills_encoded], axis='columns')

In [6]:
# Encode the hiring outcome column as integer class ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Accenture_hired'])

In [7]:
# Fit a dedicated encoder per categorical column. Refitting the shared
# `label_encoder` here would discard the target mapping it learned for `y`,
# and the Gender mapping would then be overwritten by the Department fit,
# leaving no way to encode new rows or decode predictions consistently.
gender_encoder = LabelEncoder()
X['Gender'] = gender_encoder.fit_transform(X['Gender'])

department_encoder = LabelEncoder()
X['Student_Department'] = department_encoder.fit_transform(X['Student_Department'])

In [8]:
# Sanity check: (rows, columns) of the assembled feature matrix.
X.shape

(600, 14)

In [9]:
# Display the feature matrix to eyeball the encoded columns and skill indicators.
X

Unnamed: 0,Gender,CGPA,Student_Department,Prior_Internship_Experience(Integer),Aptitude_Score,Communication_Skill_Score(out_of_10),C++,Communication,HTML/CSS,Java,JavaScript,Problem Solving,Python,Teamwork
0,0,7.6,5,2,95,4.5,1,1,0,1,0,1,0,0
1,0,7.6,4,1,98,4.9,0,1,0,1,0,1,1,0
2,0,9.0,0,2,65,7.7,1,0,1,1,0,1,0,0
3,0,8.7,3,1,85,5.0,1,1,0,0,0,0,0,0
4,1,7.9,3,0,51,6.0,0,1,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,0,6.3,3,1,75,6.0,0,1,0,0,1,0,0,1
596,0,8.5,1,2,50,9.2,0,0,0,0,0,0,1,0
597,1,7.0,0,2,59,6.1,0,0,1,0,1,1,0,0
598,0,8.0,0,2,65,9.8,0,1,0,0,0,0,0,0


In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy(),
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Train the random forest on the training split only. `fit` returns the
# estimator itself, so chaining keeps this an assignment (no cell output).
# Predictions on the test split are produced in the following cell, so they
# are not computed a second time here.
classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [12]:
# Predict the encoded class for every held-out test row.
y_pred = classifier.predict(X=X_test)

In [13]:
# Fraction of test rows whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.92


In [14]:
import joblib

In [15]:
# Persist the trained model; the call returns the list of files written.
joblib.dump(value=classifier, filename='Accenture.pkl')

['Accenture.pkl']

In [16]:
# Persist the fitted skills binarizer alongside the model.
joblib.dump(value=mlb_accenture, filename='mlb_accenture.pkl')

['mlb_accenture.pkl']