In [2]:
import pandas as pd

# Load the dataset
df = pd.read_csv('Placement.csv')


In [3]:
from sklearn.preprocessing import LabelEncoder

# Create label encoders for categorical features
label_encoders = {}
categorical_features = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']

for feature in categorical_features:
    label_encoders[feature] = LabelEncoder()
    df[feature] = label_encoders[feature].fit_transform(df[feature])

# Features and target variable
X = df[['gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s', 'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p']]
y = df['status']  # Assuming 'status' is the target variable indicating placement (0 or 1)

# Split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Initialize models
lr = LogisticRegression()
svm_model = svm.SVC(probability=True)  # Set probability=True to enable predict_proba
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier()
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()

# Fit the models
lr.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
knn.fit(X_train, y_train)
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
gb.fit(X_train, y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
import joblib

# Save the models
joblib.dump(lr, 'logistic_regression.pkl')
joblib.dump(svm_model, 'svm_model.pkl')
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(dt, 'decision_tree.pkl')
joblib.dump(rf, 'random_forest.pkl')
joblib.dump(gb, 'gradient_boosting.pkl')


['gradient_boosting.pkl']

In [6]:
# Load a model
model = joblib.load('random_forest.pkl')

# Make a prediction
sample_data = X_test.iloc[0:1]
prediction = model.predict(sample_data)
probability = model.predict_proba(sample_data)

print(f"Prediction: {prediction}")
print(f"Probability: {probability}")


Prediction: ['Placed']
Probability: [[0.1 0.9]]


In [7]:
# Load a model
model = joblib.load('random_forest.pkl')

# Make a prediction
sample_data = X_test.iloc[0:1]
prediction = model.predict(sample_data)
probability = model.predict_proba(sample_data)

print(f"Prediction: {prediction}")
print(f"Probability: {probability}")


Prediction: ['Placed']
Probability: [[0.1 0.9]]


In [11]:
import joblib
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

def predict_placement(gender, ssc_p, ssc_b, hsc_p, hsc_b, degree_p, degree_t, workex, etest_p):
    # Convert input values to the same format used during training
    p1 = 1 if gender == "Male" else 0
    p3 = 1 if ssc_b == "Central" else 0
    p5 = 1 if hsc_b == "Central" else 0
    p8 = 2 if degree_t == "Sci&Tech" else 1 if degree_t == "Comm&Mgmt" else 0
    p9 = 1 if workex == "Yes" else 0

    # Default values for the removed features
    default_hsc_s = 0  # Assuming default to Arts
    default_specialisation = 0  # Assuming default to Mkt&Fin
    default_mba_p = 0.0  # Assuming 0.0 as default MBA percentage

    new_data = pd.DataFrame({
        'gender': [p1],
        'ssc_p': [ssc_p],
        'ssc_b': [p3],
        'hsc_p': [hsc_p],
        'hsc_b': [p5],
        'hsc_s': [default_hsc_s],
        'degree_p': [degree_p],
        'degree_t': [p8],
        'workex': [p9],
        'etest_p': [etest_p],
        'specialisation': [default_specialisation],
        'mba_p': [default_mba_p]
    })

    # Load the model
    model = joblib.load('random_forest.pkl')
    result = model.predict(new_data)
    result1 = model.predict_proba(new_data)

    if result[0] == 0:
        display(widgets.Label(value="Can't be Placed"))
    else:
        display(widgets.Label(value=f"Student Will be Placed With Probability of {round(result1[0][1] * 100, 2)}%"))

# Create interactive widgets
gender = widgets.Dropdown(options=["Male", "Female"], description="Gender:")
ssc_p = widgets.FloatText(description="10th %:")
ssc_b = widgets.Dropdown(options=["Central", "Others"], description="10th Board:")
hsc_p = widgets.FloatText(description="12th %:")
hsc_b = widgets.Dropdown(options=["Central", "Others"], description="12th Board:")
degree_p = widgets.FloatText(description="Degree %:")
degree_t = widgets.Dropdown(options=["Sci&Tech", "Comm&Mgmt", "Others"], description="Degree Type:")
workex = widgets.Dropdown(options=["Yes", "No"], description="Work Experience:")
etest_p = widgets.FloatText(description="eTest %:")

predict_button = widgets.Button(description="Predict")
predict_button.on_click(lambda b: predict_placement(gender.value, ssc_p.value, ssc_b.value, hsc_p.value, hsc_b.value, degree_p.value, degree_t.value, workex.value, etest_p.value))

display(gender, ssc_p, ssc_b, hsc_p, hsc_b, degree_p, degree_t, workex, etest_p, predict_button)


Dropdown(description='Gender:', options=('Male', 'Female'), value='Male')

FloatText(value=0.0, description='10th %:')

Dropdown(description='10th Board:', options=('Central', 'Others'), value='Central')

FloatText(value=0.0, description='12th %:')

Dropdown(description='12th Board:', options=('Central', 'Others'), value='Central')

FloatText(value=0.0, description='Degree %:')

Dropdown(description='Degree Type:', options=('Sci&Tech', 'Comm&Mgmt', 'Others'), value='Sci&Tech')

Dropdown(description='Work Experience:', options=('Yes', 'No'), value='Yes')

FloatText(value=0.0, description='eTest %:')

Button(description='Predict', style=ButtonStyle())

Label(value='Student Will be Placed With Probability of 79.0%')