In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy.special import softmax


# Location of the cutoff workbook, kept in one place so it is easy to repoint.
# NOTE(review): this is an absolute path; it only resolves where the workbook
# has been placed at exactly this location.
CUTOFF_XLSX_PATH = "/content/CET_Cutoff.xlsx"

# Load the raw cutoff sheet into a DataFrame.
df = pd.read_excel(CUTOFF_XLSX_PATH)

In [None]:

# Replacement header for the loaded sheet. The assignment below is positional:
# the i-th name here becomes the label of the i-th column of `df`, so both the
# order and the number of entries must line up with the sheet.
columns = [
    'college_id',
    'branch_code',
    'branch_name',
    'stage_type',
    'category',
    'rank',
    'percentile',
    'year',
    'round',
    'branch_category',
    'college_name',
    'status',
    'university',
    'religious_minority',
    'linguistic_minority',
]
df.columns = columns

In [None]:

# Keep only rows that carry both the model input and the label. The result is
# bound to a new name instead of mutating `df` in place, so re-running this
# cell always starts from the same loaded data.
df_clean = df.dropna(subset=['percentile', 'college_name'])

# Single-feature model: the percentile is the only input, the college is the
# class to predict.
features = ['percentile']
target = 'college_name'

# Encode college names as integer class ids. The ids are stored only in `y`;
# df['college_name'] keeps the original strings. Overwriting that column (as
# this cell used to) made a second run fit the encoder on integers, after
# which label_encoder.inverse_transform returned numbers instead of names.
label_encoder = LabelEncoder()
y = pd.Series(
    label_encoder.fit_transform(df_clean[target]),
    index=df_clean.index,  # keep X and y aligned row-for-row
    name=target,
)

X = df_clean[features]

In [None]:

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Wrap the scaled features and their labels in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(data=X_train_scaled, label=y_train)
dtest = xgb.DMatrix(data=X_test_scaled, label=y_test)

In [None]:
# Booster settings for the multiclass college model.
params = dict(
    objective='multi:softprob',             # multiclass objective
    num_class=len(label_encoder.classes_),  # one class per distinct college
    eval_metric='mlogloss',
    eta=0.1,                                # learning rate
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
)

In [None]:
# Fit the booster on the training matrix for a fixed 100 boosting rounds.
model = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=100,
)

In [None]:
# --- Recommend colleges for a user-supplied percentile ----------------------
# Reads one percentile, runs it through the fitted scaler and booster, and
# prints the ten colleges the model ranks highest for that value.

user_percentile = float(input("Enter your CET percentile: "))
# A percentile outside 0-100 is not meaningful; fail loudly instead of
# silently predicting on it. (NaN also fails this comparison.)
if not 0 <= user_percentile <= 100:
    raise ValueError(
        f"Percentile must be between 0 and 100, got {user_percentile}"
    )

# The scaler was fitted on a DataFrame, so hand it a frame with the same
# column names; a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning.
user_input_scaled = scaler.transform(
    pd.DataFrame([[user_percentile]], columns=features)
)

dtest_user = xgb.DMatrix(user_input_scaled)
# The 'multi:softprob' objective already yields per-class probabilities, so
# they are used as-is; a further softmax would only distort them.
pred_probs = model.predict(dtest_user)[0]  # first (and only) row

# Class ids of the ten most probable colleges, most probable first.
top_indices = np.argsort(pred_probs)[::-1][:10]
# Decode all ids in a single call rather than one call per id.
top_names = label_encoder.inverse_transform(top_indices)
top_colleges = [
    (name, pred_probs[idx] * 100) for name, idx in zip(top_names, top_indices)
]

# Display score: spread linearly from max_prob (rank 1) down to min_prob
# (last rank).
# NOTE(review): this figure depends only on the rank, not on the model's
# probability, yet it is printed with a '%' sign - confirm this presentation
# is intended.
max_prob = 100
min_prob = 50
num_colleges = len(top_colleges)
# With a single listed college there are no steps to interpolate over; use 1
# to avoid dividing by zero (that college then simply gets max_prob).
rank_steps = max(num_colleges - 1, 1)

adjusted_colleges = []
for rank, (college, _model_prob) in enumerate(top_colleges):
    adjusted_prob = max_prob - (rank * (max_prob - min_prob) / rank_steps)
    adjusted_colleges.append((college, adjusted_prob))

print("\nTop predicted colleges based on your percentile:")
for i, (college, prob) in enumerate(adjusted_colleges, 1):
    print(f"{i}. {college} ({prob:.2f}%)")

Enter your CET percentile: 89

🎓 Top predicted colleges based on your percentile:
1. Yeshwantrao Chavan College of Engineering,Wanadongri, Nagpur (100.00%)
2. Terna Engineering College, Nerul, Navi Mumbai (94.44%)
3. Sinhgad Technical Education Society, Sinhgad Institute of Technology and Science, Narhe (Ambegaon) (88.89%)
4. Nutan Maharashtra Vidya Prasarak Mandal, Nutan Maharashtra Institute of Engineering &Technology, Talegaon station, Pune (83.33%)
5. Progressive Education Society's Modern College of Engineering, Pune (77.78%)
6. N.Y.S.S.'s Datta Meghe College of Engineering, Airoli, Navi Mumbai (72.22%)
7. Manjara Charitable Trust's Rajiv Gandhi Institute of Technology, Mumbai (66.67%)
8. JSPM'S Jaywantrao Sawant College of Engineering,Pune (61.11%)
9. Shri Guru Gobind Singhji Institute of Engineering and Technology, Nanded (55.56%)
10. All India Shri Shivaji Memorial Society's Institute of Information Technology,Pune (50.00%)


