In [11]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the toy dataset
file_path = "neet_student_data_toy.xlsx"
df = pd.read_excel(file_path)

# Feature engineering: percentage of correct answers, and the score weighted
# by the (numeric) trophy level.
df['correct_percentage'] = df['correct_answers'] / df['total_questions'] * 100
df['difficulty_adjusted_score'] = df['score'] * df['trophy_level']

features = ['score', 'accuracy', 'correct_percentage', 'difficulty_adjusted_score', 'trophy_level']
y = df['neet_rank']

# A zero `total_questions` produces +/-inf in `correct_percentage`, which
# StandardScaler rejects; treat those like missing values and impute with 0.
X = df[features].replace([np.inf, -np.inf], np.nan).fillna(0)

# Split BEFORE any fitting so that no statistic of the test rows leaks into
# training. Plain arrays are passed (as before) so the fitted model accepts
# raw NumPy input at prediction time.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy(), y, test_size=0.2, random_state=42
)

# Initialize RandomForestRegressor
rf = RandomForestRegressor(random_state=42)

# Scaling lives inside the pipeline so that
#   * it is re-fitted on the training part of every CV fold (no leakage), and
#   * the saved model carries its own preprocessing, i.e. callers can pass
#     raw, unscaled feature values to `best_rf_model.predict`.
model_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', rf),
])

# Hyperparameter Tuning using GridSearchCV.
# Keys are prefixed with the pipeline step name (`rf__`).
param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [None, 10, 20, 30],
    'rf__min_samples_split': [2, 5, 10],
    'rf__min_samples_leaf': [1, 2, 4],
    'rf__max_features': ['sqrt', 'log2', None]
}

# Grid search for best hyperparameters (5-fold CV, all cores)
grid_search = GridSearchCV(estimator=model_pipeline, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best pipeline (scaler + forest), refitted on the full training split
best_rf_model = grid_search.best_estimator_
# Fitted scaler, exposed under its previous name for convenience
scaler = best_rf_model.named_steps['scaler']

# Make predictions on the test set (the pipeline scales internally)
y_pred = best_rf_model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# Report the forest's hyperparameters without the pipeline step prefix
best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}

print(f"Best Hyperparameters: {best_params}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

# Feature importance analysis (optional)
importances = best_rf_model.named_steps['rf'].feature_importances_
feature_importance_df = pd.DataFrame({
    'Feature': features,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

print("\nFeature Importance:")
print(feature_importance_df)


FileNotFoundError: [Errno 2] No such file or directory: 'neet_student_data_toy.xlsx'

In [None]:
import streamlit as st
import numpy as np
import joblib

# Load the trained model
@st.cache_resource
def _load_model_cached():
    """Load the trained model from ``best_rf_model.pkl`` and cache it.

    Raises whatever ``joblib.load`` raises. Errors are deliberately NOT
    handled here: a call that raises is not stored by ``st.cache_resource``,
    so a later rerun retries the load instead of reusing a cached failure.
    """
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files that come from a trusted source.
    return joblib.load("best_rf_model.pkl")


def load_model():
    """Return the trained model, or ``None`` if it cannot be loaded.

    On failure the error is shown in the Streamlit page and ``None`` is
    returned; the failure itself is not cached, so the load is attempted
    again on the next rerun.
    """
    try:
        return _load_model_cached()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None


# Keep the cache-clearing hook that the decorated function used to expose.
load_model.clear = _load_model_cached.clear

model = load_model()

# Streamlit UI
st.title("🎯 NEET Rank Prediction")
st.markdown("Enter the following features to predict the NEET rank.")

# Input fields for only the selected 5 features.
# NOTE(review): in the training cell `correct_percentage` and
# `difficulty_adjusted_score` are derived from other columns
# (score * trophy_level for the latter); here they are entered independently,
# so inconsistent combinations are possible. The values are also passed to the
# model as typed, so the loaded model must include any scaling it needs.
score = st.number_input("Score", min_value=0.0)
accuracy = st.number_input("Accuracy (0 to 1)", min_value=0.0, max_value=1.0, step=0.01)
correct_percentage = st.number_input("Correct Percentage (0 to 100)", min_value=0.0, max_value=100.0, step=0.1)
difficulty_adjusted_score = st.number_input("Difficulty Adjusted Score", min_value=0.0)
trophy_level = st.selectbox("Trophy Level", ["Bronze", "Silver", "Gold"])

# Encode trophy level
# NOTE(review): this 0/1/2 encoding must match the numeric `trophy_level`
# values the model was trained on — not visible here; confirm against the data.
trophy_mapping = {"Bronze": 0, "Silver": 1, "Gold": 2}
trophy_level_encoded = trophy_mapping[trophy_level]

# Predict button
if st.button("Predict Rank"):
    # Compare against None explicitly: estimator objects may define __len__,
    # so plain truthiness is not a reliable "model was loaded" check.
    if model is not None:
        try:
            # Single-row input, in the same feature order used for training
            features = np.array([
                score, accuracy, correct_percentage, difficulty_adjusted_score, trophy_level_encoded
            ]).reshape(1, -1)

            # Make prediction
            predicted_rank = model.predict(features)[0]

            # Display prediction, rounded to the nearest whole rank
            # (int() alone would always truncate downwards).
            st.success(f"🏆 Predicted NEET Rank: {int(round(predicted_rank))}")

        except Exception as e:
            st.error(f"Prediction error: {e}")
    else:
        st.error("Model not loaded. Check if 'best_rf_model.pkl' exists.")


Predicted rank is  4110


  print("Predicted rank is ",int(predicted_neet_rank))


In [13]:
import pickle

# Persist the tuned model to disk so it can be reloaded later
# (the Streamlit cell in this notebook reads this same path).
with open("best_rf_model.pkl", mode="wb") as model_file:
    pickle.dump(best_rf_model, model_file)