In [1]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Load the merged data
merged_data = pd.read_csv("EngineeringRanking_Final.csv")

In [3]:
# Handling missing values by filling with 0
merged_data.fillna(0, inplace=True)

In [4]:
# Feature selection
features = ['Score_23', 'Score_22', 'Score_21', 'Score_20', 'Score_19', 'Score_18', 'Score_17', 'Score_16',
            'TLR_23', 'TLR_22', 'TLR_21', 'TLR_20', 'TLR_19', 'TLR_18', 'TLR_17', 'TLR_16',
            'RPC_23', 'RPC_22', 'RPC_21', 'RPC_20', 'RPC_19', 'RPC_18', 'RPC_17', 'RPC_16',
            'GO_23', 'GO_22', 'GO_21', 'GO_20', 'GO_19', 'GO_18', 'GO_17', 'GO_16',
            'OI_23', 'OI_22', 'OI_21', 'OI_20', 'OI_19', 'OI_18', 'OI_17', 'OI_16',
            'Perception_23', 'Perception_22', 'Perception_21', 'Perception_20', 'Perception_19',
            'Perception_18', 'Perception_17', 'Perception_16']
target = 'Rank_24'  # Predicting the rank for 2024

In [5]:
# Prepare the features (X) and target variable (y)
X = merged_data[features]
y = merged_data[target]

In [6]:
# Split the data into training (2016-2023) and testing (2024) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Train the model
model = XGBRegressor()
model.fit(X_train, y_train)

In [8]:
# Validation on 2023 data
merged_data.dropna(subset=['Rank_21'], inplace=True)  # Remove rows with missing target values
X_val = merged_data[features]
y_val_true = merged_data['Rank_21']

In [9]:
# Evaluate the model on the training set
y_train_pred = model.predict(X_train)
train_mse = mean_squared_error(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)
print(f"Training Set Performance:")
print(f"Mean Squared Error: {train_mse}")
print(f"Mean Absolute Error: {train_mae}")
print(f"R-squared: {train_r2}")

Training Set Performance:
Mean Squared Error: 0.0
Mean Absolute Error: 0.0
R-squared: 1.0


In [10]:
# Evaluate the model on the validation set
y_val_pred = model.predict(X_val)
val_mse = mean_squared_error(y_val_true, y_val_pred)
val_mae = mean_absolute_error(y_val_true, y_val_pred)
val_r2 = r2_score(y_val_true, y_val_pred)
print(f"\nValidation Set Performance:")
print(f"Mean Squared Error: {val_mse}")
print(f"Mean Absolute Error: {val_mae}")
print(f"R-squared: {val_r2}")


Validation Set Performance:
Mean Squared Error: 9830.307692307691
Mean Absolute Error: 73.58974358974359
R-squared: -1.2266422119559826


In [11]:
# Evaluate the model on the test set
y_test_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)
print(f"\nTest Set Performance:")
print(f"Mean Squared Error: {test_mse}")
print(f"Mean Absolute Error: {test_mae}")
print(f"R-squared: {test_r2}")


Test Set Performance:
Mean Squared Error: 0.0
Mean Absolute Error: 0.0
R-squared: 1.0


In [12]:
# Make predictions for 2024 data
y_pred = model.predict(X_test)

In [13]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [14]:
print(f"Mean Squared Error on Test Set (2024 data): {mse}")
print(f"Mean Absolute Error on Test Set (2024 data): {mae}")
print(f"R-squared on Test Set (2024 data): {r2}")

Mean Squared Error on Test Set (2024 data): 0.0
Mean Absolute Error on Test Set (2024 data): 0.0
R-squared on Test Set (2024 data): 1.0


In [15]:
# Get predictions for 2024 data
merged_data['Predicted_Rank_2024'] = model.predict(merged_data[['Score_23', 'Score_22', 'Score_21', 'Score_20', 'Score_19', 'Score_18', 'Score_17', 'Score_16',
            'TLR_23', 'TLR_22', 'TLR_21', 'TLR_20', 'TLR_19', 'TLR_18', 'TLR_17', 'TLR_16',
            'RPC_23', 'RPC_22', 'RPC_21', 'RPC_20', 'RPC_19', 'RPC_18', 'RPC_17', 'RPC_16',
            'GO_23', 'GO_22', 'GO_21', 'GO_20', 'GO_19', 'GO_18', 'GO_17', 'GO_16',
            'OI_23', 'OI_22', 'OI_21', 'OI_20', 'OI_19', 'OI_18', 'OI_17', 'OI_16',
            'Perception_23', 'Perception_22', 'Perception_21', 'Perception_20', 'Perception_19',
            'Perception_18', 'Perception_17', 'Perception_16']])
sorted_data_2024 = merged_data.sort_values(by='Predicted_Rank_2024')

# Displaying all predicted colleges for 2024
predicted_colleges_2024 = sorted_data_2024[['Institute Name', 'City', 'State', 'Predicted_Rank_2024']]
print("\nPredicted Colleges for 2024:")
print(predicted_colleges_2024)


Predicted Colleges for 2024:
                                        Institute Name                   City  \
0                Indian Institute of Technology Madras                Chennai   
172            Dr. D. Y. Patil Institute of Technology                   Pune   
173                          Guru Nanak Dev University               Amritsar   
174                Sri Ramakrishna Engineering College             Coimbatore   
175                         KIET Group of Institutions              Ghaziabad   
..                                                 ...                    ...   
88                                   Panjab University             Chandigarh   
87       Netaji Subhas University of Technology (NSUT)             South West   
86   Indian Institute of Information Technology All...  Prayagraj (Allahabad)   
92   Vel Tech Rangarajan Dr. Sagunthala R & D Insti...                Chennai   
272                K.S.Rangasamy College of Technology           Tiruchengode  