In [None]:
#Load the necessary libraries:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
#Load the data:
data = pd.read_csv("EngineeringRanking_Final.csv")
# Zero-fill missing values in every column except 'Rank_23'. The prediction,
# write-back and reporting cells all select rows with data['Rank_23'].isnull();
# filling that column as well would leave the selection empty, so no rank
# would ever be predicted or saved.
data = data.fillna({column: 0 for column in data.columns if column != 'Rank_23'})

In [None]:
#Convert non-numeric columns to numeric types:
# One column per (prefix, suffix) pair: each prefix in turn, suffixes 23 down to 16.
metric_prefixes = ('Score', 'TLR', 'RPC', 'GO', 'OI', 'Perception')
numeric_columns = [f'{prefix}_{suffix}' for prefix in metric_prefixes for suffix in range(23, 15, -1)]
# errors='coerce' turns any value that cannot be parsed as a number into NaN.
coerced_values = data[numeric_columns].apply(pd.to_numeric, errors='coerce')
data[numeric_columns] = coerced_values

In [None]:
#Select the features and target variable:
# Build the feature names group by group; within each group the suffix runs 23 -> 16.
features = []
for metric in ('Score', 'TLR', 'RPC', 'GO', 'OI', 'Perception'):
    features.extend(f'{metric}_{suffix}' for suffix in range(23, 15, -1))

# Model inputs are the columns listed above; the regression target is 'Rank_24'.
y = data['Rank_24']
X = data[features]

In [None]:
#Split the data into training and testing sets:
# 20% of the rows are held out; random_state pins the shuffle so the split is repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
#Initialize an XGBoost regressor model and fit it to the training data:
model = XGBRegressor()
model.fit(X_train, y_train)

# Score the fitted model on the held-out split so the quality of the
# predictions written out later can be judged before they are used.
y_test_pred = model.predict(X_test)
print("Held-out MSE:", mean_squared_error(y_test, y_test_pred))
print("Held-out MAE:", mean_absolute_error(y_test, y_test_pred))
print("Held-out R2:", r2_score(y_test, y_test_pred))

In [None]:
# Prepare the data for the year 2024 (where the Rank_23 is NaN)
# NOTE(review): rows are chosen by a missing 'Rank_23' while the model's target
# is 'Rank_24' -- confirm this is the intended selection column.
rank_23_is_missing = data['Rank_23'].isnull()
X_2024 = data.loc[rank_23_is_missing, features]

In [None]:
# Make predictions using the trained model
if len(X_2024) == 0:
    # Nothing was selected for prediction: say so explicitly instead of
    # continuing silently, and keep y_2024_pred as an empty array so the
    # later write-back cell still receives a value of matching length.
    print("No rows selected for 2024 prediction; nothing to predict.")
    y_2024_pred = pd.Series(dtype='float64').to_numpy()
else:
    y_2024_pred = model.predict(X_2024)

In [None]:
# Update the DataFrame with the predicted ranks for the year 2024
# The same 'Rank_23 is missing' row selection used to build X_2024 decides
# which 'Rank_24' cells receive the predictions.
rows_to_update = data['Rank_23'].isnull()
data.loc[rows_to_update, 'Rank_24'] = y_2024_pred

In [None]:
# Extract the predicted ranks for the year 2024 and print them
# The predictions were written to 'Rank_24'; 'Rank_23' is missing for every row
# selected here, so 'Rank_24' is the column that has to be shown.
predicted_ranks_2024 = data.loc[data['Rank_23'].isnull(), ['Institute Name', 'City', 'State', 'Rank_24']]
print("Predicted Ranks for the Year 2024:")
print(predicted_ranks_2024)

In [None]:
# Save the predicted ranks for the year 2024 to a new Excel file
# Identifying columns first, then one rank column per suffix from 16 through 24.
identity_columns = ['Institute Id', 'Institute Name', 'City', 'State']
rank_columns = [f'Rank_{suffix}' for suffix in range(16, 25)]
predicted_ranks_2016_2024 = data[identity_columns + rank_columns]
# index=False keeps the DataFrame's row index out of the spreadsheet.
predicted_ranks_2016_2024.to_excel('predicted_ranks_2016_2024.xlsx', index=False)