In [1]:
!pip install xgboost



In [2]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
# Load the data
data = pd.read_csv("EngineeringRanking_Final.csv")

In [4]:
# Handling missing values by filling with 0
data.fillna(0, inplace=True)

In [5]:
# Remove rows with missing target variable (Rank_21)
data.dropna(subset=['Rank_21'], inplace=True)

In [6]:
# Feature selection
features = ['Score_21', 'Rank_21', 'TLR_21', 'RPC_21', 'GO_21', 'OI_21', 'Perception_21']
X = data[features]
y = data['Rank_21']  # Target variable for prediction

In [7]:
# Splitting data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [8]:
# Train the model
model = XGBRegressor()
model.fit(X_train, y_train)

In [9]:
# Evaluate the model on training set
y_train_pred = model.predict(X_train)
train_mse = mean_squared_error(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)
print("Training Set Evaluation:")
print(f"Mean Squared Error on Training Set: {train_mse}")
print(f"Mean Absolute Error on Training Set: {train_mae}")
print(f"R-squared on Training Set: {train_r2}\n")

Training Set Evaluation:
Mean Squared Error on Training Set: 1.219778358207069e-06
Mean Absolute Error on Training Set: 0.0006720243934649295
R-squared on Training Set: 0.9999999997246309



In [10]:
# Evaluate the model on validation set
y_val_pred = model.predict(X_val)
val_mse = mean_squared_error(y_val, y_val_pred)
val_mae = mean_absolute_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)
print("\nValidation Set Evaluation:")
print(f"Mean Squared Error on Validation Set: {val_mse}")
print(f"Mean Absolute Error on Validation Set: {val_mae}")
print(f"R-squared on Validation Set: {val_r2}")


Validation Set Evaluation:
Mean Squared Error on Validation Set: 6.117125039838679
Mean Absolute Error on Validation Set: 1.74941961963758
R-squared on Validation Set: 0.9984773183694151


In [11]:
# Evaluate the model on testing set
y_test_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)
print("\nTesting Set Evaluation:")
print(f"Mean Squared Error on Testing Set: {test_mse}")
print(f"Mean Absolute Error on Testing Set: {test_mae}")
print(f"R-squared on Testing Set: {test_r2}")


Testing Set Evaluation:
Mean Squared Error on Testing Set: 3.605556830232109
Mean Absolute Error on Testing Set: 1.3088502148263532
R-squared on Testing Set: 0.999227163190001


In [12]:
# Check for overfitting or underfitting
if train_r2 > val_r2:
    print("\nModel is overfitting (training R-squared > validation R-squared)")
elif train_r2 < val_r2:
    print("\nModel is underfitting (training R-squared < validation R-squared)")
else:
    print("\nModel is performing well (training R-squared == validation R-squared)")


Model is overfitting (training R-squared > validation R-squared)


In [13]:
# Get predictions for 2023 data
data.dropna(subset=['Rank_21'], inplace=True)  # Remove rows with missing target values
X_val = data[features]
y_val_true = data['Rank_21']
y_val_pred = model.predict(X_val)
data['Predicted_Rank_2023'] = y_val_pred
sorted_data_2023 = data.sort_values(by='Predicted_Rank_2023')

In [14]:
# Displaying the predicted Rank_23 for colleges
Predicted_Rank_23 = sorted_data_2023[['Institute Name', 'City', 'State', 'Rank_21', 'Predicted_Rank_2023']]
print("\nPredicted Rank for 2023:")
print(Predicted_Rank_23)


Predicted Rank for 2023:
                                   Institute Name          City  \
272           K.S.Rangasamy College of Technology  Tiruchengode   
220       Pimpri Chinchwad College of Engineering          Pune   
221           Ramrao Adik Institute of Technology   Navi Mumbai   
222                               BVRIT Hyderabad     Hyderabad   
223  National Institute of Industrial Engineering        Mumbai   
..                                            ...           ...   
195         The National Institute of Engineering        Mysore   
197    Kakatiya Institute of Technology & Science      Warangal   
196          K. J. Somaiya College of Engineering        Mumbai   
199                   Sri Venkateswara University      Tirupati   
198               Walchand College of Engineering        Sangli   

              State  Rank_21  Predicted_Rank_2023  
272      Tamil Nadu      0.0             0.000002  
220     Maharashtra      0.0             0.000002  
221     Mahar

In [15]:
#Accuracy:

# Calculate accuracy
threshold = 1  # Define the threshold for accurate predictions (e.g., within ±1 rank)

# Count the number of accurate predictions
accurate_predictions = np.sum(np.abs(y_val_true - y_val_pred) <= threshold)

# Total number of predictions
total_predictions = len(y_val_true)

# Calculate accuracy
accuracy = (accurate_predictions / total_predictions) * 100
print(f"Accuracy on Validation Set (2023 data): {accuracy:.2f}%")


Accuracy on Validation Set (2023 data): 78.39%


In [16]:
# DataFrame to compare actual and predicted ranks
comparison_df = pd.DataFrame({
    'Institute Name': data['Institute Name'],
    'City': data['City'],
    'State': data['State'],
    'Actual Rank 2023': y_val_true,
    'Predicted Rank 2023': y_val_pred.round().astype(int)  # Round predicted ranks to the nearest integer
})

# Display the comparison DataFrame
print("Comparison of Actual and Predicted Ranks for 2023:")
print(comparison_df)


Comparison of Actual and Predicted Ranks for 2023:
                                Institute Name          City          State  \
0        Indian Institute of Technology Madras       Chennai     Tamil Nadu   
1         Indian Institute of Technology Delhi     New Delhi          Delhi   
2        Indian Institute of Technology Bombay        Mumbai    Maharashtra   
3        Indian Institute of Technology Kanpur        Kanpur  Uttar Pradesh   
4     Indian Institute of Technology Kharagpur     Kharagpur    West Bengal   
..                                         ...           ...            ...   
268          C.V. Raman College of Engineering   Bhubneshwar         Odisha   
269  Maharashtra Institute of Technology, Pune          Pune    Maharashtra   
270            Sri Sai Ram Engineering College       Chennai     Tamil Nadu   
271        ST. Joseph's College of Engineering       Chennai     Tamil Nadu   
272        K.S.Rangasamy College of Technology  Tiruchengode     Tamil Nadu   



# CLUSTERING ALGORITHM

In [17]:
import pandas as pd

In [18]:
data = pd.read_csv("EngineeringRanking_Final.csv")
data.fillna(0, inplace=True)
data.tail()

Unnamed: 0,Institute Id,Institute Name,City,State,Rank_24,Score_23,Rank_23,TLR_23,RPC_23,GO_23,...,GO_17,OI_17,Perception_17,Score_16,Rank_16,TLR_16,RPC_16,GO_16,OI_16,Perception_16
268,IR17-ENGG-1-26228,C.V. Raman College of Engineering,Bhubneshwar,Odisha,0.0,0.0,0.0,0.0,0.0,0.0,...,49.95,72.48,21.66,0.0,0,0.0,0.0,0.0,0.0,0.0
269,IR17-ENGG-2-10476,"Maharashtra Institute of Technology, Pune",Pune,Maharashtra,0.0,0.0,0.0,0.0,0.0,0.0,...,52.13,63.39,1.46,0.0,0,0.0,0.0,0.0,0.0,0.0
270,IR17-ENGG-2-12411,Sri Sai Ram Engineering College,Chennai,Tamil Nadu,0.0,0.0,0.0,0.0,0.0,0.0,...,59.32,68.75,4.86,0.0,0,0.0,0.0,0.0,0.0,0.0
271,IR17-ENGG-2-12581,ST. Joseph's College of Engineering,Chennai,Tamil Nadu,0.0,0.0,0.0,0.0,0.0,0.0,...,49.39,64.92,2.8,0.0,0,0.0,0.0,0.0,0.0,0.0
272,IR17-ENGG-2-1-2810997882,K.S.Rangasamy College of Technology,Tiruchengode,Tamil Nadu,0.0,0.0,0.0,0.0,0.0,0.0,...,46.44,60.98,5.85,0.0,0,0.0,0.0,0.0,0.0,0.0


In [19]:
def display_city_list(state):
    cities = data[data['State'] == state]['City'].unique()
    print("\nCities in", state, "are:")
    for city in cities:
        print(city)

def display_state_list():
    states = data['State'].unique()
    print("States with institutes are:")
    for state in states:
        print(state)
        
def display_institutes_by_city(city):
    city_data = data[data['City'] == city]
    if len(city_data) == 0:
        print("No institutes found in", city)
        return
    print("\nInstitutes in", city, "from 2016 to 2021:")
    print('-' * 150)
    print("{:<70} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
        "Institute Name", "2016", "2017", "2018", "2019", "2020", "2021"))
    print('-' * 150)
    for index, row in city_data.iterrows():
        print("{:<70} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(
            row['Institute Name'],
            row['Rank_16'], row['Rank_17'], row['Rank_18'], row['Rank_19'], row['Rank_20'], row['Rank_21']
        ))
    print('-' * 150)


# Main program
display_state_list()
state = input("Enter state: ")
display_city_list(state)
city = input("Enter City: ")
display_institutes_by_city(city)

States with institutes are:
Tamil Nadu
Delhi
Maharashtra
Uttar Pradesh
West Bengal
Uttarakhand
Assam
Telangana
Karnataka
Jharkhand
Madhya Pradesh
Punjab
Odisha
Bihar
Gujarat
Kerala
Rajasthan
Himachal Pradesh
Haryana
Andhra Pradesh
Meghalaya
Chhattisgarh
Jammu and Kashmir
Chandigarh
Goa
Tripura
Manipur
Pondicherry
Arunachal Pradesh
Andaman and Nicobar Islands
Enter state: Maharashtra

Cities in Maharashtra are:
Mumbai
Nagpur
Pune
Sangli
Navi Mumbai
Mumbai Suburban
Enter City: Mumbai

Institutes in Mumbai from 2016 to 2021:
------------------------------------------------------------------------------------------------------------------------------------------------------
Institute Name                                                         2016       2017       2018       2019       2020       2021      
------------------------------------------------------------------------------------------------------------------------------------------------------
Indian Institute of Technology Bo

# Prediction of Rank_24

In [20]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import RandomForestRegressor

In [21]:
# Load the data into a pandas DataFrame
data = pd.read_csv("EngineeringRanking_Final.csv")

In [22]:
# Replace all NaN values with 0
data.fillna(0, inplace=True)

In [23]:
# Reshape the data into a time series format
time_series_data = data.melt(id_vars=['Institute Id', 'Institute Name', 'City', 'State'],
                             value_vars=['Rank_23', 'Score_23', 'TLR_23', 'RPC_23', 'GO_23', 'OI_23', 'Perception_23'],
                             var_name='Metric', value_name='Value')

In [24]:
# Group the data by institute and create a time series for each institute
grouped = time_series_data.groupby(['Institute Id', 'Institute Name', 'City', 'State', 'Metric'])

In [25]:
# Train an ARIMA model for each institute's time series
forecasts = []
for group_name, group in grouped:
    institute_id, institute_name, city, state, metric = group_name
    if metric == 'Rank':
        model = ARIMA(group['Value'], order=(1, 1, 1))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=1)
        forecasts.append([institute_id, institute_name, city, state, metric, forecast[0]])

In [26]:
# Alternatively, train a Random Forest Regression model
features = ['Score_23', 'TLR_23', 'RPC_23', 'GO_23', 'OI_23', 'Perception_23']
target = 'Rank_23'
X = data[features]
y = data[target]
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)
predictions = model.predict(X)

In [27]:
# Round the predictions to the nearest whole number
predictions_rounded = [round(pred) for pred in predictions]

In [28]:
# Combine the predictions for 2024
predictions_2024 = pd.DataFrame({'Institute Id': data['Institute Id'],
                                  'Institute Name': data['Institute Name'],
                                  'City': data['City'],
                                  'State': data['State'],
                                  'Predicted Rank 2024': predictions_rounded})

print(predictions_2024)


                 Institute Id                             Institute Name  \
0                 IR-E-U-0456      Indian Institute of Technology Madras   
1                 IR-E-I-1074       Indian Institute of Technology Delhi   
2                 IR-E-U-0306      Indian Institute of Technology Bombay   
3                 IR-E-I-1075      Indian Institute of Technology Kanpur   
4                 IR-E-U-0573   Indian Institute of Technology Kharagpur   
..                        ...                                        ...   
268         IR17-ENGG-1-26228          C.V. Raman College of Engineering   
269         IR17-ENGG-2-10476  Maharashtra Institute of Technology, Pune   
270         IR17-ENGG-2-12411            Sri Sai Ram Engineering College   
271         IR17-ENGG-2-12581        ST. Joseph's College of Engineering   
272  IR17-ENGG-2-1-2810997882        K.S.Rangasamy College of Technology   

             City          State  Predicted Rank 2024  
0         Chennai     Tamil Nad

In [29]:
# Save predictions_2024 into a CSV file
predictions_2024.to_csv("Predictions of Rank_24.csv", index=False)