In [6]:
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import pandas as pd
import time
from sklearn.preprocessing import MinMaxScaler
import uuid

# ----------------------------- User Inputs --------------------------------
# Collects the student's profile and weighting preferences from stdin.
# Comma-separated answers are stripped and blank entries (e.g. from a trailing
# comma) are discarded so they never end up as a branch name or a geocoding query.
stream = input("Enter your stream: ")

branches = input("Enter 5 preferred branches (comma-separated): ").split(",")
branches = [b.strip() for b in branches if b.strip()][:5]  # Drop blanks first, then limit to 5

your_location = input("Enter your location: ")

# Accept thousands separators ("5,00,000") in addition to plain digits.
family_income = int(input("Enter your family income: ").replace(",", "").strip())

# Weights used by the cost/heuristic calculation; the prompts ask for 1-5 but
# the values are taken as entered (no range check is performed here).
inc_Scale = float(input("Enter your flexibility scale (1-5) for fees: "))
pref_Scale = float(input("Enter your flexibility scale (1-5) for location: "))
rank_scale = float(input("Enter your flexibility scale (1-5) for rank: "))
rating_scale = float(input("Enter your flexibility scale (1-5) for rating: "))

preferred_locations = input("Enter your preferred locations (comma-separated): ").split(",")
preferred_locations = [loc.strip() for loc in preferred_locations if loc.strip()]


In [7]:

# ----------------------------- Load Data ----------------------------------
# Read the college table, coerce the numeric text columns, and derive the
# absolute gap between first-year fees and the family's income.
df = pd.read_csv('C:/Users/admin/Desktop/career/careerpeek.ai/engineering.csv')
df = df.assign(
    # Fees are cast via text so thousands separators can be removed first.
    First_Year_Fees=df['First_Year_Fees'].astype(str).str.replace(',', '').astype(float),
    # Unparseable cutoffs become NaN instead of raising.
    Cutoff=pd.to_numeric(df['Cutoff'], errors='coerce'),
)
# Computed from the raw fees, i.e. before the min-max scaling below overwrites them.
df['Income_Diff'] = (df['First_Year_Fees'] - family_income).abs()

# Normalization: rescale the listed columns to the [0, 1] range in place.
columns_to_normalize = ['First_Year_Fees', 'Average Package', 'Highest Package', 'National Rank']
scaler = MinMaxScaler()
df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])


In [8]:
def _geocode_coords(geolocator, place, cache):
    """Look up ``place`` and return ``(latitude, longitude)``, or ``None`` on failure.

    Every outcome, including a failed lookup, is stored in ``cache`` so a given
    place name is sent to the geocoding service at most once per cache.
    """
    # A missing City cell (NaN) or a blank entry is not a place name; skip the request.
    if not isinstance(place, str) or not place.strip():
        return None
    if place in cache:
        return cache[place]

    try:
        hit = geolocator.geocode(place)
    except Exception as exc:
        # Best-effort lookup: report and carry on. Catching ``Exception`` rather
        # than using a bare ``except`` lets a keyboard/kernel interrupt stop the loop.
        print(f"Geocoding failed for {place!r}: {exc}")
        hit = None
    time.sleep(1)  # pause after each real request to the geocoding service

    cache[place] = (hit.latitude, hit.longitude) if hit else None
    return cache[place]


def calculate_cost_matrix(df, branch=None):
    """Rank colleges by a combined cost + heuristic score and return the 10 lowest.

    Reads the module-level settings ``preferred_locations``, ``pref_Scale``,
    ``inc_Scale``, ``rank_scale`` and ``rating_scale``.

    Parameters
    ----------
    df : pandas.DataFrame
        College table. The columns 'College', 'City', 'Income_Diff', 'Cutoff',
        'Rating', 'National Rank' and 'Average Package' are read.
    branch : str, optional
        Only used to word the printed heading.

    Returns
    -------
    pandas.DataFrame
        Up to 10 rows with columns 'College', 'Cost', 'Heuristic' and 'Total',
        sorted by ascending 'Total'. Empty if ``df`` has no rows.

    Side effects
    ------------
    ``df`` is modified in place: missing 'Cutoff' values are filled with the
    column maximum and the columns 'Avg_Location_Distance', 'Normalized_Fees',
    'Cutoff_Score' and 'Heuristic' are added/overwritten. The ranking is printed.
    """
    # NOTE(review): no branch filter is applied, so every branch produces the
    # same ranking even though the heading says "Offering {branch}". The column
    # that lists a college's branches is not visible here; add the filter once known.
    df_filtered = df

    if df_filtered.empty:
        # Nothing to rank (and the scalers below cannot be fitted on zero rows).
        return pd.DataFrame(columns=['College', 'Cost', 'Heuristic', 'Total'])

    cutoff_weight = 0.2      # share of the cost contributed by the cutoff term
    heuristic_weight = 0.04  # scales the rating/rank/package sum
    heuristic_margin = 0.1   # cost is lifted to heuristic + margin when it is not above it

    geolocator = Nominatim(user_agent=f"distance_calculator_{uuid.uuid4()}", timeout=5)
    coords_cache = {}  # place name -> (lat, lon) or None; one request per distinct name
    location_coords = {
        loc: _geocode_coords(geolocator, loc, coords_cache)
        for loc in preferred_locations
    }

    # One row per college, one column per preferred location (None = unknown).
    distance_matrix = []
    for city in df_filtered['City']:
        college_coords = _geocode_coords(geolocator, city, coords_cache)
        distance_matrix.append([
            geodesic(location_coords[loc], college_coords).kilometers
            if college_coords is not None and location_coords[loc] is not None
            else None
            for loc in preferred_locations
        ])

    ds = pd.DataFrame(distance_matrix, columns=preferred_locations,
                      index=df_filtered.index, dtype=float)
    worst_distance = ds.max().max()
    if pd.isna(worst_distance):
        # No distance could be computed at all (no locations given, or every
        # lookup failed): let location contribute nothing instead of crashing.
        df_filtered['Avg_Location_Distance'] = 0.0
    else:
        # Unknown distances are treated as the farthest distance observed.
        ds = ds.fillna(worst_distance)
        # A fresh scaler is used so the module-level ``scaler`` keeps the fit
        # it received when the feature columns were normalised.
        ds_normalized = MinMaxScaler().fit_transform(ds)
        df_filtered['Avg_Location_Distance'] = ds_normalized.mean(axis=1)

    df_filtered['Normalized_Fees'] = MinMaxScaler().fit_transform(df_filtered[['Income_Diff']]).ravel()

    # Normalize cutoff inversely: the highest cutoff scores 0, the lowest scores 1.
    df_filtered['Cutoff'] = df_filtered['Cutoff'].fillna(df_filtered['Cutoff'].max())
    cutoff_max = df_filtered['Cutoff'].max()
    cutoff_span = cutoff_max - df_filtered['Cutoff'].min()
    if cutoff_span > 0:
        df_filtered['Cutoff_Score'] = (cutoff_max - df_filtered['Cutoff']) / cutoff_span
    else:
        # All cutoffs equal (or all missing): avoid 0/0 turning every cost into NaN.
        df_filtered['Cutoff_Score'] = 0.0

    # Calculate heuristic
    # NOTE(review): a higher Rating / Average Package increases the heuristic and
    # therefore 'Total', which is sorted ascending - confirm this direction is intended.
    df_filtered['Heuristic'] = (
        rating_scale * df_filtered['Rating']
        + rank_scale * df_filtered['National Rank']
        + df_filtered['Average Package']
    ) * heuristic_weight

    heuristic = df_filtered['Heuristic']
    cost = (
        pref_Scale * df_filtered['Avg_Location_Distance']
        + inc_Scale * df_filtered['Normalized_Fees']
        + cutoff_weight * (1 - df_filtered['Cutoff_Score'])
    )
    # Keep the cost strictly above the heuristic; NaN costs compare False and stay NaN.
    cost = cost.mask(cost <= heuristic, heuristic + heuristic_margin)

    # Positional (0..n-1) index, matching a table built row by row.
    cost_matrix = pd.DataFrame({
        'College': df_filtered['College'],
        'Cost': cost.round(4),
        'Heuristic': heuristic.round(4),
    }).reset_index(drop=True)
    cost_matrix['Total'] = cost_matrix['Cost'] + cost_matrix['Heuristic']
    sorted_colleges = cost_matrix.sort_values(by='Total').head(10)

    if not sorted_colleges.empty:
        if branch:
            print(f"\n Top 10 Colleges Offering {branch} Based on Cost and Heuristic:\n")
        else:
            print(f"\n Top 10 Colleges Based on Cost and Heuristic:\n")
        print(sorted_colleges)

    return sorted_colleges

In [9]:
# Rank colleges once per preferred branch. Each ranking is kept under its
# branch name; ``top_colleges`` still ends up holding the last branch's result.
top_colleges_by_branch = {}
for branch in branches:
    top_colleges = calculate_cost_matrix(df, branch)
    top_colleges_by_branch[branch] = top_colleges



 Top 10 Colleges Offering AI/ML Based on Cost and Heuristic:

                                              College    Cost  Heuristic  \
25  The LNM Institute of Information Technology - ...  0.7840     0.6840   
2                           BITS South Goa, South Goa  0.7836     0.7222   
0                 BITS Pilani (Pilani Campus), Pilani  0.8440     0.7440   
14  Thapar Institute of Engineering and Technology...  0.9526     0.6629   
40  Kalinga Institute of Industrial Technology - K...  0.8956     0.7364   
3                           BITS Hyderabad, Hyderabad  0.8671     0.7671   
23             Amrita Vishwa Vidyapeetham, Coimbatore  1.0174     0.6585   
35         Birla Institute of Technology - BIT, Patna  1.2813     0.6500   
9   Birla Institute of Technology - BIT Mesra, Ranchi  1.3634     0.6278   
24  K. J. Somaiya School of Engineering - KJSCE, M...  1.1795     0.9171   

     Total  
25  1.4680  
2   1.5058  
0   1.5880  
14  1.6155  
40  1.6320  
3   1.6342  
23  1.675