In [2]:
print("project")

project


In [6]:
import pandas as pd

# Load your CSV file
df = pd.read_csv("jossa_combined.csv")

# Show the structure of the data
print(df.info())
print("\nFirst 5 rows:")
print(df.head())

# Show missing values
print("\nMissing values:")
print(df.isnull().sum())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45727 entries, 0 to 45726
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Institute              45727 non-null  object
 1   Academic Program Name  45727 non-null  object
 2   Quota                  45727 non-null  object
 3   Category               45727 non-null  object
 4   Gender                 45727 non-null  object
 5   Opening Rank           45727 non-null  object
 6   Closing Rank           45727 non-null  object
 7   Round                  45727 non-null  int64 
 8   Type Of University     45727 non-null  object
dtypes: int64(1), object(8)
memory usage: 3.1+ MB
None

First 5 rows:
                                     Institute  \
0  Indian Institute  of Technology Bhubaneswar   
1  Indian Institute  of Technology Bhubaneswar   
2  Indian Institute  of Technology Bhubaneswar   
3  Indian Institute  of Technology Bhubaneswar   
4  Indian I

In [12]:
# 1. Check unique values in University Type
print("Type Of University:", df['Type Of University'].unique())

# 2. Convert 'Opening Rank' and 'Closing Rank' to integers
# First, replace any non-numeric or missing entries
df['Opening Rank'] = pd.to_numeric(df['Opening Rank'], errors='coerce')
df['Closing Rank'] = pd.to_numeric(df['Closing Rank'], errors='coerce')

# Drop rows where ranks couldn't be converted
df = df.dropna(subset=['Opening Rank', 'Closing Rank'])

# Convert from float to int (now safe to do)
df['Opening Rank'] = df['Opening Rank'].astype(int)
df['Closing Rank'] = df['Closing Rank'].astype(int)

# 3. Check that all key columns have valid values
print("\nQuota:", df['Quota'].unique())
print("Category:", df['Category'].unique())
print("Gender:", df['Gender'].unique())
print("Round:", df['Round'].unique())
print("Type Of University:", df['Type Of University'].unique())


Type Of University: ['IIT' 'NIT']

Quota: ['AI' 'HS' 'OS' 'GO' 'JK' 'LA']
Category: ['OPEN' 'EWS' 'OBC-NCL' 'SC' 'ST' 'OPEN (PwD)' 'OBC-NCL (PwD)' 'EWS (PwD)'
 'SC (PwD)' 'ST (PwD)']
Gender: ['Gender-Neutral' 'Female-only (including Supernumerary)']
Round: [1 2 3 4 5]
Type Of University: ['IIT' 'NIT']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Opening Rank'] = pd.to_numeric(df['Opening Rank'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Closing Rank'] = pd.to_numeric(df['Closing Rank'], errors='coerce')


In [13]:
# Drop rows where Opening or Closing Rank is missing
df = df.dropna(subset=['Opening Rank', 'Closing Rank'])

# Confirm the shape after cleanup
print("Data shape after removing missing ranks:", df.shape)


Data shape after removing missing ranks: (44775, 9)


In [14]:
# 1. Check unique values in University Type
print("Type Of University:", df['Type Of University'].unique())

# 2. Convert 'Opening Rank' and 'Closing Rank' to integers
# First, replace any non-numeric or missing entries
df['Opening Rank'] = pd.to_numeric(df['Opening Rank'], errors='coerce')
df['Closing Rank'] = pd.to_numeric(df['Closing Rank'], errors='coerce')

# Drop rows where ranks couldn't be converted
df = df.dropna(subset=['Opening Rank', 'Closing Rank'])

# Convert from float to int (now safe to do)
df['Opening Rank'] = df['Opening Rank'].astype(int)
df['Closing Rank'] = df['Closing Rank'].astype(int)

# 3. Check that all key columns have valid values
print("\nQuota:", df['Quota'].unique())
print("Category:", df['Category'].unique())
print("Gender:", df['Gender'].unique())
print("Round:", df['Round'].unique())
print("Type Of University:", df['Type Of University'].unique())


Type Of University: ['IIT' 'NIT']

Quota: ['AI' 'HS' 'OS' 'GO' 'JK' 'LA']
Category: ['OPEN' 'EWS' 'OBC-NCL' 'SC' 'ST' 'OPEN (PwD)' 'OBC-NCL (PwD)' 'EWS (PwD)'
 'SC (PwD)' 'ST (PwD)']
Gender: ['Gender-Neutral' 'Female-only (including Supernumerary)']
Round: [1 2 3 4 5]
Type Of University: ['IIT' 'NIT']


In [15]:
def predict_colleges(df, category, gender, quota, round_num, university_type):
    # Apply filters step by step
    filtered = df[
        (df['Category'] == category) &
        (df['Gender'] == gender) &
        (df['Quota'] == quota) &
        (df['Round'] == round_num) &
        (df['Type Of University'] == university_type)
    ]

    # Sort by Opening Rank (ascending) and show top 10
    result = filtered.sort_values(by='Opening Rank').head(10)
    return result


In [16]:
def predict_colleges(
    df,
    category=None,
    gender=None,
    quota=None,
    round_num=None,
    university_type=None,
    min_rank=None,
    max_rank=None,
    preferred_course=None,
    preferred_state=None
):
    filtered = df.copy()

    # Apply filters one by one, only if the user provides the value
    if category:
        filtered = filtered[filtered['Category'] == category]

    if gender:
        filtered = filtered[filtered['Gender'] == gender]

    if quota:
        filtered = filtered[filtered['Quota'] == quota]

    if round_num:
        filtered = filtered[filtered['Round'] == round_num]

    if university_type:
        filtered = filtered[filtered['Type Of University'] == university_type]

    if min_rank is not None:
        filtered = filtered[filtered['Closing Rank'] >= min_rank]

    if max_rank is not None:
        filtered = filtered[filtered['Closing Rank'] <= max_rank]

    if preferred_course:
        filtered = filtered[filtered['Academic Program Name'].str.contains(preferred_course, case=False)]

    if preferred_state:
        filtered = filtered[filtered['Institute'].str.contains(preferred_state, case=False)]

    # Sort by Opening Rank for relevance and show top 20
    result = filtered.sort_values(by='Opening Rank').head(20)
    return result


In [22]:
predict_colleges(
    df,
    category="OBC-NCL",
    gender="Gender-Neutral",
    quota="HS",
    round_num=4,
    university_type="NIT",
    min_rank=5000,
    max_rank=15000,
   )


Unnamed: 0,Institute,Academic Program Name,Quota,Category,Gender,Opening Rank,Closing Rank,Round,Type Of University
36206,"Visvesvaraya National Institute of Technology,...",Electronics and Communication Engineering (4 Y...,HS,OBC-NCL,Gender-Neutral,2810,5027,4,NIT
32449,National Institute of Technology Hamirpur,"Architecture (5 Years, Bachelor of Architecture)",HS,OBC-NCL,Gender-Neutral,2863,12832,4,NIT
34010,"National Institute of Technology, Kurukshetra",Artificial Intelligence and Machine Learning (...,HS,OBC-NCL,Gender-Neutral,3420,6305,4,NIT
32233,National Institute of Technology Durgapur,"Computer Science and Engineering (4 Years, Bac...",HS,OBC-NCL,Gender-Neutral,3463,8278,4,NIT
32022,National Institute of Technology Delhi,"Computer Science and Engineering (4 Years, Bac...",HS,OBC-NCL,Gender-Neutral,3664,5688,4,NIT
32876,"National Institute of Technology Karnataka, Su...","Mechanical Engineering (4 Years, Bachelor of T...",HS,OBC-NCL,Gender-Neutral,3853,10194,4,NIT
35799,Sardar Vallabhbhai National Institute of Techn...,"Artificial Intelligence (4 Years, Bachelor of ...",HS,OBC-NCL,Gender-Neutral,3888,5484,4,NIT
35739,"National Institute of Technology, Warangal","Mechanical Engineering (4 Years, Bachelor of T...",HS,OBC-NCL,Gender-Neutral,3896,5817,4,NIT
36347,"National Institute of Technology, Andhra Pradesh","Computer Science and Engineering (4 Years, Bac...",HS,OBC-NCL,Gender-Neutral,3898,6347,4,NIT
31205,Maulana Azad National Institute of Technology ...,Electronics and Communication Engineering (4 Y...,HS,OBC-NCL,Gender-Neutral,4032,6565,4,NIT


In [23]:
def college_predictor(df, category=None, gender=None, quota=None, round_no=None,
                      university_type=None, min_rank=None, max_rank=None,
                      course_keyword=None, state_keyword=None):
    """
    Filters the JoSAA dataset based on user preferences.
    All parameters are optional. Only non-None filters are applied.
    """
    filtered_df = df.copy()

    if category:
        filtered_df = filtered_df[filtered_df['Category'] == category]
    if gender:
        filtered_df = filtered_df[filtered_df['Gender'] == gender]
    if quota:
        filtered_df = filtered_df[filtered_df['Quota'] == quota]
    if round_no:
        filtered_df = filtered_df[filtered_df['Round'] == round_no]
    if university_type:
        filtered_df = filtered_df[filtered_df['Type Of University'] == university_type]
    if min_rank is not None:
        filtered_df = filtered_df[filtered_df['Closing Rank'] >= min_rank]
    if max_rank is not None:
        filtered_df = filtered_df[filtered_df['Closing Rank'] <= max_rank]
    if course_keyword:
        filtered_df = filtered_df[filtered_df['Academic Program Name'].str.contains(course_keyword, case=False)]
    if state_keyword:
        filtered_df = filtered_df[filtered_df['Institute'].str.contains(state_keyword, case=False)]

    return filtered_df.sort_values(by="Closing Rank").reset_index(drop=True)


In [30]:
result = college_predictor(
    df,
    category="OBC-NCL",
    gender="Gender-Neutral",
    quota="HS",
    round_no=5,
    university_type="NIT",
    min_rank=500,
    max_rank=8000,
)
print(result.head(10))


                                           Institute  \
0         National Institute of Technology, Warangal   
1         National Institute of Technology, Warangal   
2  Visvesvaraya National Institute of Technology,...   
3         National Institute of Technology, Warangal   
4         National Institute of Technology, Rourkela   
5  National Institute of Technology, Tiruchirappalli   
6  National Institute of Technology Karnataka, Su...   
7         National Institute of Technology, Warangal   
8   Malaviya National Institute of Technology Jaipur   
9         National Institute of Technology, Warangal   

                               Academic Program Name Quota Category  \
0  Computer Science and Engineering (4 Years, Bac...    HS  OBC-NCL   
1  Computer Science and Engineering ( Artificial ...    HS  OBC-NCL   
2  Architecture  (5 Years, Bachelor of Architecture)    HS  OBC-NCL   
3  Mathematics and Computing (4 Years, Bachelor o...    HS  OBC-NCL   
4  Architecture  (5 Years, B