Importing the dependencies

In [21]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

In [22]:
# loading the dataset to a pandas dataframe
dataset = pd.read_csv('/content/scholarship_data.csv')

In [23]:
# printing the first 5 rows of the dataset
dataset.head()

Unnamed: 0,ID,State,Name,Category,Income,Qualification,Description,LINKS,Type
0,1,West Bengal,Swami Vivekananda Merit-cum-Means Scholarship,All,0.0,B. Tech,Financial aid for meritorious and economically...,svmcm.wb.gov.in,Government
1,2,West Bengal,Hindi Scholarship Scheme,All,0.0,12th,Supports study of Hindi in non-Hindi speaking ...,wbchse.wb.gov.in,Government
2,3,West Bengal,Sikshashree Scheme,SC/ST,0.0,Photography Courses,Financial help for SC/ST students in primary s...,wb.gov.in,Government
3,4,West Bengal,Pre-Matric Scholarship for SC/ST,SC/ST,200000.0,9th,Reduces dropout rates among SC/ST students by ...,oasis.gov.in,Government
4,5,West Bengal,Post-Matric Scholarship for SC/ST/OBC,SC/ST/OBC,250000.0,11th,Aids SC/ST/OBC students pursuing post-secondar...,oasis.gov.in,Government


In [24]:
# printing the last 5 rows of the dataset
dataset.tail()

Unnamed: 0,ID,State,Name,Category,Income,Qualification,Description,LINKS,Type
644,645,Bihar,Bihar Pre-Matric Scholarship for SC/ST/OBC/Min...,sc/st/obc,500000.0,SSC,Backward and Extremely Backward Class Welfare ...,https://pmsonline.bih.nic.in/,Government
645,646,Bihar,Bihar Chief Minister's Scholarship Scheme,all,80000.0,undergraduation,"Education Department, Government of Bihar",https://pmsonline.bih.nic.in/,Government
646,647,Bihar,Mukhyamantri Balak/Balika Protsahan Yojana,all,250000.0,ssc,"Education Department, Government of Bihar",https://pmsonline.bih.nic.in/,Government
647,648,Bihar,Chief Minister Medhaavi Vidyarthi Yojana,sc/st/obc,50000.0,12th,Backward and Extremely Backward Class Welfare ...,https://pmsonline.bih.nic.in/,Government
648,649,Bihar,Mukhyamantri Kanya Utthan Yojana,sc/st/obc,50000.0,12th,Backward and Extremely Backward Class Welfare ...,https://pmsonline.bih.nic.in/,Government


In [25]:
# number of datapoints and number of features
dataset.shape

(649, 9)

In [26]:
# getting some information about the dataset
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 649 entries, 0 to 648
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ID             649 non-null    int64  
 1   State          647 non-null    object 
 2   Name           647 non-null    object 
 3   Category       647 non-null    object 
 4   Income         647 non-null    float64
 5   Qualification  643 non-null    object 
 6   Description    647 non-null    object 
 7   LINKS          647 non-null    object 
 8   Type           647 non-null    object 
dtypes: float64(1), int64(1), object(7)
memory usage: 45.8+ KB


In [27]:
# chacking for missing values
dataset.isnull().sum()

Unnamed: 0,0
ID,0
State,2
Name,2
Category,2
Income,2
Qualification,6
Description,2
LINKS,2
Type,2


Handling Missing Values

In [28]:
dataset['Income'].fillna(dataset['Income'].mean(), inplace=True)
dataset['Category'].fillna(dataset['Category'].mode()[0], inplace=True)
dataset['Qualification'].fillna(dataset['Qualification'].mode()[0], inplace=True)
dataset['Type'].fillna(dataset['Type'].mode()[0], inplace=True)
dataset['State'].fillna(dataset['State'].mode()[0], inplace=True)
dataset = dataset.dropna(subset=['Name'])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset['Income'].fillna(dataset['Income'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset['Category'].fillna(dataset['Category'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the interme

In [29]:
# chacking for missing values
dataset.isnull().sum()

Unnamed: 0,0
ID,0
State,0
Name,0
Category,0
Income,0
Qualification,0
Description,0
LINKS,0
Type,0


Label Encoding of Categorical Features

In [30]:
le_state = LabelEncoder()
le_category = LabelEncoder()
le_qualification = LabelEncoder()
le_type = LabelEncoder()

In [31]:
dataset['State'] = le_state.fit_transform(dataset['State'])
dataset['Category'] = le_category.fit_transform(dataset['Category'])
dataset['Qualification'] = le_qualification.fit_transform(dataset['Qualification'])
dataset['Type'] = le_type.fit_transform(dataset['Type'])

Feature Selection

In [32]:
X = dataset[['State', 'Category', 'Qualification', 'Income', 'Type']]
y = dataset['Name']

Train/Test Split

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Model Training

In [34]:
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

Accuracy

In [35]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.84


Save Model and Encoders

In [36]:
with open('saved_model.sav', 'wb') as f:
    pickle.dump((model, le_state, le_category, le_qualification, le_type), f)

print("saved_model.sav created!")

saved_model.sav created!


Prediction Function

In [37]:
def predict_scholarship(state, category, qualification, income, type_):
    try:
        input_data = [[
            le_state.transform([state])[0],
            le_category.transform([category])[0],
            le_qualification.transform([qualification])[0],
            income,
            le_type.transform([type_])[0]
        ]]
        predicted_name = model.predict(input_data)[0]
        result = dataset[dataset['Name'] == predicted_name].iloc[0]
        return {
            'Name': result['Name'],
            'Description': result['Description'],
            'LINKS': result['LINKS']
        }
    except ValueError as e:
        return {"error": str(e)}


Test Prediction

In [38]:
print("Test:", predict_scholarship("West Bengal", "All", "B. Tech", 0, "Government"))

Test: {'Name': 'Swami Vivekananda Merit-cum-Means Scholarship', 'Description': 'Financial aid for meritorious and economically weak students for higher studies.', 'LINKS': 'svmcm.wb.gov.in'}


