# Scholarship Seeker: 
### A Data-Driven Approach to Student Selection


In [1]:
import numpy as np

In [4]:
# Read 'admission.csv' into a structured NumPy array (one record per student).
#   names         -> column labels supplied explicitly (listed below)
#   skip_header=1 -> drop the CSV's own header row
#   delimiter=',' -> fields are comma-separated
#   dtype=None    -> let genfromtxt infer a type for each column
# Note: the resulting field names are not identical to the labels below;
# spaces come back as underscores and the '.' / trailing space are dropped,
# so columns are accessed as e.g. grad_students['GRE_Score'] or
# grad_students['Chance_of_Admit'].
column_labels = (
    "Serial No.",
    "GRE Score",
    "TOEFL Score",
    "University Rating",
    "SOP",
    "LOR ",
    "CGPA",
    "Research",
    "Chance of Admit",
)

grad_students = np.genfromtxt(
    'admission.csv',
    names=column_labels,
    skip_header=1,
    delimiter=',',
    dtype=None,
)

# Preview the first two records.
grad_students[:2]

array([(1, 337, 118, 4, 4.5, 4.5, 9.65, 1, 0.92),
       (2, 324, 107, 4, 4. , 4.5, 8.87, 1, 0.76)],
      dtype=[('Serial_No', '<i4'), ('GRE_Score', '<i4'), ('TOEFL_Score', '<i4'), ('University_Rating', '<i4'), ('SOP', '<f8'), ('LOR', '<f8'), ('CGPA', '<f8'), ('Research', '<i4'), ('Chance_of_Admit', '<f8')])

#### 1. What is the average GRE Score of the students in the dataset?



In [16]:
# Arithmetic mean of the GRE column across every record in the dataset.
gre_scores = grad_students['GRE_Score']
mean_gre_score = gre_scores.mean()
print(f'The average GRE score of the students is {mean_gre_score}.')

The average GRE score of the students is 316.8075


#### 2. How many students have a TOEFL Score greater than 110?



In [17]:
# Count the records whose TOEFL score is strictly above 110 by counting the
# True entries of the boolean mask (no filtered copy of the array is needed).
toefl_above_110 = grad_students['TOEFL_Score'] > 110
t_score_gt_110 = int(np.count_nonzero(toefl_above_110))
print(f'The number of students having TOEFL Score greater than 110 is {t_score_gt_110}.')

The number of students having TOEFL Score greater than 110 is 120.


#### 3. How many students have conducted research?



In [84]:
# Count the records whose Research column equals 1.
did_research = grad_students["Research"] == 1
no_of_research_stds = int(np.count_nonzero(did_research))
print(f"Total of {no_of_research_stds} students conducted research.")

Total of 219 students conducted research.


#### 4. Identify the student(s) with the highest Chance of Admit. What are their GRE scores?

In [85]:
# Select every record that ties for the maximum Chance of Admit; several
# students can share the top value, so the result may hold more than one row.
admit_chance = grad_students['Chance_of_Admit']
is_top_chance = admit_chance == admit_chance.max()
highest_chance = grad_students[is_top_chance]

# GRE scores of those same records, in the same order.
highest_chance_gre = highest_chance['GRE_Score']

print("Students who has the highest chance of admit: ")
print(highest_chance)
print(f"Their GRE scores are: {highest_chance_gre}")

Students who has the highest chance of admit: 
[( 25, 336, 119, 5, 4. , 3.5, 9.8 , 1, 0.97)
 (144, 340, 120, 4, 4.5, 4. , 9.92, 1, 0.97)
 (203, 340, 120, 5, 4.5, 4.5, 9.91, 1, 0.97)
 (204, 334, 120, 5, 4. , 5. , 9.87, 1, 0.97)]
Their GRE scores are: [336 340 340 334]


#### 5. Identify the students whose TOEFL score is greater than 110.
#### Among them, find the students who have the highest GRE scores. 
#### Ensure that the top three students have a perfect SOP score. How would you achieve this using Python and NumPy?

In [86]:
# Question 5, step by step:
#   1. keep students whose TOEFL score is strictly greater than 110;
#   2. among them, keep those tied for the highest GRE score;
#   3. among those, keep the ones with the best SOP score in that group;
#   4. rank the survivors by CGPA (highest first) and report the top three.

# Strictly greater than 110, as the question states (matches question 2).
higher_toefl = grad_students[grad_students['TOEFL_Score'] > 110]

higher_gre = higher_toefl[higher_toefl['GRE_Score'] == higher_toefl['GRE_Score'].max()]

# "Perfect" SOP is taken as the maximum SOP present in this group.
top_sop = higher_gre[higher_gre['SOP'] == higher_gre['SOP'].max()]

# Sort by CGPA in descending order without leaving NumPy: a stable argsort on
# the negated column keeps tied records in their original order and the result
# stays a structured array (no round trip through a Python list).
cgpa_desc = np.argsort(-top_sop['CGPA'], kind='stable')

# Cap the result at three rows so it always matches the "top three" label.
top_three = top_sop[cgpa_desc][:3]

print("These are the top three students: ")
print(top_three)

These are the top three students: 
[(385, 340, 113, 4, 5., 5. , 9.74, 1, 0.96)
 (285, 340, 112, 4, 5., 4.5, 9.66, 1, 0.94)
 ( 82, 340, 120, 4, 5., 5. , 9.5 , 1, 0.96)]
