In [2]:
import pandas as pd

In [3]:
# Load the admissions data (first CSV column becomes the row index) and tidy
# the headers in the same chain: lower-case them and trim stray whitespace so
# later cells can refer to columns with predictable names.
df = (
    pd.read_csv("Admission_Predict.csv", index_col=0)
    .rename(columns=lambda name: name.lower().strip())
)
print(df.head())

            gre score  toefl score  university rating  sop  lor  cgpa  \
Serial No.                                                              
1                 337          118                  4  4.5  4.5  9.65   
2                 324          107                  4  4.0  4.5  8.87   
3                 316          104                  3  3.0  3.5  8.00   
4                 322          110                  3  3.5  2.5  8.67   
5                 314          103                  2  2.0  3.0  8.21   

            research  chance of admit  
Serial No.                             
1                  1             0.92  
2                  1             0.76  
3                  1             0.72  
4                  1             0.80  
5                  0             0.65  


In [4]:
# Comparing a Series with a scalar is evaluated element-wise: the result is a
# boolean Series (one True/False per row) that can later be used as a mask.
chance_of_admit = df["chance of admit"]
admit_mask = chance_of_admit > 0.7
print(admit_mask)

Serial No.
1       True
2       True
3       True
4       True
5      False
       ...  
396     True
397     True
398     True
399    False
400     True
Name: chance of admit, Length: 400, dtype: bool


In [6]:
print(df.where(admit_mask).head())
# where() does NOT drop anything: it returns a frame with the same rows as df,
# keeping the values where admit_mask is True and filling every row where the
# mask is False with NaN (see row 5 in the output). Because NaN is a float, the
# integer columns are shown as floats (337 -> 337.0).

            gre score  toefl score  university rating  sop  lor  cgpa  \
Serial No.                                                              
1               337.0        118.0                4.0  4.5  4.5  9.65   
2               324.0        107.0                4.0  4.0  4.5  8.87   
3               316.0        104.0                3.0  3.0  3.5  8.00   
4               322.0        110.0                3.0  3.5  2.5  8.67   
5                 NaN          NaN                NaN  NaN  NaN   NaN   

            research  chance of admit  
Serial No.                             
1                1.0             0.92  
2                1.0             0.76  
3                1.0             0.72  
4                1.0             0.80  
5                NaN              NaN  


In [7]:
# Two-step filter: where() blanks out the rows that fail the mask with NaN,
# then dropna() discards every row that contains a NaN, leaving only the
# rows where admit_mask is True (values remain floats from the where() step).
masked = df.where(admit_mask)
print(masked.dropna().head())

            gre score  toefl score  university rating  sop  lor  cgpa  \
Serial No.                                                              
1               337.0        118.0                4.0  4.5  4.5  9.65   
2               324.0        107.0                4.0  4.0  4.5  8.87   
3               316.0        104.0                3.0  3.0  3.5  8.00   
4               322.0        110.0                3.0  3.5  2.5  8.67   
6               330.0        115.0                5.0  4.5  3.0  9.34   

            research  chance of admit  
Serial No.                             
1                1.0             0.92  
2                1.0             0.76  
3                1.0             0.72  
4                1.0             0.80  
6                1.0             0.90  


In [8]:
print(df[df["chance of admit"] > 0.7].head())
# Boolean indexing: passing a boolean Series inside [] keeps only the rows
# where it is True. Here that selects the same rows as where(...).dropna(),
# in one step, and the integer columns keep their original dtype (337, not 337.0)
# because no NaN is ever introduced.

            gre score  toefl score  university rating  sop  lor  cgpa  \
Serial No.                                                              
1                 337          118                  4  4.5  4.5  9.65   
2                 324          107                  4  4.0  4.5  8.87   
3                 316          104                  3  3.0  3.5  8.00   
4                 322          110                  3  3.5  2.5  8.67   
6                 330          115                  5  4.5  3.0  9.34   

            research  chance of admit  
Serial No.                             
1                  1             0.92  
2                  1             0.76  
3                  1             0.72  
4                  1             0.80  
6                  1             0.90  


In [9]:
# Indexing with a single column label returns that column as a Series.
print(df["gre score"].head())

Serial No.
1    337
2    324
3    316
4    322
5    314
Name: gre score, dtype: int64


In [10]:
print(df[["gre score", "toefl score"]].head())
# Indexing with a list of column labels (note the double brackets) returns a
# DataFrame containing just those columns, in the order listed.

            gre score  toefl score
Serial No.                        
1                 337          118
2                 324          107
3                 316          104
4                 322          110
5                 314          103


In [11]:
print(df[df["gre score"] > 320].head())
# The two forms combine: the inner df["gre score"] > 320 builds a boolean mask
# from one column, and the outer df[...] keeps only the rows where it is True
# (row 3, with a GRE score of 316, is filtered out).

            gre score  toefl score  university rating  sop  lor  cgpa  \
Serial No.                                                              
1                 337          118                  4  4.5  4.5  9.65   
2                 324          107                  4  4.0  4.5  8.87   
4                 322          110                  3  3.5  2.5  8.67   
6                 330          115                  5  4.5  3.0  9.34   
7                 321          109                  3  3.0  4.0  8.20   

            research  chance of admit  
Serial No.                             
1                  1             0.92  
2                  1             0.76  
4                  1             0.80  
6                  1             0.90  
7                  1             0.75  


In [12]:
# Python's `and`/`or` need a single True/False from each operand, but a Series
# holds many values, so pandas refuses to guess and raises ValueError.
# The error is the point of this cell, so catch it and display the message
# rather than letting the exception abort a "Restart & Run All".
try:
    print(df["chance of admit"] > 0.7 and df["chance of admit"] < 0.9)
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [14]:
print((df["chance of admit"] > 0.7) & (df["chance of admit"] < 0.9))
# To combine boolean Series element-wise, use & (and) and | (or) instead of
# the Python keywords `and` / `or`.
# The parentheses are required: & binds more tightly than > and <, so without
# them Python would try to evaluate `0.7 & df["chance of admit"]` first.

Serial No.
1      False
2       True
3       True
4       True
5      False
       ...  
396     True
397     True
398    False
399    False
400    False
Name: chance of admit, Length: 400, dtype: bool


In [16]:
print((df["chance of admit"].gt(0.7)) & (df["chance of admit"].lt(0.9)))
# gt() and lt() are the method forms of > (greater than) and < (less than);
# each returns a boolean Series, so the result matches the operator version.

Serial No.
1      False
2       True
3       True
4       True
5      False
       ...  
396     True
397     True
398    False
399    False
400    False
Name: chance of admit, Length: 400, dtype: bool


In [17]:
print(df["chance of admit"].gt(0.7).lt(0.9))
# same thing but minimized and readable

Serial No.
1      False
2      False
3      False
4      False
5       True
       ...  
396    False
397    False
398    False
399     True
400    False
Name: chance of admit, Length: 400, dtype: bool
