# Admission Case Study for Simpson's Paradox

# Load Data

In [36]:
import pandas as pd
import numpy as np

In [37]:
# Read the admissions records into a DataFrame. The relative path is resolved
# against the current working directory.
admission_csv = "admission.csv"
data = pd.read_csv(admission_csv)

In [38]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


# Explore basic information about the data

In [39]:
# (number of rows, number of columns) of the loaded frame
data.shape

(500, 4)

In [40]:
# Print column names, non-null counts and dtypes; a quick way to spot missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   student_id  500 non-null    int64 
 1   gender      500 non-null    object
 2   major       500 non-null    object
 3   admitted    500 non-null    bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 12.3+ KB


# Data Manipulation

## 1. Proportion and admission rate for each gender

In [6]:
# Show the first five rows again as a reference for the columns used below.
data.head(n=5)

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


In [42]:
# Share of all applicants who are female, printed as a percentage (0-100).
# `female_df` stays a top-level name: other cells use it as the denominator
# for the female-only rates.
is_female = data["gender"] == "female"
female_df = data.loc[is_female]
female_share_pct = len(female_df) / len(data) * 100
print("Female Student Proportion: ", female_share_pct)

Female Student Proportion:  51.4


In [43]:
# Share of all applicants who are male, printed as a percentage (0-100).
# `male_df` stays a top-level name: other cells use it as the denominator
# for the male-only rates.
is_male = data["gender"] == "male"
male_df = data.loc[is_male]
male_share_pct = len(male_df) / len(data) * 100
print("Male Student Proportion: ", male_share_pct)

Male Student Proportion:  48.6


In [45]:
# Percentage (0-100) of female applicants who were admitted.
female_admitted_mask = female_df["admitted"] == True
admitted_female = female_df.loc[female_admitted_mask]
female_admit_pct = len(admitted_female) / len(female_df) * 100
print("Admission rate for Females: ", female_admit_pct)

Admission rate for Females:  28.793774319066145


In [46]:
# Percentage (0-100) of male applicants who were admitted.
male_admitted_mask = male_df["admitted"] == True
admitted_male = male_df.loc[male_admitted_mask]
male_admit_pct = len(admitted_male) / len(male_df) * 100
print("Admission rate for Males: ", male_admit_pct)

Admission rate for Males:  48.559670781893004


## 2. Proportion and admission rate for physics majors of each gender

In [47]:
# Show the first five rows again as a reference for the columns used below.
data.head(n=5)

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


In [72]:
# What proportion of female students are majoring in physics?
# DataFrame.count() returns one non-null count per column, so the division
# below yields a Series of per-column ratios. The first entry is read by
# position with .iloc[0]; a plain [0] on this label-indexed Series relies on
# the deprecated integer-key positional fallback.
# NOTE(review): the first entry equals the row-count ratio only while the
# first column has no missing values - confirm if the data source changes.
female_phy_counts = data.query('gender == "female" and major == "Physics"').count()
female_phy_rate = female_phy_counts / female_df.count()
print("Proportion of Female Students majoring in Physics: ", female_phy_rate.iloc[0] * 100)

Proportion of Female Students majoring in Physics:  12.062256809338521


In [73]:
# What proportion of male students are majoring in physics?
# DataFrame.count() returns one non-null count per column, so the division
# below yields a Series of per-column ratios. The first entry is read by
# position with .iloc[0]; a plain [0] on this label-indexed Series relies on
# the deprecated integer-key positional fallback.
# NOTE(review): the first entry equals the row-count ratio only while the
# first column has no missing values - confirm if the data source changes.
male_phy_counts = data.query('gender == "male" and major == "Physics"').count()
male_phy_rate = male_phy_counts / male_df.count()
print("Proportion of Male Students majoring in Physics: ", male_phy_rate.iloc[0] * 100)

Proportion of Male Students majoring in Physics:  92.5925925925926


In [96]:
# Fraction (0-1) of female physics applicants who were admitted.
female_phy_df = data.query('gender == "female" and major == "Physics"')
female_phy_admitted = female_phy_df["admitted"]
print("Admission rate for female physic majors: ", female_phy_admitted.mean())

Admission rate for female physic majors:  0.7419354838709677


In [97]:
# Fraction (0-1) of male physics applicants who were admitted.
male_phy_df = data.query('gender == "male" and major == "Physics"')
male_phy_admitted = male_phy_df["admitted"]
print("Admission rate for Male physic majors: ", male_phy_admitted.mean())

Admission rate for Male physic majors:  0.5155555555555555


## 3. Proportion and admission rate for chemistry majors of each gender

In [90]:
# Fraction (0-1) of female applicants whose major is chemistry.
female_chem_mask = (data["gender"] == "female") & (data["major"] == "Chemistry")
female_chem_df = data.loc[female_chem_mask]
female_chem_proportion = len(female_chem_df) / len(female_df)
print("Proportion of female students majoring in chemistry: ", female_chem_proportion)

Proportion of female students majoring in chemistry:  0.8793774319066148


In [91]:
# Fraction (0-1) of male applicants whose major is chemistry.
male_chem_mask = (data["gender"] == "male") & (data["major"] == "Chemistry")
male_chem_df = data.loc[male_chem_mask]
male_chem_proportion = len(male_chem_df) / len(male_df)
print("Proportion of male students majoring in chemistry: ", male_chem_proportion)

Proportion of male students majoring in chemistry:  0.07407407407407407


In [94]:
# Fraction (0-1) of female chemistry applicants who were admitted: the mean
# of the `admitted` flags is the admitted share.
female_chem_admitted = female_chem_df["admitted"]
female_chem_admission_rate = female_chem_admitted.mean()
print("Admission rate for female chemistry majors: ", female_chem_admission_rate)

Admission rate for female chemistry majors:  0.22566371681415928


In [99]:
# Fraction (0-1) of male chemistry applicants who were admitted: the mean
# of the `admitted` flags is the admitted share.
male_chem_admitted = male_chem_df["admitted"]
male_chem_admission_rate = male_chem_admitted.mean()
print("Admission rate for male chemistry majors: ", male_chem_admission_rate)

Admission rate for male chemistry majors:  0.1111111111111111


## 4. Admission rate for each major


In [101]:
# Fraction (0-1) of physics applicants admitted, both genders pooled.
phy_admitted = data.loc[data["major"] == "Physics", "admitted"]
phy_admission_rate = phy_admitted.mean()
print("Admission rate for physics majors: ", phy_admission_rate)

Admission rate for physics majors:  0.54296875


In [102]:
# Fraction (0-1) of chemistry applicants admitted, both genders pooled.
chem_admitted = data.loc[data["major"] == "Chemistry", "admitted"]
chem_admission_rate = chem_admitted.mean()
print("Admission rate for Chemistry majors: ", chem_admission_rate)

Admission rate for Chemistry majors:  0.21721311475409835
