# Gender Bias Study
Use `admissions.csv` for this exercise.

In [1]:
# Load and view first few lines of dataset
import pandas as pd
import numpy as np


In [67]:
df = pd.read_csv("admission.csv")

In [68]:
df.head()

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


In [69]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   student_id  500 non-null    int64 
 1   gender      500 non-null    object
 2   major       500 non-null    object
 3   admitted    500 non-null    bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 12.3+ KB


In [70]:
df.describe()

Unnamed: 0,student_id
count,500.0
mean,41495.084
std,10065.292424
min,24348.0
25%,32429.75
50%,41460.5
75%,50687.75
max,58298.0


In [7]:
df.shape

(500, 4)

### Proportion and admission rate for each sex

In [14]:
# Proportion of students that are female & male
female_students = df[(df.gender=="female")].count()
female_students

student_id    257
gender        257
major         257
admitted      257
dtype: int64

In [15]:
male_students = df[(df.gender=="male")].count()
male_students

student_id    243
gender        243
major         243
admitted      243
dtype: int64

In [19]:
total_students = df["gender"].count()
total_students

500

In [23]:
prop_fem = (female_students / total_students) *100
print(str(prop_fem) + "%")

student_id    51.4
gender        51.4
major         51.4
admitted      51.4
dtype: float64%


In [24]:
prop_male = (male_students / total_students) *100
prop_male

student_id    48.6
gender        48.6
major         48.6
admitted      48.6
dtype: float64

#### About 51.4% of all students are female.
#### About 48.6% of all students are male.

In [38]:
# Admission rate for females & males
adm = df.groupby(['admitted', 'gender']).count()
adm

Unnamed: 0_level_0,Unnamed: 1_level_0,student_id,major
admitted,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
False,female,183,183
False,male,125,125
True,female,74,74
True,male,118,118


In [39]:
total_admit = df[(df.admitted==True)].count()
total_admit

student_id    192
gender        192
major         192
admitted      192
dtype: int64

In [132]:
t_a = df["admitted"].value_counts()
t_a

False    308
True     192
Name: admitted, dtype: int64

In [43]:
female_admit = (74 / total_admit)*100
female_admit

student_id    38.541667
gender        38.541667
major         38.541667
admitted      38.541667
dtype: float64

In [44]:
male_admit = (118 / total_admit)*100
male_admit

student_id    61.458333
gender        61.458333
major         61.458333
admitted      61.458333
dtype: float64

#### About 38.54% of all admitted students are female.
#### About 61.45% of all admitted students are male.

### Proportion and admission rate for physics majors of each sex

In [74]:
# What proportion of female and male students are majoring in physics?
physics_majors = df.groupby('major').count()
physics_majors

Unnamed: 0_level_0,student_id,gender,admitted
major,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chemistry,244,244,244
Physics,256,256,256


In [106]:
female_students = df[df["gender"]=="female"]
fem_majors = female_students["major"].value_counts(normalize=True)
fem_majors

Chemistry    0.879377
Physics      0.120623
Name: major, dtype: float64

In [104]:
male_students = df[df["gender"]=="male"]
male_majors = male_students["major"].value_counts(normalize=True)
male_majors

Physics      0.925926
Chemistry    0.074074
Name: major, dtype: float64

#### About 12.1% of all female students are Physics majors.
#### About 92.6% of all male students are Physics majors.

In [108]:
# Admission rate for female physics majors
female_students_phy = female_students[female_students["major"]=="Physics"]
female_students_phy

Unnamed: 0,student_id,gender,major,admitted
4,53714,female,Physics,True
47,54442,female,Physics,True
59,27446,female,Physics,True
66,29216,female,Physics,False
85,30213,female,Physics,False
93,30188,female,Physics,True
125,24463,female,Physics,True
144,32288,female,Physics,True
148,31506,female,Physics,False
150,25363,female,Physics,True


In [111]:
fem_adm_maj = female_students_phy["admitted"].value_counts(normalize=True)
fem_adm_maj

True     0.741935
False    0.258065
Name: admitted, dtype: float64

In [112]:
# Admission rate for male physics majors
male_students_phy = male_students[male_students["major"]=="Physics"]
male_students_phy

Unnamed: 0,student_id,gender,major,admitted
1,56105,male,Physics,True
3,51765,male,Physics,True
6,25946,male,Physics,True
8,55247,male,Physics,False
9,35838,male,Physics,True
...,...,...,...,...
489,56171,male,Physics,False
491,25641,male,Physics,False
495,26950,male,Physics,False
498,52067,male,Physics,True


In [113]:
male_adm_maj = male_students_phy["admitted"].value_counts(normalize=True)
male_adm_maj

True     0.515556
False    0.484444
Name: admitted, dtype: float64

#### 75% of females, 51.5% of males 

### Proportion and admission rate for chemistry majors of each sex

In [116]:
female_students_chem = female_students[female_students["major"]=="Chemistry"]
female_students_chem 

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
2,31441,female,Chemistry,False
5,50693,female,Chemistry,False
7,27648,female,Chemistry,True
11,53708,female,Chemistry,True
...,...,...,...,...
488,33713,female,Chemistry,True
492,26737,female,Chemistry,False
494,30192,female,Chemistry,True
496,25775,female,Chemistry,True


In [118]:
fem_adm_maj_chem = female_students_chem["admitted"].value_counts(normalize=True)
fem_adm_maj_chem

False    0.774336
True     0.225664
Name: admitted, dtype: float64

In [119]:
male_students_chem = male_students[male_students["major"]=="Chemistry"]
male_students_chem 

Unnamed: 0,student_id,gender,major,admitted
22,42508,male,Chemistry,False
84,35357,male,Chemistry,False
109,41460,male,Chemistry,False
129,47442,male,Chemistry,False
199,27315,male,Chemistry,False
224,30486,male,Chemistry,False
253,57881,male,Chemistry,False
258,29033,male,Chemistry,False
290,28846,male,Chemistry,False
314,39609,male,Chemistry,False


In [120]:
male_adm_maj_chem = male_students_chem["admitted"].value_counts(normalize=True)
male_adm_maj_chem

False    0.888889
True     0.111111
Name: admitted, dtype: float64

### 22.6% of females and 11.1% of males 

In [128]:
#print(f"{f_chem_students} female students are Chemistry majors")

# How many female students are majoring in chemistry?
fem_adm_maj_chem2 = female_students_chem["admitted"].value_counts()
fem_adm_maj_chem2

False    175
True      51
Name: admitted, dtype: int64

#### 51 female students are majoring in Chem 

In [178]:
# print(f"{m_chem_students} male students are Chemistry majors")

# How many male students are majoring in chemistry?
male_adm_maj_chem2 = male_students_chem["admitted"].value_counts()
male_adm_maj_chem2

False    16
True      2
Name: admitted, dtype: int64

#### 2 male students are majoring in Chem 

In [177]:
# Admission rate for female chemistry majors
fem_adm_maj_chem2 = female_students_chem["admitted"].value_counts(normalize=True)
fem_adm_maj_chem2

False    0.774336
True     0.225664
Name: admitted, dtype: float64

#### 22.6% admit rate for female chem majors 

In [176]:
# Admission rate for male chemistry majors
male_adm_maj_chem2 = male_students_chem["admitted"].value_counts(normalize=True)
male_adm_maj_chem2

False    0.888889
True     0.111111
Name: admitted, dtype: float64

#### 11.1% admit rate for male chem majors 

### Admission rate for each major

In [147]:
# Admission rate for physics majors
total_admit = df[df["admitted"]==True]
physics_admit = total_admit[(total_admit.major=="Physics")].count()
physics_admit

student_id    139
gender        139
major         139
admitted      139
dtype: int64

In [148]:
# Admission rate for chemistry majors
chem_admit = total_admit[(total_admit.major=="Chemistry")].count()
chem_admit

student_id    53
gender        53
major         53
admitted      53
dtype: int64

In [169]:
admit_major = total_admit.groupby("major")["admitted"].count()

In [170]:
admit_major 

major
Chemistry     53
Physics      139
Name: admitted, dtype: int64

In [174]:
admit_each_major =  admit_major / sum(admit_major)

In [175]:
print(admit_each_major)

major
Chemistry    0.276042
Physics      0.723958
Name: admitted, dtype: float64


## Chemistry has a 27.6% admit rate and Physics has a 72.4% admit rate