# UC Berkeley EECS/CS Demographic Overview

This notebook contains a comprehensive analysis of census data for the EECS and L&S CS majors. All data is sourced from the Cal Answers census.

In [189]:
from utils import census
from tabulate import tabulate
import pandas as pd

In [190]:
# Load the census table through the project helper in utils.census.
# NOTE(review): the column/rename listing shown in this cell's output appears
# to be printed by the loader itself -- confirm in utils/census.py.
data = census.load_census_df()

Columns: 
 • Academic Yr
 • Gender Desc (renamed to: Gender)
 • Major Short Nm (renamed to: Major)
 • Prorated Student Major Cnt Sum
 • Semester Year Letter Cd Concat
 • Semester Year Name Concat (renamed to: Semester/Year)
 • Short Ethnic Desc (renamed to: Ethnicity L3)
 • Student Headcount (renamed to: Headcount)
 • Student Headcount-DEC
 • Ucb Level2 Ethnic Rollup Desc (renamed to: Ethnicity L2)
 • Ungrad Grad Cd
 • Ungrad Grad Cd.1


In [191]:
# Peek at the first two rows to check the renamed columns
# (Ethnicity L2/L3, Semester/Year, Gender, Headcount, Major).
data.head(2)

Unnamed: 0,Ethnicity L2,Ethnicity L3,Semester/Year,Gender,Headcount,Academic Yr,Major
0,African American,African American/Black,1983 Fall,Female,2,1983-84,Afr Amer Stds-Social Sci
1,African American,African American/Black,1983 Fall,Male,1,1983-84,Afr Amer Stds-Social Sci


In [192]:
# Keep only the Fall 2020 census snapshot; the per-major frames are cut from it.
filtered = data.loc[data['Semester/Year'].eq('2020 Fall')]

In [193]:
# One frame per major of interest, each a row subset of the Fall 2020 data.
eecs, cs, undeclared = (
    filtered[filtered['Major'] == major_name]
    for major_name in (
        'Electrical Eng & Comp Sci',
        'L&S Computer Science',
        'Letters & Sci Undeclared',
    )
)

In [194]:
def display(table):
    """Print `table` as a plain-text grid with an extra percentage column.

    Rows whose 'Gender', 'Ethnicity L2' or 'Ethnicity L3' value is
    'Decline to State' are removed first (only for the columns actually
    present), so every percentage is relative to the remaining headcount.

    Args:
        table: DataFrame with a numeric 'Headcount' column. The caller's
            frame is never modified.

    Returns:
        None. The formatted table (no header row, no index) is printed.

    Note:
        This name shadows IPython's built-in `display` for the rest of the
        notebook; it is kept because later cells call it by this name.
    """
    kept = table
    for col in ['Gender', 'Ethnicity L2', 'Ethnicity L3']:
        if col in kept.columns:
            kept = kept[kept[col] != 'Decline to State']

    # Compute the denominator once instead of re-summing the column for
    # every row inside the formatter.
    total = kept['Headcount'].sum()

    def as_percent(headcount):
        # A zero total would otherwise divide by zero; report 0.00% instead.
        share = 100 * (headcount / total) if total else 0.0
        return "{0:.2f}%".format(share)

    # `assign` builds a new frame, so neither the caller's data nor a
    # filtered slice is written to (no SettingWithCopyWarning).
    with_ratio = kept.assign(Ratio=kept['Headcount'].map(as_percent))
    print(tabulate(with_ratio, showindex='never'))

# Ethnicity (Level 3)

In [195]:
# EECS headcount per Level-3 ethnicity, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: EECS by Ethnicity (Level 3)')
display(eecs.groupby('Ethnicity L3')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: EECS by Ethnicity (Level 3)
-----------------------------  ---  ------
Chinese                        411  26.46%
South Asian                    330  21.25%
International                  236  15.20%
White                          210  13.52%
Mexican American/Chicano       107  6.89%
Korean                          68  4.38%
Vietnamese                      51  3.28%
Other Hispanic/Latino           42  2.70%
African American/Black          31  2.00%
Filipino                        28  1.80%
Other Asian                     19  1.22%
Japanese                        15  0.97%
Pacific Islander                 3  0.19%
Native American/Alaska Native    2  0.13%
-----------------------------  ---  ------


In [196]:
# L&S CS headcount per Level-3 ethnicity, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: L&S CS by Ethnicity (Level 3)')
display(cs.groupby('Ethnicity L3')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: L&S CS by Ethnicity (Level 3)
-----------------------------  ---  ------
Chinese                        533  32.56%
International                  362  22.11%
South Asian                    310  18.94%
White                          195  11.91%
Korean                          66  4.03%
Vietnamese                      60  3.67%
Filipino                        26  1.59%
Mexican American/Chicano        24  1.47%
Other Asian                     22  1.34%
Other Hispanic/Latino           15  0.92%
Japanese                        12  0.73%
African American/Black           9  0.55%
Native American/Alaska Native    3  0.18%
-----------------------------  ---  ------


# Ethnicity (Level 2)

In [197]:
# EECS headcount per Level-2 ethnicity rollup, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: EECS by Ethnicity (Level 2)')
display(eecs.groupby('Ethnicity L2')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: EECS by Ethnicity (Level 2)
-----------------------------  ---  ------
Asian                          922  59.37%
International                  236  15.20%
White                          210  13.52%
Chicano/Latino                 149  9.59%
African American                31  2.00%
Pacific Islander                 3  0.19%
Native American/Alaska Native    2  0.13%
-----------------------------  ---  ------


In [198]:
# L&S CS headcount per Level-2 ethnicity rollup, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: L&S CS by Ethnicity (Level 2)')
display(cs.groupby('Ethnicity L2')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: L&S CS by Ethnicity (Level 2)
-----------------------------  ----  ------
Asian                          1029  62.86%
International                   362  22.11%
White                           195  11.91%
Chicano/Latino                   39  2.38%
African American                  9  0.55%
Native American/Alaska Native     3  0.18%
-----------------------------  ----  ------


# Gender

In [199]:
# EECS headcount per gender, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: EECS by Gender')
display(eecs.groupby('Gender')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: EECS by Gender
------  ----  ------
Male    1302  77.68%
Female   374  22.32%
------  ----  ------


In [200]:
# L&S CS headcount per gender, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: L&S CS by Gender')
display(cs.groupby('Gender')[['Headcount']].sum()
        .sort_values('Headcount', ascending=False).reset_index())

Fall 2020 Census: L&S CS by Gender
------  ----  ------
Male    1278  72.41%
Female   487  27.59%
------  ----  ------


# Gender & Ethnicity (Level 2) 

In [201]:
# EECS headcount per (gender, Level-2 ethnicity) pair, largest group first.
# The title now names both grouping keys; it previously repeated the
# gender-only title of an earlier table.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: EECS by Gender & Ethnicity (Level 2)')
display(eecs.groupby(['Gender', 'Ethnicity L2'])[['Headcount']].sum().reset_index()
        .sort_values(by=['Headcount'], ascending=False))

Fall 2020 Census: EECS by Gender
------  -----------------------------  ---  ------
Male    Asian                          707  45.67%
Female  Asian                          214  13.82%
Male    International                  196  12.66%
Male    White                          161  10.40%
Male    Chicano/Latino                 119  7.69%
Female  White                           46  2.97%
Female  International                   39  2.52%
Female  Chicano/Latino                  30  1.94%
Male    African American                18  1.16%
Female  African American                13  0.84%
Male    Pacific Islander                 3  0.19%
Male    Native American/Alaska Native    2  0.13%
------  -----------------------------  ---  ------


In [202]:
# L&S CS headcount per (gender, Level-2 ethnicity) pair, largest group first.
# The title now names both grouping keys; it previously repeated the
# gender-only title of an earlier table.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: L&S CS by Gender & Ethnicity (Level 2)')
display(cs.groupby(['Gender', 'Ethnicity L2'])[['Headcount']].sum().reset_index()
        .sort_values(by=['Headcount'], ascending=False))

Fall 2020 Census: L&S CS by Gender
------  -----------------------------  ---  ------
Male    Asian                          753  46.06%
Female  Asian                          276  16.88%
Male    International                  249  15.23%
Male    White                          156  9.54%
Female  International                  111  6.79%
Female  White                           39  2.39%
Male    Chicano/Latino                  30  1.83%
Female  Chicano/Latino                   9  0.55%
Female  African American                 5  0.31%
Male    African American                 4  0.24%
Male    Native American/Alaska Native    2  0.12%
Female  Native American/Alaska Native    1  0.06%
------  -----------------------------  ---  ------


# Gender & Ethnicity (Level 3) 

In [203]:
# EECS headcount per (gender, Level-3 ethnicity) pair, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: EECS by Gender & Ethnicity (Level 3)')
display(eecs.groupby(['Gender', 'Ethnicity L3'])[['Headcount']].sum().reset_index()
        .sort_values(by=['Headcount'], ascending=False))

Fall 2020 Census: EECS by Gender & Ethnicity (Level 3)
------  -----------------------------  ---  ------
Male    Chinese                        308  19.90%
Male    South Asian                    254  16.41%
Male    International                  196  12.66%
Male    White                          161  10.40%
Female  Chinese                        103  6.65%
Male    Mexican American/Chicano        87  5.62%
Female  South Asian                     75  4.84%
Male    Korean                          55  3.55%
Female  White                           46  2.97%
Male    Vietnamese                      39  2.52%
Female  International                   39  2.52%
Male    Other Hispanic/Latino           32  2.07%
Male    Filipino                        23  1.49%
Female  Mexican American/Chicano        20  1.29%
Male    African American/Black          18  1.16%
Male    Other Asian                     15  0.97%
Female  African American/Black          13  0.84%
Male    Japanese                        

In [204]:
# L&S CS headcount per (gender, Level-3 ethnicity) pair, largest group first.
# Only 'Headcount' is aggregated: summing the whole frame would also
# "sum" (concatenate) the string columns on pandas >= 2.0.
print('Fall 2020 Census: CS by Gender & Ethnicity (Level 3)')
display(cs.groupby(['Gender', 'Ethnicity L3'])[['Headcount']].sum().reset_index()
        .sort_values(by=['Headcount'], ascending=False))

Fall 2020 Census: CS by Gender & Ethnicity (Level 3)
------  -----------------------------  ---  ------
Male    Chinese                        365  22.32%
Male    International                  249  15.23%
Male    South Asian                    248  15.17%
Female  Chinese                        168  10.28%
Male    White                          156  9.54%
Female  International                  111  6.79%
Female  South Asian                     62  3.79%
Male    Korean                          51  3.12%
Male    Vietnamese                      43  2.63%
Female  White                           39  2.39%
Male    Filipino                        21  1.28%
Male    Mexican American/Chicano        18  1.10%
Female  Vietnamese                      17  1.04%
Male    Other Asian                     17  1.04%
Female  Korean                          15  0.92%
Male    Other Hispanic/Latino           12  0.73%
Male    Japanese                         8  0.49%
Female  Mexican American/Chicano         6