In [2]:
import pandas as pd
pd.options.display.float_format = '{:,.2f}'.format
from itables import show


## Import pre-cleaned data and find most viable "EARN" variables

In [3]:
df = pd.read_csv('Data/clean_field_of_study.csv', dtype={'CIPFIELD': str, 'CIPCODE': str})

# Look at which "EARN" variables have the most datapoints
earn_col_counts = {}
for col in df.columns:
    if "EARN" in col:
        earn_col_counts[col] = len(df[col].unique()) -1 # subtract 1 to account for NaN

# sort dictionary by value to see which variables have the most data
earn_col_counts = {k: v for k, v in sorted(earn_col_counts.items(), key=lambda item: item[1], reverse=True)}
list(earn_col_counts.items())[:5] # Display first 5 items

[('EARN_MDN_HI_1YR', 264),
 ('EARN_MDN_1YR', 254),
 ('EARN_MDN_4YR', 244),
 ('EARN_PELL_WNE_MDN_1YR', 226),
 ('EARN_NE_MDN_3YR', 219)]

## Filter data down to the two key variables discovered above
* EARN_MDN_1YR: 'Median earnings of graduates working and not enrolled 1 year after completing'   
* EARN_MDN_4YR: 'Median earnings of graduates working and not enrolled 4 years after completing'

In [4]:
# We decide to us EARN_MDN_1YR and EARN_MDN_4YR
df = df[df.columns.to_list()[:7] + ['EARN_MDN_1YR', 'EARN_MDN_4YR']] 
df.to_csv('Data/ouchie.csv', index=False)

# Get counts of nans
both_nan_count = df[(df['EARN_MDN_1YR'].isna()) & (df['EARN_MDN_4YR'].isna())].shape[0]
either_nan_count = df[(df['EARN_MDN_1YR'].isna()) | (df['EARN_MDN_4YR'].isna())].shape[0] - both_nan_count
print(f'Total Row Count: {len(df)}\nBoth NaN: {both_nan_count}\nEither NaN: {either_nan_count}')

# Drop rows where both EARN vars are nan (for easier viewing)
df.dropna(inplace=True)
# df = df.dropna(subset=['EARN_MDN_1YR', 'EARN_MDN_4YR'], how='all')
show(df)

Total Row Count: 554
Both NaN: 262
Either NaN: 85


Unnamed: 0,INSTNM,OPEID6,CONTROL,MAIN,CIPDEF,CIPFIELD,CIPCODE,EARN_MDN_1YR,EARN_MDN_4YR
Loading... (need help?),,,,,,,,,


## Look at average earnings by University, by Field, and by Field taken at a given University

In [5]:
# View average median earnings for entire universities (mean of all programs with public record)
univ_group_mean = df.groupby('INSTNM').agg({'EARN_MDN_1YR': 'mean', 'EARN_MDN_4YR': 'mean'}).reset_index()
univ_group_mean = univ_group_mean.sort_values(by='EARN_MDN_4YR', ascending=False)
univ_group_mean

Unnamed: 0,INSTNM,EARN_MDN_1YR,EARN_MDN_4YR
0,Brigham Young University,47174.15,63373.25
2,University of Utah,47227.86,61987.43
6,Westminster College,46325.4,60817.4
4,Utah Valley University,44682.09,57600.16
5,Weber State University,46260.09,55969.97
3,Utah State University,39905.87,52193.71
1,Southern Utah University,35300.25,44503.17


In [6]:
# View average median earnings for each field of study (mean of all programs @all universities with public record)
field_group_mean = df.groupby(['CIPDEF']).agg({'EARN_MDN_1YR': 'mean', 'EARN_MDN_4YR': 'mean'}).reset_index()
field_group_mean = field_group_mean.sort_values(by='EARN_MDN_4YR', ascending=False)
field_group_mean

Unnamed: 0,CIPDEF,EARN_MDN_1YR,EARN_MDN_4YR
7,COMPUTER AND INFORMATION SCIENCES AND SUPPORT ...,69317.67,88258.0
10,ENGINEERING.,69412.13,87750.6
19,MATHEMATICS AND STATISTICS.,57342.5,79298.0
8,CONSTRUCTION TRADES.,68886.0,78807.0
11,ENGINEERING/ENGINEERING-RELATED TECHNOLOGIES/T...,65609.0,78380.5
6,COMMUNICATIONS TECHNOLOGIES/TECHNICIANS AND SU...,48703.0,74650.0
4,"BUSINESS, MANAGEMENT, MARKETING, AND RELATED S...",55107.69,72721.94
26,TRANSPORTATION AND MATERIALS MOVING.,50560.5,70107.5
22,PHYSICAL SCIENCES.,50117.33,69189.33
1,ARCHITECTURE AND RELATED SERVICES.,44067.5,62267.0


In [9]:
# View average median earnings for each field of study for each university
univ_group = df.groupby(['INSTNM', 'CIPDEF']).agg({'EARN_MDN_1YR': 'mean', 'EARN_MDN_4YR': 'mean'}).reset_index()
show(univ_group)
pivot_1yr = univ_group.pivot(index='INSTNM', columns='CIPDEF', values='EARN_MDN_1YR')
pivot_4yr = univ_group.pivot(index='INSTNM', columns='CIPDEF', values='EARN_MDN_4YR')
show(pivot_1yr, caption="1 Year Median Earnings by Field of Study")
show(pivot_4yr, caption="4 Year Median Earnings by Field of Study")

INSTNM,CIPDEF,EARN_MDN_1YR,EARN_MDN_4YR
Loading... (need help?),,,


CIPDEF,AGRICULTURAL/ANIMAL/PLANT/VETERINARY SCIENCE AND RELATED FIELDS.,ARCHITECTURE AND RELATED SERVICES.,"AREA, ETHNIC, CULTURAL, GENDER, AND GROUP STUDIES.",BIOLOGICAL AND BIOMEDICAL SCIENCES.,"BUSINESS, MANAGEMENT, MARKETING, AND RELATED SUPPORT SERVICES.","COMMUNICATION, JOURNALISM, AND RELATED PROGRAMS.",COMMUNICATIONS TECHNOLOGIES/TECHNICIANS AND SUPPORT SERVICES.,COMPUTER AND INFORMATION SCIENCES AND SUPPORT SERVICES.,CONSTRUCTION TRADES.,EDUCATION.,ENGINEERING.,ENGINEERING/ENGINEERING-RELATED TECHNOLOGIES/TECHNICIANS.,ENGLISH LANGUAGE AND LITERATURE/LETTERS.,FAMILY AND CONSUMER SCIENCES/HUMAN SCIENCES.,"FOREIGN LANGUAGES, LITERATURES, AND LINGUISTICS.",HEALTH PROFESSIONS AND RELATED PROGRAMS.,HISTORY.,"HOMELAND SECURITY, LAW ENFORCEMENT, FIREFIGHTING AND RELATED PROTECTIVE SERVICES.","LIBERAL ARTS AND SCIENCES, GENERAL STUDIES AND HUMANITIES.",MATHEMATICS AND STATISTICS.,MULTI/INTERDISCIPLINARY STUDIES.,"PARKS, RECREATION, LEISURE, FITNESS, AND KINESIOLOGY.",PHYSICAL SCIENCES.,PSYCHOLOGY.,PUBLIC ADMINISTRATION AND SOCIAL SERVICE PROFESSIONS.,SOCIAL SCIENCES.,TRANSPORTATION AND MATERIALS MOVING.,VISUAL AND PERFORMING ARTS.
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,


CIPDEF,AGRICULTURAL/ANIMAL/PLANT/VETERINARY SCIENCE AND RELATED FIELDS.,ARCHITECTURE AND RELATED SERVICES.,"AREA, ETHNIC, CULTURAL, GENDER, AND GROUP STUDIES.",BIOLOGICAL AND BIOMEDICAL SCIENCES.,"BUSINESS, MANAGEMENT, MARKETING, AND RELATED SUPPORT SERVICES.","COMMUNICATION, JOURNALISM, AND RELATED PROGRAMS.",COMMUNICATIONS TECHNOLOGIES/TECHNICIANS AND SUPPORT SERVICES.,COMPUTER AND INFORMATION SCIENCES AND SUPPORT SERVICES.,CONSTRUCTION TRADES.,EDUCATION.,ENGINEERING.,ENGINEERING/ENGINEERING-RELATED TECHNOLOGIES/TECHNICIANS.,ENGLISH LANGUAGE AND LITERATURE/LETTERS.,FAMILY AND CONSUMER SCIENCES/HUMAN SCIENCES.,"FOREIGN LANGUAGES, LITERATURES, AND LINGUISTICS.",HEALTH PROFESSIONS AND RELATED PROGRAMS.,HISTORY.,"HOMELAND SECURITY, LAW ENFORCEMENT, FIREFIGHTING AND RELATED PROTECTIVE SERVICES.","LIBERAL ARTS AND SCIENCES, GENERAL STUDIES AND HUMANITIES.",MATHEMATICS AND STATISTICS.,MULTI/INTERDISCIPLINARY STUDIES.,"PARKS, RECREATION, LEISURE, FITNESS, AND KINESIOLOGY.",PHYSICAL SCIENCES.,PSYCHOLOGY.,PUBLIC ADMINISTRATION AND SOCIAL SERVICE PROFESSIONS.,SOCIAL SCIENCES.,TRANSPORTATION AND MATERIALS MOVING.,VISUAL AND PERFORMING ARTS.
INSTNM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,
