# Education Cost-Benefit
---


### Set Up

In [1]:
# Dependencies
import requests
import pandas as pd
import json

In [2]:
# Relative locations of the three PayScale CSV exports read by the loading cell.
# Being relative, they resolve against the kernel's current working directory.
assc_source = "Resources/payscaleAssoc.csv"      # associate-degree schools (going by the file name)
alum_source = "Resources/payscaleAlumni.csv"     # alumni figures; merged later under the " BS Plus" suffix
bach_source = "Resources/payscaleBachelors.csv"  # bachelor's figures; the base table for the merge

In [61]:
# Read each CSV export into its own untouched "raw" DataFrame; later cells only
# ever select from these, so the originals stay available for re-runs.
raw_bach = pd.read_csv(filepath_or_buffer=bach_source)
raw_alum = pd.read_csv(filepath_or_buffer=alum_source)
raw_assc = pd.read_csv(filepath_or_buffer=assc_source)

# Tip: evaluate raw_assc.columns to list the column names available in a file.

In [62]:
# Narrow each raw table down to the columns the analysis actually uses
base_bach = raw_bach.loc[:, ['School Name', 'School Type', 'Early Career Pay', 'Mid-Career Pay', '% STEM Degrees']]
base_alum = raw_alum.loc[:, ['School Name', 'Early Career Pay', 'Mid-Career Pay']]
base_assc = raw_assc.loc[:, ['School Name', 'School Sector', 'Early Career Pay', 'Mid-Career Pay', '% STEM Degrees']]

# Left-join the alumni pay onto the bachelor's rows, then derive the pay gaps.
#  - Bachelor's columns keep their plain names; the overlapping alumni pay
#    columns receive the " BS Plus" suffix.
#  - "School Type" and "% STEM Degrees" come only from the bachelor's table
#    (base_alum does not carry them), so no null back-fill is needed after the merge.
#  - "Growth" is alumni pay minus bachelor's pay; "Pct" is that gap as a
#    fraction of the bachelor's pay.
comb_alum = (
    base_bach
    .merge(base_alum, how="left", on="School Name", suffixes=("", " BS Plus"))
    .assign(**{
        "EC Pay Growth": lambda d: d["Early Career Pay BS Plus"] - d["Early Career Pay"],
        "EC Pay Pct": lambda d: d["EC Pay Growth"] / d["Early Career Pay"],
        "MC Pay Growth": lambda d: d["Mid-Career Pay BS Plus"] - d["Mid-Career Pay"],
        "MC Pay Pct": lambda d: d["MC Pay Growth"] / d["Mid-Career Pay"],
    })
)

# Per-column count of missing values; schools with no alumni match in the
# left join surface here as missing " BS Plus" (and derived) values.
comb_alum.isna().sum()

School Name                 0
School Type                 0
Early Career Pay            0
Mid-Career Pay              0
% STEM Degrees              1
Early Career Pay BS Plus    2
Mid-Career Pay BS Plus      2
EC Pay Growth               2
EC Pay Pct                  2
MC Pay Growth               2
MC Pay Pct                  2
dtype: int64

In [57]:
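# Strip out School Type data into boolean flag columns (disabled: the slicing cell below filters on "School Type" directly instead)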
#comb_alum["Private"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Private" in s)
#comb_alum["State"] = comb_alum.loc[:, "School Type"].apply(lambda s:"State" in s)
#comb_alum["Ivy League"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Ivy League" in s)
#comb_alum["Sports"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Sports" in s)
#comb_alum["Religious"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Religious" in s)
#comb_alum["Research"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Research" in s)
#comb_alum["Engineering"] = comb_alum.loc[:, "School Type"].apply(lambda s:"Engineering" in s)


In [65]:
# Create slices of data based on School Types
def _rows_with_school_type(frame, label):
    """Return the rows of ``frame`` whose "School Type" text contains ``label``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a string "School Type" column.
    label : str
        Substring to look for. It is matched literally (``regex=False``), so
        characters such as "." or "(" in a label are not treated as a pattern.

    Returns
    -------
    pandas.DataFrame
        The matching rows. Rows whose "School Type" is missing are excluded
        (``na=False``), which replaces the former ``== True`` comparison.
    """
    has_label = frame["School Type"].str.contains(label, regex=False, na=False)
    return frame[has_label]


privt_data = _rows_with_school_type(comb_alum, "Private")
state_data = _rows_with_school_type(comb_alum, "State")
ivylg_data = _rows_with_school_type(comb_alum, "Ivy League")
# "Sport" is a substring match, so it also catches labels spelled "Sports".
sport_data = _rows_with_school_type(comb_alum, "Sport")
relig_data = _rows_with_school_type(comb_alum, "Religious")
rsrch_data = _rows_with_school_type(comb_alum, "Research")
engnr_data = _rows_with_school_type(comb_alum, "Engineering")

In [71]:
# Render the Ivy League slice to sanity-check the school-type filter and the
# derived pay-growth columns.
display(ivylg_data)

Unnamed: 0,School Name,School Type,Early Career Pay,Mid-Career Pay,% STEM Degrees,Early Career Pay BS Plus,Mid-Career Pay BS Plus,EC Pay Growth,EC Pay Pct,MC Pay Growth,MC Pay Pct
1,Princeton University,"Ivy League, Private School, Research Universit...",69800,147800,0.47,71000.0,151900.0,1200.0,0.017192,4100.0,0.02774
8,Harvard University,"Ivy League, Private School, Research Universit...",69200,140700,0.19,71200.0,147500.0,2000.0,0.028902,6800.0,0.04833
10,Dartmouth College,"Ivy League, Private School, Research Universit...",66300,140300,0.32,68300.0,150800.0,2000.0,0.030166,10500.0,0.07484
19,University of Pennsylvania,"Ivy League, Private School, Research Universit...",68100,134800,0.22,70500.0,143200.0,2400.0,0.035242,8400.0,0.062315
24,Yale University,"Ivy League, Private School, Research Universit...",66800,132100,0.23,70300.0,143300.0,3500.0,0.052395,11200.0,0.084784
25,Brown University,"Ivy League, Private School, Research Universit...",63000,132000,0.39,64600.0,135400.0,1600.0,0.025397,3400.0,0.025758
43,Columbia University in the City of New York,"Ivy League, Private School, Research Universit...",66000,124700,0.3,67500.0,130100.0,1500.0,0.022727,5400.0,0.043304
48,Cornell University,"Ivy League, Private School, Research Universit...",65600,123900,0.43,67700.0,129600.0,2100.0,0.032012,5700.0,0.046005


---
### Level of Education

In [None]:
# TODO: How do the best Associates schools compare to the best Bachelors schools?

# TODO: How do the best Associates schools compare to the Bachelors mean?

# TODO: How do the best Associates schools compare to the worst Bachelors schools?

---
### School Type Factors

In [None]:
# TODO: Identify schools that are Ivy League vs. not

# TODO: Find the mean early-career and mid-career salaries and potential

