In [55]:
import pandas as pd
import numpy as np

In [56]:
#read in employment files
years = list(range(2013, 2023))
employmentTables = []
for year in years:
    fileName = ["Employment Summary_", str(year), ".xlsx"]
    fileName = "".join(fileName)
    file = pd.read_excel(fileName)
    employmentTables.append(file)

In [57]:
#add year column to employment files
year = 2012
for i in range(0,10):
    employmentTables[i]["Year"] = year
    year += 1

In [58]:
#cleaning column names for consistency
allYears = pd.concat(employmentTables[0:10])
allYears.columns = allYears.columns.str.replace("FullTimeLongTerm", "FTLT")
allYears.columns = allYears.columns.str.replace("PartTimeLongTerm", "PTLT")
allYears.columns = allYears.columns.str.replace("FullTimeShortTerm", "FTST")
allYears.columns = allYears.columns.str.replace("PartTimeShortTerm", "PTST")

In [59]:
#pivoting from wide to long

#full time long term df
FTLTCols = [ colNm for colNm in (allYears.columns) if "PT" not in colNm and "ST" not in colNm]
FullTimeLT = allYears.loc[:, allYears.columns.isin(FTLTCols)].copy()
FullTimeLT.insert(loc=1, column='term', value="longterm")
FullTimeLT.insert(loc=1, column='time', value="fulltime")
FullTimeLT.columns = FullTimeLT.columns.str.replace("FTLT", "")

#full time short term df
FTSTCols = [ colNm for colNm in (allYears.columns) if "PT" not in colNm and "LT" not in colNm]
FullTimeST = allYears.loc[:, allYears.columns.isin(FTSTCols)].copy()
FullTimeST.insert(loc=1, column='term', value="shortterm")
FullTimeST.insert(loc=1, column='time', value="fulltime")
FullTimeST.columns = FullTimeST.columns.str.replace("FTST", "")

#part time long term df
PTLTCols = [ colNm for colNm in (allYears.columns) if "FT" not in colNm and "ST" not in colNm]
PartTimeLT = allYears.loc[:, allYears.columns.isin(PTLTCols)].copy()
PartTimeLT.insert(loc=1, column='term', value="longterm")
PartTimeLT.insert(loc=1, column='time', value="parttime")
PartTimeLT.columns = PartTimeLT.columns.str.replace("PTLT", "")

#part time short term df
PTSTCols = [ colNm for colNm in (allYears.columns) if "FT" not in colNm and "LT" not in colNm]
PartTimeST = allYears.loc[:, allYears.columns.isin(PTSTCols)].copy()
PartTimeST.insert(loc=1, column='term', value="shortterm")
PartTimeST.insert(loc=1, column='time', value="parttime")
PartTimeST.columns = PartTimeST.columns.str.replace("PTST", "")

#gradschool
gradCols = [colNm for colNm in (allYears.columns) if "FT" not in colNm and "ST" not in colNm and "PT" not in colNm and "LT" not in colNm]
grad = allYears.loc[:, allYears.columns.isin(gradCols)].copy()
grad.insert(loc=1, column='term', value="gradSchool")
grad.insert(loc=1, column='time', value="gradSchool")
grad["EnrolledInGraduateStudies"] = grad['EnrolledInGraduateStudiesNumber'].add(grad['PursuingGraduateDegreeNumber'], fill_value = 0)

#unemployed
unemployedCols = [colNm for colNm in (allYears.columns) if "FT" not in colNm and "ST" not in colNm and "PT" not in colNm and "LT" not in colNm]
unemployed = allYears.loc[:, allYears.columns.isin(unemployedCols)].copy()
unemployed.insert(loc=1, column='term', value="unemployed")
unemployed.insert(loc=1, column='time', value="unemployed")
unemployed["Unemployed"] = unemployed['UnEmployedNotSeekingNumber'].add(unemployed['UnEmployedSeekingNumber'], fill_value = 0).add(unemployed['UnEmployedStartDateDeferredNumber'], fill_value = 0)

#unknows
unknownCols = [colNm for colNm in (allYears.columns) if "FT" not in colNm and "ST" not in colNm and "PT" not in colNm and "LT" not in colNm]
unknown = allYears.loc[:, allYears.columns.isin(unknownCols)].copy()
unknown.insert(loc=1, column='term', value="unknown")
unknown.insert(loc=1, column='time', value="unknown")
unknown.columns = unknown.columns.str.replace("EmploymentStatusUnknownNumber", "status_Unknown")

#append dfs
allYears = pd.concat([PartTimeST, PartTimeLT, FullTimeLT, FullTimeST, grad, unemployed, unknown])



In [60]:
#remove irrelevant columns

#remove summary statistic columns
summaryColumns = [colNm for colNm in (allYears.columns) if "Number"  in colNm]
summaryColumns
allYears = allYears.drop(summaryColumns, axis = 1)

#remove addition summary statistic columns
allYears = allYears.drop(['1-10', '101-250', '11-25', '2-10', '251-500', '26-50', '501-PLUS',  '51-100', 'Solo'], axis = 1)
allYears = allYears.drop(['PublicInterest', 'BusinessIndustry', 'Unknown', 'Government', 'PublicInterest',
                         'Clerkships_Federal', 'Clerkships_StateLocal', 'Clerkships_Other',
                        'EmployerTypeUnknown', 'Total', 'Clerkships_Tribal', 'Education',
                         'Clerkships_International'], axis = 1)

#remove state statistics
allYears = allYears.drop(['FirstLargestEmployment',
       'SecondLargestEmployment', 'ThirdLargestEmployment',
       'EmployedInForeignCountries'], axis = 1)

#remove university funded breakdown columns
allYears = allYears.drop(['Funded_BarPassge', 'Funded_BarPassgeTotalEmployed', 'Funded_JDAdvantage', 
                          'Funded_JDAdvantageTotalEmployed', 'Funded_Profession', 'Funded_ProfessionTotalEmployed',
                         'Funded_TotTotalEmployed', 'Funded_TotTotalEmployed', 
                          'Funded_NONProfessional', 'Funded_NONProfessionalTotalEmployed', 
                          'Funded_OtherPosition', 'Funded_OtherPosition',
                          'Funded_OtherPositionTotalEmployed', 'Funded_Tot'], axis =1)


In [61]:
#merge columns with name/category changes

#solo vs 2-10 law firms
allYears['1-10-'] = allYears['1-10-'].add(allYears['2-10-'], fill_value = 0).add(allYears['Solo-'], fill_value =0)
allYears = allYears.drop(['2-10-', 'Solo-'], axis =1)

#clerkships other broken out to intl and tribal in 2021, merge
allYears["Clerkships_Other_"] = allYears["Clerkships_Other_"].add(allYears["Clerkships_Tribal_"],fill_value =0).add(allYears["Clerkships_International_"], fill_value = 0)
allYears = allYears.drop(["Clerkships_Tribal_", "Clerkships_International_", "Employed_LawSchool"], axis =1)

#other nameing convention change
allYears["Employed_OtherPosition"] = allYears["Employed_NonProfessionPosition"].add(allYears["Employed_OtherPosition"], fill_value = 0)
allYears = allYears.drop(["Employed_NonProfessionPosition"], axis =1)
allYears.columns

Index(['SchoolName', 'time', 'term', 'Employed_BarPassageRequired',
       'Employed_JDAdvantagePTLST', 'Employed_ProfessionPosition',
       'Employed_Undeterminable', '11-25-', '26-50-', '51-100-', '101-250-',
       '251-500-', '501-', 'Unknown-', 'BusinessIndustry_', 'Government_',
       'PublicInterest_', 'Clerkships_Federal_', 'Clerkships_StateLocal_',
       'Clerkships_Other_', 'Education_', 'EmployerTypeUnknown_', 'Total_',
       'Year', '1-10-', 'Employed_OtherPosition', 'Employed_JDAdvantage',
       'EnrolledInGraduateStudies', 'Unemployed', 'status_Unknown'],
      dtype='object')

In [62]:
#rearrang ecolumns
allYears = allYears.reindex(columns= ['SchoolName', 'Year','time', 'term', 'Employed_BarPassageRequired',
       'Employed_JDAdvantage', 'Employed_ProfessionPosition','Employed_OtherPosition', 'Employed_by_LawSchool',
       'Employed_Undeterminable', 'EnrolledInGraduateStudies','Unemployed', 'status_Unknown','1-10-', '11-25-', '26-50-', '51-100-', '101-250-',
       '251-500-', '501-', 'BusinessIndustry_', 'Government_',
       'PublicInterest_', 'Clerkships_Federal_', 'Clerkships_StateLocal_',
       'Clerkships_Other_', 'Education_', 'EmployerTypeUnknown_', 'Total_'])
allYears["status_Unknown"]

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
191    0.0
192    2.0
193    0.0
194    1.0
195    1.0
Name: status_Unknown, Length: 14098, dtype: float64

In [63]:
#split out just employement data
employment = allYears[['SchoolName', 'Year','time', 'term', 'Employed_BarPassageRequired',
       'Employed_JDAdvantage', 'Employed_ProfessionPosition','Employed_OtherPosition', 
       'Employed_by_LawSchool','Employed_Undeterminable', 
       'EnrolledInGraduateStudies','Unemployed', 'status_Unknown']].copy()

employment

Unnamed: 0,SchoolName,Year,time,term,Employed_BarPassageRequired,Employed_JDAdvantage,Employed_ProfessionPosition,Employed_OtherPosition,Employed_by_LawSchool,Employed_Undeterminable,EnrolledInGraduateStudies,Unemployed,status_Unknown
0,"AKRON, UNIVERSITY OF",2012,parttime,shortterm,2.0,,0.0,0.0,,0.0,,,
1,"ALABAMA, UNIVERSITY OF",2012,parttime,shortterm,0.0,,0.0,0.0,,0.0,,,
2,ALBANY LAW SCHOOL OF UNION UNIVERSITY,2012,parttime,shortterm,8.0,,2.0,4.0,,1.0,,,
3,AMERICAN UNIVERSITY,2012,parttime,shortterm,9.0,,7.0,3.0,,3.0,,,
4,APPALACHIAN SCHOOL OF LAW,2012,parttime,shortterm,2.0,,0.0,1.0,,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,WILLAMETTE UNIVERSITY,2021,unknown,unknown,,,,,,,,,0.0
192,WILLIAM AND MARY LAW SCHOOL,2021,unknown,unknown,,,,,,,,,2.0
193,"WISCONSIN, UNIVERSITY OF",2021,unknown,unknown,,,,,,,,,0.0
194,"WYOMING, UNIVERSITY OF",2021,unknown,unknown,,,,,,,,,1.0


In [64]:
#wide to long
employment = pd.melt(employment, 
                id_vars=['SchoolName', 'Year', 'time', 'term'], 
                value_vars=['Employed_BarPassageRequired',
                               'Employed_JDAdvantage', 'Employed_ProfessionPosition',
                               'Employed_OtherPosition', 'Employed_by_LawSchool',
                               'Employed_Undeterminable', 'EnrolledInGraduateStudies', 
                               'Unemployed', "status_Unknown"])
employment = employment.rename(columns = {'variable': 'employment_type', 'value': 'number_grads'})
employment

Unnamed: 0,SchoolName,Year,time,term,employment_type,number_grads
0,"AKRON, UNIVERSITY OF",2012,parttime,shortterm,Employed_BarPassageRequired,2.0
1,"ALABAMA, UNIVERSITY OF",2012,parttime,shortterm,Employed_BarPassageRequired,0.0
2,ALBANY LAW SCHOOL OF UNION UNIVERSITY,2012,parttime,shortterm,Employed_BarPassageRequired,8.0
3,AMERICAN UNIVERSITY,2012,parttime,shortterm,Employed_BarPassageRequired,9.0
4,APPALACHIAN SCHOOL OF LAW,2012,parttime,shortterm,Employed_BarPassageRequired,2.0
...,...,...,...,...,...,...
126877,WILLAMETTE UNIVERSITY,2021,unknown,unknown,status_Unknown,0.0
126878,WILLIAM AND MARY LAW SCHOOL,2021,unknown,unknown,status_Unknown,2.0
126879,"WISCONSIN, UNIVERSITY OF",2021,unknown,unknown,status_Unknown,0.0
126880,"WYOMING, UNIVERSITY OF",2021,unknown,unknown,status_Unknown,1.0


In [65]:
#employment.to_csv("/Users/natdeacon/Desktop/GitHub/Law_School_Outcomes_Economics/employmentClean.csv")

In [76]:
#employment.groupby(['SchoolName', 'Year', 'employment_type']).agg({"number_grads":"sum"})
#employment[employment['term' == 'shortterm']]
employment[(employment["employment_type"] == 'EnrolledInGraduateStudies') & 
           (employment["time"] == 'gradSchool') &
          (employment["SchoolName"] == 'AKRON, UNIVERSITY OF')]

Unnamed: 0,SchoolName,Year,time,term,employment_type,number_grads
92644,"AKRON, UNIVERSITY OF",2012,gradSchool,gradSchool,EnrolledInGraduateStudies,3.0
92847,"AKRON, UNIVERSITY OF",2013,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
93050,"AKRON, UNIVERSITY OF",2014,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
93254,"AKRON, UNIVERSITY OF",2015,gradSchool,gradSchool,EnrolledInGraduateStudies,4.0
93458,"AKRON, UNIVERSITY OF",2016,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
93661,"AKRON, UNIVERSITY OF",2017,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
93864,"AKRON, UNIVERSITY OF",2018,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
94067,"AKRON, UNIVERSITY OF",2019,gradSchool,gradSchool,EnrolledInGraduateStudies,0.0
94265,"AKRON, UNIVERSITY OF",2020,gradSchool,gradSchool,EnrolledInGraduateStudies,0.0
94462,"AKRON, UNIVERSITY OF",2021,gradSchool,gradSchool,EnrolledInGraduateStudies,1.0
