In [14]:
import pandas as pd
import numpy as np
import re
import requests
from bs4 import BeautifulSoup

# Scraping

1. [UCHICAGO-2024-25](#uchicago-2024-25)
2. [UCB-ECON-2024-2025](#ucb-econ-2024-2025)
3. [UCB-Econ-23-24-PHD](#UCB-Econ-23-24-PHD)
4. [UCB-ARE](#UCB-ARE)
5. [HASS](#HASS)
6. [UCHICAGO-BS](#UCHICAGO-BS)
7. [UCHICAGO-Public-Policy](#UCHICAGO-Public-Policy)


In [15]:
ucichagowebsite = "https://economics.uchicago.edu/people/job-market-candidates"
response = requests.get(ucichagowebsite)
soup = BeautifulSoup(response.text, 'html.parser')

### UCHICAGO-2024-25

In [16]:
chicago_candidate_23_24 = pd.read_csv('data/UChicago_ECON_jm_candidates.csv')

chicago_candidate_23_24

Unnamed: 0.1,Unnamed: 0,Name,Primary Research Focus,Secondary Research Focus,References,Email,Website,Website Link,Job Market Paper,Job Market Paper Link,School,Year,Department
0,0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
3,3,Zhiyu Fu,"Primary Research Focus: International Finance,...",Secondary Research Focus: Macro Finance,"References: Greg Kaplan (Co-chair), Ralph Koij...",Email: zhiyufu@uchicago.edu,Fu Personal Website,https://fuzhiyu.me,Capital Flows and the Making of Risky Currencies,https://fuzhiyu.me/assets/pdf/capital_flow_and...,UChicago,2023-2024,Economics
4,4,Michael Galperin,"Primary Research Focus: Labor Economics, Publi...",Secondary Research Focus: Economics of Education,"References: Michael Greenstone, Magne Mogstad,...",Email: mgalperin@uchicago.edu,Galperin Personal Website,https://michaelgalperin.github.io,"Targeting, Selection, and the Impact of Grant ...",https://michaelgalperin.github.io/files/jmp.pdf,UChicago,2023-2024,Economics
5,5,Esperanza Johnson,Primary Research Focus: Industrial Organization,Secondary Research Focus: Economics of Education,"References: Ali Hortacsu (Chair), Michael Dine...",Email: esperanza@uchicago.edu,Johnson Personal Website,https://www.esperanzajohnson.com/,Supply Responses to Targeted Government Aid: E...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
6,6,Toshiaki Komatsu,"Primary Research Focus: International Trade, M...","References: Felix Tintelnot (Chair), Magne Mog...","Job Market Paper Title: ""Job Ladder over Produ...",Komatsu Personal Website,Skip to main content,#main-content,Job Ladder over Production Networks,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
7,7,Nadav Kunievsky,"Primary Research Focus: Labor Economics, Appli...",Secondary Research Focus: Industrial Organization,"References: Stephane Bonhomme (Chair), Ben Bro...",nadavkunievsky@uchicago.edu,Kunievsky Personal Website,https://nkunievsky.github.io/,Kunievsky Personal Website,https://nkunievsky.github.io/,UChicago,2023-2024,Economics
8,8,Ivan Chit Pan Kwok,"Primary Research Focus: Labor Economics, Behav...",Secondary Research Focus: Industrial Organizat...,"References: Leonardo Bursztyn (Chair), Devin P...",ivankwok@uchicago.edu | Kwok Personal Website,Skip to main content,#main-content,Kwok Personal Website,https://sites.google.com/view/ivancpkwok/,UChicago,2023-2024,Economics
9,9,Marco Loseto,Primary Research Focus: Industrial Organizatio...,Secondary Research Focus: Household Finance,"References: Ali Hortacsu (Co-Chair), Lars Hans...",Email: mloseto@uchicago.edu,Loseto Personal Website,https://mloseto.github.io/,"Plan Menus, Retirement Portfolios, and Investo...",https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics


In [17]:
chicago_candidate_23_24.to_csv('chicago_candidate_23_24.csv', index=False)

### UCB-ECON-2024-2025

In [18]:

url = "https://econ.berkeley.edu/graduateplacement/job-market-candidates-phd"

response = requests.get(url)

soup_2 = BeautifulSoup(response.content, 'html.parser')

candidates_section = soup_2.find('div', class_='view-content')
with open('candidates_section.html', 'w', encoding='utf-8') as file:
    file.write(str(candidates_section))



In [19]:
candidate_blocks = soup_2.find_all('div', class_='teaser_display')

ucb_candidates_data = []

for block in candidate_blocks:
    entry_year_tag = block.find('div', class_='display-program-entry')
    entry_year = entry_year_tag.get_text(strip=True) if entry_year_tag else None

    name_tag = block.find('div', class_='display-name')
    name = name_tag.get_text(strip=True) if name_tag else None

    fields_tag = block.find('div', class_='display-fields')
    fields = fields_tag.get_text(strip=True) if fields_tag else None

    homepage_tag = block.find('div', class_='display-homepage')
    homepage_link = homepage_tag.find('a')['href'] if homepage_tag and homepage_tag.find('a') else None

    email_tag = block.find('div', class_='display-email')
    email_link = email_tag.find('a')['href'].replace('mailto:', '') if email_tag and email_tag.find('a') else None

    ucb_candidates_data.append({
        'Name': name,
        'Entry Year': entry_year,
        'Fields': fields,
        'Website': homepage_link,
        'Email': email_link
    })

df_ucb_2024_2025 = pd.DataFrame(ucb_candidates_data)
df_ucb_2024_2025.to_csv('ucb_2024_2025.csv', index=False)
df_ucb_2024_2025['School'] = 'UC Berkeley'
df_ucb_2024_2025['Year'] = '2024-2025'
df_ucb_2024_2025.head()

Unnamed: 0,Name,Entry Year,Fields,Website,Email,School,Year
0,"Baker, Sarah",Program Entry 2019,"Public Finance, Urban Economics, Applied Micro...",https://www.sarah-baker.com/,sarah.baker@berkeley.edu,UC Berkeley,2024-2025
1,"Bakhareva, Daria",Program Entry,"Behavioral and Experimental Economics, Politic...",https://sites.google.com/view/dbakhareva/home,dbakhareva@berkeley.edu,UC Berkeley,2024-2025
2,"Boctor, Valerie",Program Entry 2018,"Household Finance, Macroeconomics",https://valerieboctor.github.io/,valboctor@berkeley.edu,UC Berkeley,2024-2025
3,"Breach, Tomas",Program Entry 2018,"Macroeconomics, Financial Economics",https://sites.google.com/berkeley.edu/tomas-br...,tomas_breach@berkeley.edu,UC Berkeley,2024-2025
4,"Candia Gonzalez, Bernardo",Program Entry 2018,"Macroeconomics, International Economics",https://www.bernardocandia.com/,bernardo_candia@berkeley.edu,UC Berkeley,2024-2025


In [20]:
chicago_candidate_23_24['School'] = 'UChicago'
chicago_candidate_23_24['Year'] = '2023-2024'
chicago_candidate_23_24.head()
#historical placements -ARE , HASS, UCHICAGO - BS, Public policy


Unnamed: 0.1,Unnamed: 0,Name,Primary Research Focus,Secondary Research Focus,References,Email,Website,Website Link,Job Market Paper,Job Market Paper Link,School,Year,Department
0,0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
3,3,Zhiyu Fu,"Primary Research Focus: International Finance,...",Secondary Research Focus: Macro Finance,"References: Greg Kaplan (Co-chair), Ralph Koij...",Email: zhiyufu@uchicago.edu,Fu Personal Website,https://fuzhiyu.me,Capital Flows and the Making of Risky Currencies,https://fuzhiyu.me/assets/pdf/capital_flow_and...,UChicago,2023-2024,Economics
4,4,Michael Galperin,"Primary Research Focus: Labor Economics, Publi...",Secondary Research Focus: Economics of Education,"References: Michael Greenstone, Magne Mogstad,...",Email: mgalperin@uchicago.edu,Galperin Personal Website,https://michaelgalperin.github.io,"Targeting, Selection, and the Impact of Grant ...",https://michaelgalperin.github.io/files/jmp.pdf,UChicago,2023-2024,Economics


In [21]:
berkley_ARE = 'https://are.berkeley.edu/job-candidates/past-placements'
response_ARE = requests.get(berkley_ARE)
soup_ARE = BeautifulSoup(response_ARE.content, 'html.parser')

candidate_blocks_ARE = soup_ARE.find_all('div', class_='teaser_display')

ucb_ARE_data = []

for block in candidate_blocks_ARE:
    name_tag = block.find('div', class_='display-name')
    

soup_ARE
with open('ucb_ARE.html', 'w', encoding='utf-8') as file:
    file.write(str(soup_ARE))

### UCB-Econ-23-24-PHD

In [22]:
UCB_ECON_PHD_2023_24 = [
    {
        'Name': 'Felipe Arteaga',
        'Degree': 'B.S. (Industrial Engineering), Pontificial Universidad Católica de Chile M.S. (Industrial Engineering), Pontificial Universidad Católica de Chile',
        'Primary Desired Research and Fields of Interest': 'Industrial Organization Labor Economics',
        'Dissertation Title': 'Essays on Empirical Market Design',
        'Expected Date of Completion': 'May 2024',
        'References': 'Ben Handel*, Christopher Walters, Christopher Neilson'
    },
    {
        'Name': 'Hadar Avivi',
        'Degree': 'B.A. (Economics), Tel Aviv University',
        'Primary Desired Research and Fields of Interest': 'Labor Economics Applied Econometrics',
        'Dissertation Title': 'Essays in Intergenerational Mobility and Inequality',
        'Expected Date of Completion': 'May 2024',
        'References': 'Patrick Kline*, Christopher Walters, Hilary Hoynes'
    },
    {
        'Name': 'Martin Caruso-Bloeck',
        'Degree': 'B.A. (Economics), Universidad Nacional de La Plata',
        'Primary Desired Research and Fields of Interest': 'Macroeconomics International Economics',
        'Dissertation Title': 'Essays in Macroeconomics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Yuriy Gorodnichenko*, David Romer, Jon Steinsson'
    },
    {
        'Name': 'Andrea Cerrato',
        'Degree': 'B.Sc. (Economics, Management, Finance), Bocconi University M.Sc. (Economics), London School of Economics',
        'Primary Desired Research and Fields of Interest': 'Macroeconomics Labor Economics',
        'Dissertation Title': 'Essays in Macroeconomics and Labor Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Emi Nakamura*, Enrico Moretti, Benjamin Schoefer, Christina Romer'
    },
    {
        'Name': 'Bryan Chu',
        'Degree': 'B.Sc. (Applied Mathematics-Economics), Brown University (Magna Cum Laude)',
        'Primary Desired Research and Fields of Interest': 'Health Economics Industrial Organization Labor Economics Organizational Economics',
        'Dissertation Title': 'Essays in Hospital Organization, Infrastructure, and Productivity',
        'Expected Date of Completion': 'May 2024',
        'References': 'Ben Handel*, Jon Kolstad, Nano Barahona'
    },
    {
        'Name': 'Kevin Dano',
        'Degree': 'Diplôme d’ingénieur (Economics and Statistics), ENSAE Paris Tech M.Sc. (Economics), London School of Economics and Political Science',
        'Primary Desired Research and Fields of Interest': 'Econometrics Economic Theory',
        'Dissertation Title': 'Essays in Panel Data and Network Econometrics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Bryan Graham*, Stephane Bonhomme, Demian Pouzo'
    },
    {
        'Name': 'Madeline Duhon',
        'Degree': 'B.A. (Economics), Stanford University (with distinction) M.S. (Management Science & Engineering), Stanford University Ph.D. (Economics), University of California, Berkeley',
        'Primary Desired Research and Fields of Interest': 'Development Economics Labor Economics',
        'Dissertation Title': 'Essays in Development and Demography',
        'Expected Date of Completion': 'Conferred: May 2022',
        'References': 'Edward Miguel*, Benjamin Faber, Supreet Kaur'
    },
    {
        'Name': 'Nick Gebbia',
        'Degree': 'B.A. (Economics), Pomona College',
        'Primary Desired Research and Fields of Interest': 'Public Economics Labor Economics Applied Microeconomics',
        'Dissertation Title': 'Essays in Public and Labor Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Patrick Kline*, Jesse Rothstein, Dmitry Taubinsky, Emmanuel Saez'
    },
    {
        'Name': 'Abhi Gupta',
        'Degree': 'B.S. (Mathematics, Statistics, Economics), University of Chicago',
        'Primary Desired Research and Fields of Interest': 'Macroeconomics International Economics',
        'Dissertation Title': 'Essays in Empirical Macroeconomics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Yuriy Gorodnichenko*, Jon Steinsson, David Romer'
    },
    {
        'Name': 'Collin Jones',
        'Degree': 'B.A. (Economics and Mathematics), College of William & Mary',
        'Primary Desired Research and Fields of Interest': 'Macroeconomics Finance',
        'Dissertation Title': 'Essays in Macroeconomics and Finance',
        'Expected Date of Completion': 'May 2024',
        'References': 'David Romer*, David Sraer, Jón Steinsson'
    },
    {
        'Name': 'Sreeraahul Kancherla',
        'Degree': 'B.A. (Mathematics and Economics), University of Notre Dame (dual honors, summa cum laude)',
        'Primary Desired Research and Fields of Interest': 'Public Economics Labor Economics',
        'Dissertation Title': 'Essays in Public and Labor Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Emmanuel Saez*, Jesse Rothstein*, Danny Yagan'
    },
    {
        'Name': 'Oliver Kim',
        'Degree': 'A.B. (Economics), Harvard College, (Magna cum laude, high honors)',
        'Primary Desired Research and Fields of Interest': 'Development Economics Economic History International Economics',
        'Dissertation Title': 'Essays in Development Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Edward Miguel*, Benjamin Faber, Jon Steinsson, Barry Eichengreen'
    },
    {
        'Name': 'Lukas Leucht',
        'Degree': 'B.S. (Economics), University of Munich (LMU) B.A. (Philosophy), University of Munich (LMU) M.S. (Economics), Barcelona Graduate School of Economics M.S. (Business & Public Policy), UC Berkeley, Haas School of Business',
        'Primary Desired Research and Fields of Interest': 'Political Economy Economic History Organizational Economics',
        'Dissertation Title': 'Essays on Political Economy and Historical Development',
        'Expected Date of Completion': 'May 2024',
        'References': 'Ernesto Dal Bo*, Guo Xu*, Noam Yuchtman, Barry Eichengreen'
    },
    {
        'Name': 'Felipe Lobel',
        'Degree': 'B.A. (Economics), Universidade Federal do Rio de Janeiro',
        'Primary Desired Research and Fields of Interest': 'Public Economics Labor Economics Development Economics',
        'Dissertation Title': 'Essays in Inequality and Tax Policy',
        'Expected Date of Completion': 'May 2024',
        'References': 'Emmanuel Saez*, Patrick Kline*, Alan Auerbach, Benjamin Schoefer'
    },
    {
        'Name': 'Muhammad Zia Mehmood',
        'Degree': 'B.Sc. (Economics), Lahore University of Management Sciences (honors) M.P.P (Public Policy), Harvard University M.Sc. (Business Administration), University of California, Berkeley',
        'Primary Desired Research and Fields of Interest': 'Development Economics Political Economy',
        'Dissertation Title': 'Essays on Development and Political Economy',
        'Expected Date of Completion': 'May 2024',
        'References': 'Edward Miguel*, Frederico Finan*, Steve Tadelis, Guo Xu'
    },
    {
        'Name': 'Miguel Ortiz',
        'Degree': 'B.A. (Economics), Universidad de los Andes (summa cum laude)',
        'Primary Desired Research and Fields of Interest': 'Political Economy Development Economics Psychology and Economics',
        'Dissertation Title': 'Essays in Development Economics and Political Economy',
        'Expected Date of Completion': 'May 2024',
        'References': 'Ernesto Dal Bo*, Francesco Trebbi*, Edward Miguel, Frederico Finan'
    },
    {
        'Name': 'Farzad Pourbabaee',
        'Degree': 'B.Sc. (Electrical Engineering), Sharif University of Technology M.A. (Mathematics), McMaster University M.A. (Statistics), University of California, Berkeley Ph.D. (Economics), University of California, Berkeley',
        'Primary Desired Research and Fields of Interest': 'Economic Theory Microeconomics',
        'Dissertation Title': 'Essays in Venture Capital, Reputation and Learning',
        'Expected Date of Completion': 'Conferred: May 2021',
        'References': 'Robert Anderson*, Chris Shannon, Federico Echenique, Gustavo Manso, Omer Tamuz'
    },
    {
        'Name': 'Cesia Sanchez',
        'Degree': 'B.A. (Economics), Texas A&M University, College Station',
        'Primary Desired Research and Fields of Interest': 'Labor Economics Applied Microeconomics Demography',
        'Dissertation Title': 'Essays in Demography, Retirement, and Labor Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Jesse Rothstein*, Barry Eichengreen, Ellora Derenoncourt, Jonathan Meer'
    },
    {
        'Name': 'Yassine Sbai Sassi',
        'Degree': 'Diplôme D\'Ingénieur (Applied Mathematics and Economics), Ecole Polytechnique Master (Economics), Université Paris Saclay Diplôme D\'Ingénieur (Finance and Risk Management), ENSAE Paris',
        'Primary Desired Research and Fields of Interest': 'Econometrics Economic Theory',
        'Dissertation Title': 'Essays in Network Econometrics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Bryan Graham*, Michael Jansson, Demian Pouzo'
    },
    {
        'Name': 'Landin Smith',
        'Degree': 'B.A. (International Economics and Business), University of Mississippi M.A. (International and Development Economics), Yale University',
        'Primary Desired Research and Fields of Interest': 'Labor Economics Applied Microeconomics Development Economics',
        'Dissertation Title': 'Essays on Education and Development',
        'Expected Date of Completion': 'May 2024',
        'References': 'Edward Miguel*, Jesse Rothstein, Enrico Moretti, Barry Eichengreen'
    },
    {
        'Name': 'Nicholas Swanson',
        'Degree': 'B.A. (Economics), University of Cambridge (First-Class Honors) M.Sc. (Economics), London School of Economics (Distinction)',
        'Primary Desired Research and Fields of Interest': 'Development Economics Psychology and Economics Labor Economics',
        'Dissertation Title': 'Essays in Development Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'Supreet Kaur*, Edward Miguel*, Ned Augenblick, Frederico Finan'
    },
    {
        'Name': 'Andrew Tai',
        'Degree': 'B.A. (Economics and Applied Mathematics), University of California, Berkeley (with Highest Honors)',
        'Primary Desired Research and Fields of Interest': 'Economic Theory',
        'Dissertation Title': 'Essays in Matching and Rationalizability',
        'Expected Date of Completion': 'May 2024',
        'References': 'Haluk Ergin*, Chris Shannon, Federico Echenique'
    },
    {
        'Name': 'John Wieselthier',
        'Degree': 'B.A. (Economics and Mathematics), University of California, Berkeley',
        'Primary Desired Research and Fields of Interest': 'Labor Economics Urban Economics',
        'Dissertation Title': 'Labor, Income, and Crime',
        'Expected Date of Completion': 'May 2024',
        'References': 'Patrick Kline*, Hilary Hoynes, Christopher Walters'
    },
    {
        'Name': 'David Wu',
        'Degree': 'B.A. (Economics), Peking University B.S. (Applied Mathematics), Peking University',
        'Primary Desired Research and Fields of Interest': 'Development Economics Labor Economics Political Economy',
        'Dissertation Title': 'Essays on Firm Hiring in Ethiopia',
        'Expected Date of Completion': 'May 2024',
        'References': 'Edward Miguel*, Christopher Walters*, Supreet Kaur, Benjamin Schoefer'
    },
    {
        'Name': 'Kai Zen',
        'Degree': 'B.Comm./B. Ec. (Finance, Economics, Econometrics), University of New South Wales B. Ec. (Economics), University of New South Wales (Honours 1st Class and the University Medal)',
        'Primary Desired Research and Fields of Interest': 'Labor Economics Public Finance',
        'Dissertation Title': 'Essays on Labor Economics',
        'Expected Date of Completion': 'May 2024',
        'References': 'David Card*, Sydnee Caldwell, Christopher Walters'
    }
]

UCB_ECON_PHD_2023_24_df = pd.DataFrame(UCB_ECON_PHD_2023_24)
UCB_ECON_PHD_2023_24_df['School'] = 'UC Berkeley'
UCB_ECON_PHD_2023_24_df['Year'] = '2023-2024'
UCB_ECON_PHD_2023_24_df.head()


Unnamed: 0,Name,Degree,Primary Desired Research and Fields of Interest,Dissertation Title,Expected Date of Completion,References,School,Year
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024
3,Andrea Cerrato,"B.Sc. (Economics, Management, Finance), Boccon...",Macroeconomics Labor Economics,Essays in Macroeconomics and Labor Economics,May 2024,"Emi Nakamura*, Enrico Moretti, Benjamin Schoef...",UC Berkeley,2023-2024
4,Bryan Chu,"B.Sc. (Applied Mathematics-Economics), Brown U...",Health Economics Industrial Organization Labor...,"Essays in Hospital Organization, Infrastructur...",May 2024,"Ben Handel*, Jon Kolstad, Nano Barahona",UC Berkeley,2023-2024


### UCB-ARE


In [23]:
link_UCB_ARE = "https://are.berkeley.edu/job-candidates/past-placements"

response = requests.get(link_UCB_ARE)

soup = BeautifulSoup(response.content, 'html.parser')

with open("UCB_ARE_page.html", "w", encoding="utf-8") as file:
    file.write(response.text)

In [24]:
job_placement_names = []
job_placement_positions = []
job_placement_departments = []
job_placement_institutions = []
job_placement_years = []

placement_years = soup.find_all('h2')
placement_tables = soup.find_all('table')

for placement_year, placement_table in zip(placement_years, placement_tables):
    placement_year_text = placement_year.get_text(strip=True)  # Extract the year from the <h2> tag
    placement_rows = placement_table.find_all('tr')
    for placement_row in placement_rows:
        placement_cols = placement_row.find_all('td')
        if len(placement_cols) == 4:  # Ensure there are four columns
            job_placement_names.append(placement_cols[0].get_text(strip=True))
            job_placement_positions.append(placement_cols[1].get_text(strip=True))
            job_placement_departments.append(placement_cols[2].get_text(strip=True))
            job_placement_institutions.append(placement_cols[3].get_text(strip=True))
            job_placement_years.append(placement_year_text)  # Append the year to the job_placement_years list

UCB_ARE_job_placement_data = {
    'Name': job_placement_names,
    'Position': job_placement_positions,
    'Department': job_placement_departments,
    'Institution': job_placement_institutions,
    'Entry Year': job_placement_years
}
UCB_ARE_job_placement_df = pd.DataFrame(UCB_ARE_job_placement_data)

UCB_ARE_job_placement_df['School'] = 'UC Berkeley'
UCB_ARE_job_placement_df['Institution'] = 'ARE'
UCB_ARE_job_placement_df['Year'] = '2023-2024'
UCB_ARE_job_placement_df.head()


Unnamed: 0,Name,Position,Department,Institution,Entry Year,School,Year
0,"DanielAgness(DEV, LAB)",Assistant Professor,Economics,ARE,2024,UC Berkeley,2023-2024
1,"Pierre Biscaye(DEV, ERE)",Chaire Professeur Junior,CERDI,ARE,2024,UC Berkeley,2023-2024
2,"Jesse Buchsbaum(ERE, IO)",Fellow,,ARE,2024,UC Berkeley,2023-2024
3,"Michelle Chen(DEV, IO, POL)",Economist,DIME,ARE,2024,UC Berkeley,2023-2024
4,Daniel Kannell (ERE),Economist,,ARE,2024,UC Berkeley,2023-2024


## HASS

In [25]:
HASS_data = {
    2024: [
        {"name": "Sandy Campbell", "position": "Postdoc", "past_institution": "University of California, Los Angeles", "entry_time": 2024},
        {"name": "Andrew Choi", "position": "Postdoc", "past_institution": "University College London", "entry_time": 2024},
        {"name": "Tianyu Han", "position": "Assistant Professor", "past_institution": "Hong Kong University of Science and Technology", "entry_time": 2024},
        {"name": "Lukas Leucht", "position": "Assistant Professor", "past_institution": "University of Oxford", "entry_time": 2024},
        {"name": "Kimberlyn Munevar", "position": "Assistant Professor", "past_institution": "Texas A&M University", "entry_time": 2024},
        {"name": "Miguel Ortiz", "position": "Assistant Professor", "past_institution": "University of British Columbia – Vancouver School of Economics", "entry_time": 2024},
        {"name": "Charlotte (Charlie) Townsend", "position": "Postdoc", "past_institution": "Cornell University – ILR School", "entry_time": 2024},
        {"name": "Matteo Tranchero", "position": "Assistant Professor", "past_institution": "University of Pennsylvania – The Wharton School", "entry_time": 2024}
    ],
    2023: [
        {"name": "Stephen Baum", "position": "Postdoc", "past_institution": "Washington University in St. Louis", "entry_time": 2023},
        {"name": "Derek Brown", "position": "Assistant Professor", "past_institution": "Columbia Business School", "entry_time": 2023},
        {"name": "Can Huang", "position": "Assistant Professor", "past_institution": "University of Illinois Urbana-Champaign", "entry_time": 2023},
        {"name": "Jinsung Hwang", "position": "Assistant Professor", "past_institution": "Hankuk University of Foreign Studies", "entry_time": 2023},
        {"name": "Maris Jensen", "position": "Assistant Professor", "past_institution": "University of Iowa", "entry_time": 2023},
        {"name": "Dominik Jurek", "position": "Associate", "past_institution": "Cornerstone Research", "entry_time": 2023},
        {"name": "Sooji Kim", "position": "Associate", "past_institution": "Cornerstone Research", "entry_time": 2023},
        {"name": "Sonya Mishra", "position": "Assistant Professor", "past_institution": "Dartmouth College – Tuck School of Business", "entry_time": 2023},
        {"name": "Melissa Wang", "position": "Data Scientist", "past_institution": "Yendo", "entry_time": 2023},
        {"name": "Simon Xu", "position": "Scientist", "past_institution": "Harvard University – Harvard Business School", "entry_time": 2023},
        {"name": "Jesse (Yunfei) Yao", "position": "Assistant Professor", "past_institution": "The Chinese University of Hong Kong", "entry_time": 2023},
        {"name": "Xiao Yin", "position": "Lecturer", "past_institution": "University College London", "entry_time": 2023},
        {"name": "Fan Zhang", "position": "Assistant Professor", "past_institution": "Nova School of Business and Economics", "entry_time": 2023}
    ],
    2022: [
        {"name": "Samanvaya Agarwal", "position": "Senior Associate", "past_institution": "Charles River Associates", "entry_time": 2022},
        {"name": "Kristin Donnelly", "position": "Assistant Professor", "past_institution": "University of Chicago – Booth", "entry_time": 2022},
        {"name": "Łukasz Langer", "position": "PhD Associate", "past_institution": "Cornerstone Research", "entry_time": 2022},
        {"name": "Petr Martynov", "position": "Economist", "past_institution": "Uber", "entry_time": 2022},
        {"name": "Alexey Sinyashin", "position": "Senior Associate", "past_institution": "Charles River Associates", "entry_time": 2022},
        {"name": "Daniel H. Stein", "position": "People Data Scientist", "past_institution": "Searchlight", "entry_time": 2022},
        {"name": "Young Yoon", "position": "Staff Data Scientist", "past_institution": "Walmart Global Tech", "entry_time": 2022}
    ],
    2021: [
        {"name": "Byung Hyun Ahn", "position": "Researcher", "past_institution": "Dimensional Fund Advisors", "entry_time": 2021},
        {"name": "Andres Gonzalez-Lira", "position": "Postdoctoral Associate, Yale; Assistant Professor", "past_institution": "Pontificia Universidad Católica de Chile", "entry_time": 2021},
        {"name": "Muhammad Yasir Khan", "position": "Assistant Professor", "past_institution": "University of Pittsburgh", "entry_time": 2021},
        {"name": "Mohammad Abbas Rezaei", "position": "Data Scientist", "past_institution": "Uber", "entry_time": 2021},
        {"name": "Thiago Scot", "position": "Research Economist", "past_institution": "World Bank’s Development Impact Group (DIME)", "entry_time": 2021},
        {"name": "Mehmet Seflek", "position": "Data Scientist", "past_institution": "neo.tax", "entry_time": 2021},
        {"name": "Vincent Skiera", "position": "Quantitative Researcher", "past_institution": "Two Sigma Investments", "entry_time": 2021},
        {"name": "Gauri Subramani", "position": "Assistant Professor", "past_institution": "Lehigh University", "entry_time": 2021},
        {"name": "Stephen Walker", "position": "Consultant", "past_institution": "Self-employed", "entry_time": 2021},
        {"name": "Zihao (Harry) Zhou", "position": "Assistant Professor", "past_institution": "University College London – School of Management", "entry_time": 2021}
    ],
    2019: [
        {"name": "Abhay Aneja", "position": "Assistant Professor", "past_institution": "Berkeley Law"},
        {"name": "Laura Boudreau", "position": "Assistant Professor", "past_institution": "Columbia Business School – Economics Division"},
        {"name": "Rachel Habbert", "position": "Senior Behavioral Scientist", "past_institution": "Cultivate"},
        {"name": "Hyoseok Kang", "position": "Assistant Professor", "past_institution": "USC"},
        {"name": "Richard Lu", "position": "Data Scientist", "past_institution": "Square"},
        {"name": "Paulo Manoel", "position": "Assistant Professor", "past_institution": "University of Kentucky"},
        {"name": "Eddie Ning", "position": "Assistant Professor", "past_institution": "Cheung Kong Graduate School of Business"},
        {"name": "Michael O’Donnell", "position": "Assistant Professor", "past_institution": "Georgetown"},
        {"name": "Derek Schatz", "position": "Senior Consulting Analyst", "past_institution": "Accenture"},
        {"name": "Daron Sharps", "position": "Data Science Fellow", "past_institution": "Insight"}
    ],
     2018: [
        {"name": "Carlos Avenancio-León", "position": "Postdoc, Assistant Professor", "past_institution": "Indiana University – Bloomington"},
        {"name": "Nan Chen", "position": "Assistant Professor", "past_institution": "National University of Singapore"},
        {"name": "Tristan Fitzgerald", "position": "Assistant Professor", "past_institution": "Texas A&M University"},
        {"name": "Sheisha Kulkarni", "position": "Postdoc, Assistant Professor", "past_institution": "The University of Virginia"},
        {"name": "Henry Laurion", "position": "Assistant Professor", "past_institution": "University of Colorado Boulder"},
        {"name": "Sibo Lu", "position": "Data Scientist", "past_institution": "Upwork"},
        {"name": "Sanaz Mobasseri", "position": "Assistant Professor", "past_institution": "Boston University"},
        {"name": "Andrew Schwartz", "position": "Visiting Assistant Professor", "past_institution": "University of Georgia"},
        {"name": "Aisling Scott", "position": "People Analytics Team", "past_institution": "Facebook"},
        {"name": "Samuel Tan", "position": "Assistant Professor", "past_institution": "Singapore Management University"}
    ],
    2017: [
        {"name": "Yoonha Kim", "position": "Visiting Assistant Professor", "past_institution": "Georgetown University"},
        {"name": "Sanket Korgaonkar", "position": "Assistant Professor of Real Estate", "past_institution": "Pennsylvania State University"},
        {"name": "Jeffrey Kuhn", "position": "Assistant Professor", "past_institution": "University of North Carolina at Chapel Hill"},
        {"name": "Haoyang Liu", "position": "Assistant Professor", "past_institution": "Florida State University"},
        {"name": "Parham Holakouee", "position": "Principal and Head of Operations", "past_institution": "Global Innovation Catalyst, LCC"},
        {"name": "Weiyi Ng", "position": "Assistant Professor", "past_institution": "National University of Singapore"},
        {"name": "Hannah Perfecto", "position": "Assistant Professor of Marketing", "past_institution": "Washington University in St. Louis"},
        {"name": "Santiago Truffa", "position": "Visiting Assistant Professor", "past_institution": "Tulane University"},
        {"name": "Yu Wang", "position": "Assistant Professor", "past_institution": "University of Houston"},
        {"name": "Calvin Zhang", "position": "Financial Economist", "past_institution": "Philadelphia Federal Reserve Bank"},
        {"name": "Zachary Zhong", "position": "Assistant Professor", "past_institution": "University of Toronto"}
    ],
    2016: [
        {"name": "Korcan Ak", "position": "Quantitative Research Analyst", "past_institution": "Marshall Wace North America LP"},
        {"name": "Kate Ashley", "position": "Assistant Professor", "past_institution": "Montclair State University"},
        {"name": "Moshe Barach", "position": "Visiting Assistant Professor", "past_institution": "Georgetown University"},
        {"name": "Aya Bellicha", "position": "Data Scientist", "past_institution": "Ten-X"},
        {"name": "Andy Chen", "position": "Postdoctoral Research and Teaching Fellow", "past_institution": "University of British Columbia"},
        {"name": "Angus Hildreth", "position": "Assistant Professor of Management and Organizations", "past_institution": "The Samuel Curtis Johnson Graduate School of Management at Cornell University"},
        {"name": "Yu Jin Kim", "position": "Assistant Professor", "past_institution": "Shanghai Science and Technology University (Shanghai Tech)"},
        {"name": "Nirupama Kulkarni", "position": "Research Director", "past_institution": "Centre for Advanced Financial Research and Learning (CAFRAL)"},
        {"name": "Raymond Leung", "position": "Assistant Professor", "past_institution": "Cheung Kong Graduate School of Business"},
        {"name": "Ryan Liu", "position": "Quantitative Researcher", "past_institution": "BlackRock"},
        {"name": "Jenn Logg", "position": "Post-Doctoral Fellow", "past_institution": "Harvard University"},
        {"name": "Gigi Perry", "position": "Founder and Partner", "past_institution": "Dr. Hagit Perry"},
        {"name": "Scott Roeder", "position": "Postdoctoral Researcher", "past_institution": "Washington University in St. Louis"},
        {"name": "Jim Ryans", "position": "Assistant Professor", "past_institution": "London Business School"},
        {"name": "Eliot Sherman", "position": "Assistant Professor", "past_institution": "London Business School"},
        {"name": "Yu-Man Tam", "position": "Financial Economist", "past_institution": "Office of the Comptroller of the Currency"},
        {"name": "Xinxin Wang", "position": "Assistant Professor", "past_institution": "The University of North Carolina at Chapel Hill"},
        {"name": "Jenny Zha", "position": "Assistant Professor", "past_institution": "George Washington University"}
    ],
    2015: [
        {"name": "Anna Amirdjanova", "position": "Economist", "past_institution": "Board of Governors of the Federal Reserve System"},
        {"name": "Jiakai Chen", "position": "Assistant Professor", "past_institution": "University of Hawaii – Manoa"},
        {"name": "Yu-Ping Chen", "position": "Assistant Professor", "past_institution": "National Taiwan University College of Management"},
        {"name": "Bo Cowgill", "position": "Assistant Professor", "past_institution": "Columbia University"},
        {"name": "Francesco D’Acunto", "position": "Assistant Professor", "past_institution": "University of Maryland, College Park"},
        {"name": "Ronald Espinosa", "position": "Quantitative Research Analyst", "past_institution": "Pacific Investment Management Company (PIMCO)"},
        {"name": "Tarek Ghani", "position": "Assistant Professor", "past_institution": "Washington University in St. Louis – Olin Business School"},
        {"name": "Minah Jung", "position": "Assistant Professor", "past_institution": "New York University"},
        {"name": "T. Tony Ke", "position": "Assistant Professor", "past_institution": "Massachusetts Institute of Technology"},
        {"name": "Brian Reschke", "position": "Assistant Professor", "past_institution": "Brigham Young University"},
        {"name": "Alexander Van Zant", "position": "Post-Doctoral Research Fellow", "past_institution": "University of Pennsylvania – Wharton"},
        {"name": "Ying Zhu", "position": "Post-Doctoral Research Fellow", "past_institution": "Michigan State University"}
    ],
    2014: [
        {"name": "Brian Ayash", "position": "Assistant Professor", "past_institution": "Cal Poly San Luis Obispo"},
        {"name": "Ron Berman", "position": "Assistant Professor", "past_institution": "University of Pennsylvania"},
        {"name": "Fernando Comiran", "position": "Assistant Professor", "past_institution": "University of San Francisco"},
        {"name": "Isaac Hacamo", "position": "Assistant Professor", "past_institution": "Indiana University – Bloomington"},
        {"name": "Bo Liao", "position": "Senior Industrial & Operations Engineer", "past_institution": "Sandisk Corporation"},
        {"name": "So Eun Park", "position": "Assistant Professor", "past_institution": "The University of British Columbia"},
        {"name": "Subprasiri (Jackie) Siriviriyakul", "position": "Assistant Professor", "past_institution": "Baruch College – The City University of New York"},
        {"name": "Michael Weber", "position": "Assistant Professor", "past_institution": "University of Chicago"}
    ],
    2013: [
        {"name": "Pablo Hernandez", "position": "Assistant Professor", "past_institution": "New York University Abu Dhabi"},
        {"name": "Paulo Issler", "position": "Post-Doctorate", "past_institution": "University of California, Berkeley – Fisher Center for Real Estate and Urban Economics"},
        {"name": "Tongil (TI) Kim", "position": "Assistant Professor", "past_institution": "Emory University"},
        {"name": "Amy Nguyen-Chyung", "position": "Assistant Professor", "past_institution": "University of Michigan"},
        {"name": "Pratish Patel", "position": "Assistant Professor", "past_institution": "Cal Poly San Luis Obispo"},
        {"name": "Harm Schuett", "position": "Assistant Professor", "past_institution": "WHU – Otto Beisheim School of Management"},
        {"name": "Orie Shelef", "position": "Post-Doctorate", "past_institution": "Stanford University"},
        {"name": "Shubhranshu Singh", "position": "Assistant Professor", "past_institution": "Johns Hopkins University"},
        {"name": "Mary Kate Stimmler", "position": "Researcher", "past_institution": "Google, Inc."},
        {"name": "Estelle (Yuan) Sun", "position": "Assistant Professor", "past_institution": "Boston University"},
        {"name": "Aydin Uysal", "position": "Researcher", "past_institution": "Menta Capital, LLC"},
        {"name": "Fiona Kun Yao", "position": "Assistant Professor", "past_institution": "University of Illinois, Urbana-Champaign"}
    ]
}




data_flat = []
for year, records in HASS_data.items():
    for record in records:
        data_flat.append(record)

Hass_past_placement_df = pd.DataFrame(data_flat)

Hass_past_placement_df['School'] = 'UC Berkeley'
Hass_past_placement_df['Institution'] = 'HASS'
Hass_past_placement_df['entry_time'] = Hass_past_placement_df['entry_time'].astype('Int64')

Hass_past_placement_df.head()


Unnamed: 0,name,position,past_institution,entry_time,School,Institution
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024,UC Berkeley,HASS
1,Andrew Choi,Postdoc,University College London,2024,UC Berkeley,HASS
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024,UC Berkeley,HASS
3,Lukas Leucht,Assistant Professor,University of Oxford,2024,UC Berkeley,HASS
4,Kimberlyn Munevar,Assistant Professor,Texas A&M University,2024,UC Berkeley,HASS


## UCHICAGO-BS

In [26]:
UCHICAGO_BS_url = "https://www.chicagobooth.edu/phd/job-market-candidates"
response = requests.get(UCHICAGO_BS_url)

soup = BeautifulSoup(response.content, 'html.parser')
soup_text = soup.get_text()

with open("UCHICAGO_BS_page.txt", "w", encoding="utf-8") as file:
    file.write(soup_text)

with open("UCHICAGO_BS_page.html", "w", encoding="utf-8") as file:
    file.write(response.text)


In [27]:
accounting_indices = [i for i in range(len(soup_text)) if soup_text.lower().startswith('accounting', i)]
accounting_indices
start = accounting_indices[-1]
end = [i for i in range(len(soup_text)) if soup_text.lower().startswith('jiaqi', i)][-1]
UCHICAGO_BS_text = soup_text[start:end]
UCHICAGO_BS_text


'Accounting \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\nJonas Piero Dalmazzo                    \n\nJob Market Paper: "The Effects of Alternative Data on Misreporting"\nResearch Interests: Disclosure, Capital Markets, Misreporting, Regulation, Banking\nDissertation Committee: Phillip B. Berger (Chair), Christian Leuz, Delphine Samuels, João Granja\nPersonal Website\nEmail\nCV\nJonas Piero Dalmazzo\n\n\n\n\n\n\n\n\n\n\n\n\nBehavioral Science\nCheck back later for candidates in Behavioral Science.\n\n\n\n\n\n\n\n\n\n\n\n\nEconometrics and Statistics \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\nJizhou Liu                    \n\nJob Market Paper: "Inference for Two-stage Experiments under Covariate-Adaptive Randomization"\nResearch Interests: Econometrics, Causal Inference, Design and Analysis of Randomized Experiments\nDissertation Committee: Christian Hansen (Co-chair), Azeem Shaikh (Co-chair), Tetsuya Kaji, Max Tabord-Meehan\nPersonal Website\nEmail\nCV\nJizhou Liu\n\n\n\n\n\n\n\n\n\n\

In [28]:
lines = UCHICAGO_BS_text.splitlines()

departments = []
names = []
papers = []
interests = []
committees = []

current_department = ""
prev_line = ""

for line in lines:
    line = line.strip()
    
    if line.endswith("Economics") or line.endswith("Statistics") or line.endswith("Finance") or line.endswith("Operations Management") or line.endswith("Accounting"):
        current_department = line
    
    elif "Job Market Paper" in line:
        names.append(prev_line.strip())
        departments.append(current_department)
        papers.append(line.split(":", 1)[1].strip())
    
    elif "Research Interests" in line:
        interests.append(line.split(":", 1)[1].strip())
    elif "Dissertation Committee" in line:
        committees.append(line.split(":", 1)[1].strip())
    
    if line:
        prev_line = line

while len(interests) < len(names):
    interests.append("N/A")
while len(committees) < len(names):
    committees.append("N/A")

df_UCHICAGO_BS_24_25 = pd.DataFrame({
    "Department": departments,
    "Name": names,
    "Job Market Paper": papers,
    "Research Interests": interests,
    "Dissertation Committee": committees
})

df_UCHICAGO_BS_24_25.head()

Unnamed: 0,Department,Name,Job Market Paper,Research Interests,Dissertation Committee
0,Accounting,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del..."
1,Econometrics and Statistics,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-..."
2,Econometrics and Statistics,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te..."
3,Econometrics and Statistics,Cong Zhang,"""Regulatory Uncertainty Pricing in Digital Eco...",Asset Pricing; Applied Econometrics; Financial...,"George M Constantinides (Co-chair), Jeffrey R ..."
4,Economics,Lucy Msall,"""Never-Realized Capital Gains""","Public Economics, Household Finance, Law & Eco...","Marianne Bertrand (co-chair), Mikhail Golosov,..."


In [29]:

websites = []
emails = []
cvs = []

for name in df_UCHICAGO_BS_24_25["Name"]:
    name_found = False
    website = "N/A"
    email = "N/A"
    cv = "N/A"
    from bs4 import BeautifulSoup

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the section containing the candidate's information
    candidate_section = soup.find(text=name)
    if candidate_section:
        parent_div = candidate_section.find_parent('div', class_='copy')

        # Extract the personal website link
        website_tag = parent_div.find('a', text='Personal Website')
        if website_tag:
            website = website_tag['href']

        # Extract the email link
        email_tag = parent_div.find('a', href=True, text='Email')
        if email_tag:
            email = email_tag['href'].replace('mailto:', '')

        # Extract the CV link
        cv_tag = parent_div.find('a', text='CV')
        if cv_tag:
            cv = cv_tag['href']
    
    
    websites.append(website)
    emails.append(email)
    cvs.append(cv)

df_UCHICAGO_BS_24_25["Website"] = websites
df_UCHICAGO_BS_24_25["Email"] = emails
df_UCHICAGO_BS_24_25["CV"] = cvs
df_UCHICAGO_BS_24_25['School'] = 'UC Chicago'
df_UCHICAGO_BS_24_25['Institution'] = 'Booth'
df_UCHICAGO_BS_24_25['entry_time'] = '2024-2025'


df_UCHICAGO_BS_24_25.head()


  candidate_section = soup.find(text=name)
  website_tag = parent_div.find('a', text='Personal Website')
  email_tag = parent_div.find('a', href=True, text='Email')
  cv_tag = parent_div.find('a', text='CV')


Unnamed: 0,Department,Name,Job Market Paper,Research Interests,Dissertation Committee,Website,Email,CV,School,Institution,entry_time
0,Accounting,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,Econometrics and Statistics,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,Econometrics and Statistics,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025
3,Econometrics and Statistics,Cong Zhang,"""Regulatory Uncertainty Pricing in Digital Eco...",Asset Pricing; Applied Econometrics; Financial...,"George M Constantinides (Co-chair), Jeffrey R ...",http://cong-zhang.com/,czhang12@chicagobooth.edu,https://uchicago.box.com/s/oc1sis2xmovdhtm9saf...,UC Chicago,Booth,2024-2025
4,Economics,Lucy Msall,"""Never-Realized Capital Gains""","Public Economics, Household Finance, Law & Eco...","Marianne Bertrand (co-chair), Mikhail Golosov,...",https://voices.uchicago.edu/lucymsall/,,https://uchicago.box.com/s/v0njdu1veyy14am5v9n...,UC Chicago,Booth,2024-2025


## UCHICAGO-Public-policy

In [30]:
UCHICAGO_Public_policy_url = "https://harris.uchicago.edu/academics/degrees/phd"
UCHICAGO_Public_policy_response = requests.get(UCHICAGO_Public_policy_url)
UCHICAGO_Public_policy_soup = BeautifulSoup(UCHICAGO_Public_policy_response.content, 'html.parser')

with open("UCHICAGO_Public_policy_page.html", "w", encoding="utf-8") as file:
    file.write(UCHICAGO_Public_policy_response.text)

In [31]:
# It seems like they yet uploaded anything
# As for the past, there is no specific name but just position
UCHICAGO_Public_policy_past_url = "https://harris.uchicago.edu/findtalent/phdsonthejobmarket/previous-placements"

## All Dataframes

For UCB:

In [32]:
df_ucb_2024_2025.head()
UCBerkeley_ECON_jm_candidates = df_ucb_2024_2025.copy()
UCBerkeley_ECON_jm_candidates['School'] = 'UC Berkeley'
UCBerkeley_ECON_jm_candidates['Department'] = 'Economics'
UCBerkeley_ECON_jm_candidates.to_csv('data/UCBerkeley_ECON_jm_candidates.csv')
UCBerkeley_ECON_jm_candidates.head()


Unnamed: 0,Name,Entry Year,Fields,Website,Email,School,Year,Department
0,"Baker, Sarah",Program Entry 2019,"Public Finance, Urban Economics, Applied Micro...",https://www.sarah-baker.com/,sarah.baker@berkeley.edu,UC Berkeley,2024-2025,Economics
1,"Bakhareva, Daria",Program Entry,"Behavioral and Experimental Economics, Politic...",https://sites.google.com/view/dbakhareva/home,dbakhareva@berkeley.edu,UC Berkeley,2024-2025,Economics
2,"Boctor, Valerie",Program Entry 2018,"Household Finance, Macroeconomics",https://valerieboctor.github.io/,valboctor@berkeley.edu,UC Berkeley,2024-2025,Economics
3,"Breach, Tomas",Program Entry 2018,"Macroeconomics, Financial Economics",https://sites.google.com/berkeley.edu/tomas-br...,tomas_breach@berkeley.edu,UC Berkeley,2024-2025,Economics
4,"Candia Gonzalez, Bernardo",Program Entry 2018,"Macroeconomics, International Economics",https://www.bernardocandia.com/,bernardo_candia@berkeley.edu,UC Berkeley,2024-2025,Economics


In [33]:
UCBerkeley_Econ_jm_placements = UCB_ECON_PHD_2023_24_df.copy()
UCBerkeley_Econ_jm_placements['Department'] = 'Economics'
UCBerkeley_Econ_jm_placements.to_csv('data/UCBerkeley_Econ_jm_placements.csv')
UCBerkeley_Econ_jm_placements.head()


Unnamed: 0,Name,Degree,Primary Desired Research and Fields of Interest,Dissertation Title,Expected Date of Completion,References,School,Year,Department
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics
3,Andrea Cerrato,"B.Sc. (Economics, Management, Finance), Boccon...",Macroeconomics Labor Economics,Essays in Macroeconomics and Labor Economics,May 2024,"Emi Nakamura*, Enrico Moretti, Benjamin Schoef...",UC Berkeley,2023-2024,Economics
4,Bryan Chu,"B.Sc. (Applied Mathematics-Economics), Brown U...",Health Economics Industrial Organization Labor...,"Essays in Hospital Organization, Infrastructur...",May 2024,"Ben Handel*, Jon Kolstad, Nano Barahona",UC Berkeley,2023-2024,Economics


In [34]:
UCBerkeley_HASS_jm_placements = Hass_past_placement_df.copy()
UCBerkeley_HASS_jm_placements['Department'] = 'HASS'
UCBerkeley_HASS_jm_placements.to_csv('data/UCBerkeley_HASS_jm_placements.csv')
UCBerkeley_HASS_jm_placements = UCBerkeley_HASS_jm_placements.drop(columns=['Institution'])
UCBerkeley_HASS_jm_placements.head()


Unnamed: 0,name,position,past_institution,entry_time,School,Department
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024,UC Berkeley,HASS
1,Andrew Choi,Postdoc,University College London,2024,UC Berkeley,HASS
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024,UC Berkeley,HASS
3,Lukas Leucht,Assistant Professor,University of Oxford,2024,UC Berkeley,HASS
4,Kimberlyn Munevar,Assistant Professor,Texas A&M University,2024,UC Berkeley,HASS


In [35]:
UCB_ARE_job_placement_df.head()
UCBerkeley_ARE_jm_placements = UCB_ARE_job_placement_df.copy()
UCBerkeley_ARE_jm_placements['Department'] = 'ARE'
UCBerkeley_ARE_jm_placements = UCBerkeley_ARE_jm_placements.drop(columns=['Institution'])
UCBerkeley_ARE_jm_placements.to_csv('data/UCBerkeley_ARE_jm_placements.csv')
UCBerkeley_ARE_jm_placements.head()

Unnamed: 0,Name,Position,Department,Entry Year,School,Year
0,"DanielAgness(DEV, LAB)",Assistant Professor,ARE,2024,UC Berkeley,2023-2024
1,"Pierre Biscaye(DEV, ERE)",Chaire Professeur Junior,ARE,2024,UC Berkeley,2023-2024
2,"Jesse Buchsbaum(ERE, IO)",Fellow,ARE,2024,UC Berkeley,2023-2024
3,"Michelle Chen(DEV, IO, POL)",Economist,ARE,2024,UC Berkeley,2023-2024
4,Daniel Kannell (ERE),Economist,ARE,2024,UC Berkeley,2023-2024


For Uchicago:

In [36]:
def remove_research(str):
    if str.startswith('References: '):
        str = str.remove('References: ')
    return str


In [37]:
chicago_candidate_23_24.head()
UChicago_ECON_jm_candidates = chicago_candidate_23_24.copy()
UChicago_ECON_jm_candidates['Department'] = 'Economics'

UChicago_ECON_jm_candidates.to_csv('data/UChicago_ECON_jm_candidates.csv')
UChicago_ECON_jm_candidates.head()

Unnamed: 0.1,Unnamed: 0,Name,Primary Research Focus,Secondary Research Focus,References,Email,Website,Website Link,Job Market Paper,Job Market Paper Link,School,Year,Department
0,0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
3,3,Zhiyu Fu,"Primary Research Focus: International Finance,...",Secondary Research Focus: Macro Finance,"References: Greg Kaplan (Co-chair), Ralph Koij...",Email: zhiyufu@uchicago.edu,Fu Personal Website,https://fuzhiyu.me,Capital Flows and the Making of Risky Currencies,https://fuzhiyu.me/assets/pdf/capital_flow_and...,UChicago,2023-2024,Economics
4,4,Michael Galperin,"Primary Research Focus: Labor Economics, Publi...",Secondary Research Focus: Economics of Education,"References: Michael Greenstone, Magne Mogstad,...",Email: mgalperin@uchicago.edu,Galperin Personal Website,https://michaelgalperin.github.io,"Targeting, Selection, and the Impact of Grant ...",https://michaelgalperin.github.io/files/jmp.pdf,UChicago,2023-2024,Economics


In [46]:
df_UCHICAGO_BS_24_25.head()
UChicago_Business_jm_candidates = df_UCHICAGO_BS_24_25.copy()
UChicago_Business_jm_candidates['Department'] = 'Business'
UChicago_Business_jm_candidates = UChicago_Business_jm_candidates.rename(columns={'Job Market Paper': 'jm_paper'})
UChicago_Business_jm_candidates.to_csv('data/UChicago_Business_jm_candidates.csv')
UChicago_Business_jm_candidates.head()


Unnamed: 0,Department,Name,jm_paper,Research Interests,Dissertation Committee,Website,Email,CV,School,Institution,entry_time
0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025
3,Business,Cong Zhang,"""Regulatory Uncertainty Pricing in Digital Eco...",Asset Pricing; Applied Econometrics; Financial...,"George M Constantinides (Co-chair), Jeffrey R ...",http://cong-zhang.com/,czhang12@chicagobooth.edu,https://uchicago.box.com/s/oc1sis2xmovdhtm9saf...,UC Chicago,Booth,2024-2025
4,Business,Lucy Msall,"""Never-Realized Capital Gains""","Public Economics, Household Finance, Law & Eco...","Marianne Bertrand (co-chair), Mikhail Golosov,...",https://voices.uchicago.edu/lucymsall/,,https://uchicago.box.com/s/v0njdu1veyy14am5v9n...,UC Chicago,Booth,2024-2025


In [51]:
UCBerkeley_ARE_jm_placements = pd.read_csv('data/UCBerkeley_ARE_jm_placements.csv')
print('UCBerkeley_ARE_jm_placements:')
display(UCBerkeley_ARE_jm_placements.head(3))

UCBerkeley_ECON_jm_candidates = pd.read_csv('data/UCBerkeley_ECON_jm_candidates.csv')
print('UCBerkeley_ECON_jm_candidates:')
display(UCBerkeley_ECON_jm_candidates.head(3))

UCBerkeley_Econ_jm_placements = pd.read_csv('data/UCBerkeley_Econ_jm_placements.csv')
print('UCBerkeley_Econ_jm_placements:')
display(UCBerkeley_Econ_jm_placements.head(3))

UCBerkeley_HASS_jm_placements = pd.read_csv('data/UCBerkeley_HASS_jm_placements.csv')
print('UCBerkeley_HASS_jm_placements:')
display(UCBerkeley_HASS_jm_placements.head(3))

UChicago_BS_jm_candidates = pd.read_csv('data/UChicago_BS_jm_candidates.csv')
print('UChicago_BS_jm_candidates:')
display(UChicago_BS_jm_candidates.head(3))

UChicago_Business_jm_candidates = pd.read_csv('data/UChicago_Business_jm_candidates.csv')
print('UChicago_Business_jm_candidates:')
display(UChicago_Business_jm_candidates.head(3))

UChicago_ECON_jm_candidates = pd.read_csv('data/UChicago_ECON_jm_candidates.csv')
print('UChicago_ECON_jm_candidates:')
display(UChicago_ECON_jm_candidates.head(3))

UCBerkeley_ARE_jm_placements:


Unnamed: 0.1,Unnamed: 0,Name,Position,Department,Entry Year,School,Year,Corrected Gender,Is Academic,Is Postdoc,Employment Type
0,0,"DanielAgness(DEV, LAB)",Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,Unknown,1,0,Academic
1,1,"Pierre Biscaye(DEV, ERE)",Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Other
2,2,"Jesse Buchsbaum(ERE, IO)",Fellow,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Fellow


UCBerkeley_ECON_jm_candidates:


Unnamed: 0.1,Unnamed: 0,Name,Entry Year,Fields,Website,Email,School,Year,Department
0,0,"Baker, Sarah",Program Entry 2019,"Public Finance, Urban Economics, Applied Micro...",https://www.sarah-baker.com/,sarah.baker@berkeley.edu,UC Berkeley,2024-2025,Economics
1,1,"Bakhareva, Daria",Program Entry,"Behavioral and Experimental Economics, Politic...",https://sites.google.com/view/dbakhareva/home,dbakhareva@berkeley.edu,UC Berkeley,2024-2025,Economics
2,2,"Boctor, Valerie",Program Entry 2018,"Household Finance, Macroeconomics",https://valerieboctor.github.io/,valboctor@berkeley.edu,UC Berkeley,2024-2025,Economics


UCBerkeley_Econ_jm_placements:


Unnamed: 0.1,Unnamed: 0,Name,Degree,Primary Desired Research and Fields of Interest,Dissertation Title,Expected Date of Completion,References,School,Year,Department
0,0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics
1,1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics
2,2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics


UCBerkeley_HASS_jm_placements:


Unnamed: 0.1,Unnamed: 0,name,position,past_institution,entry_time,School,Department,Corrected Gender
0,0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,Female
1,1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,Male
2,2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,Male


UChicago_BS_jm_candidates:


Unnamed: 0.1,Unnamed: 0,Department,Name,Job Market Paper,Research Interests,Dissertation Committee,Website,Email,CV,School,Institution,entry_time
0,0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025


UChicago_Business_jm_candidates:


Unnamed: 0.1,Unnamed: 0,Department,Name,jm_paper,Research Interests,Dissertation Committee,Website,Email,CV,School,Institution,entry_time
0,0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025


UChicago_ECON_jm_candidates:


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Primary Research Focus,Secondary Research Focus,References,Email,Website,Website Link,Job Market Paper,Job Market Paper Link,School,Year,Department
0,0,0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,1,1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,2,2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics


In [52]:
UCBerkeley_ARE_jm_placements.columns = UCBerkeley_ARE_jm_placements.columns.str.lower()
UCBerkeley_ECON_jm_candidates.columns = UCBerkeley_ECON_jm_candidates.columns.str.lower()
UCBerkeley_Econ_jm_placements.columns = UCBerkeley_Econ_jm_placements.columns.str.lower()
UCBerkeley_HASS_jm_placements.columns = UCBerkeley_HASS_jm_placements.columns.str.lower()
UChicago_BS_jm_candidates.columns = UChicago_BS_jm_candidates.columns.str.lower()
UChicago_Business_jm_candidates.columns = UChicago_Business_jm_candidates.columns.str.lower()
UChicago_ECON_jm_candidates.columns = UChicago_ECON_jm_candidates.columns.str.lower()

UCBerkeley_ARE_jm_placements.to_csv('data/UCBerkeley_ARE_jm_placements.csv', index=False)
UCBerkeley_ECON_jm_candidates.to_csv('data/UCBerkeley_ECON_jm_candidates.csv', index=False)
UCBerkeley_Econ_jm_placements.to_csv('data/UCBerkeley_Econ_jm_placements.csv', index=False)
UCBerkeley_HASS_jm_placements.to_csv('data/UCBerkeley_HASS_jm_placements.csv', index=False)
UChicago_BS_jm_candidates.to_csv('data/UChicago_BS_jm_candidates.csv', index=False)
UChicago_Business_jm_candidates.to_csv('data/UChicago_Business_jm_candidates.csv', index=False)
UChicago_ECON_jm_candidates.to_csv('data/UChicago_ECON_jm_candidates.csv', index=False)

In [53]:
UCBerkeley_ARE_jm_placements = pd.read_csv('data/UCBerkeley_ARE_jm_placements.csv')
print('UCBerkeley_ARE_jm_placements:')
display(UCBerkeley_ARE_jm_placements.head(3))

UCBerkeley_ECON_jm_candidates = pd.read_csv('data/UCBerkeley_ECON_jm_candidates.csv')
print('UCBerkeley_ECON_jm_candidates:')
display(UCBerkeley_ECON_jm_candidates.head(3))

UCBerkeley_Econ_jm_placements = pd.read_csv('data/UCBerkeley_Econ_jm_placements.csv')
print('UCBerkeley_Econ_jm_placements:')
display(UCBerkeley_Econ_jm_placements.head(3))

UCBerkeley_HASS_jm_placements = pd.read_csv('data/UCBerkeley_HASS_jm_placements.csv')
print('UCBerkeley_HASS_jm_placements:')
display(UCBerkeley_HASS_jm_placements.head(3))

UChicago_BS_jm_candidates = pd.read_csv('data/UChicago_BS_jm_candidates.csv')
print('UChicago_BS_jm_candidates:')
display(UChicago_BS_jm_candidates.head(3))

UChicago_Business_jm_candidates = pd.read_csv('data/UChicago_Business_jm_candidates.csv')
print('UChicago_Business_jm_candidates:')
display(UChicago_Business_jm_candidates.head(3))

UChicago_ECON_jm_candidates = pd.read_csv('data/UChicago_ECON_jm_candidates.csv')
print('UChicago_ECON_jm_candidates:')
display(UChicago_ECON_jm_candidates.head(3))

UCBerkeley_ARE_jm_placements:


Unnamed: 0,unnamed: 0,name,position,department,entry year,school,year,corrected gender,is academic,is postdoc,employment type
0,0,"DanielAgness(DEV, LAB)",Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,Unknown,1,0,Academic
1,1,"Pierre Biscaye(DEV, ERE)",Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Other
2,2,"Jesse Buchsbaum(ERE, IO)",Fellow,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Fellow


UCBerkeley_ECON_jm_candidates:


Unnamed: 0,unnamed: 0,name,entry year,fields,website,email,school,year,department
0,0,"Baker, Sarah",Program Entry 2019,"Public Finance, Urban Economics, Applied Micro...",https://www.sarah-baker.com/,sarah.baker@berkeley.edu,UC Berkeley,2024-2025,Economics
1,1,"Bakhareva, Daria",Program Entry,"Behavioral and Experimental Economics, Politic...",https://sites.google.com/view/dbakhareva/home,dbakhareva@berkeley.edu,UC Berkeley,2024-2025,Economics
2,2,"Boctor, Valerie",Program Entry 2018,"Household Finance, Macroeconomics",https://valerieboctor.github.io/,valboctor@berkeley.edu,UC Berkeley,2024-2025,Economics


UCBerkeley_Econ_jm_placements:


Unnamed: 0,unnamed: 0,name,degree,primary desired research and fields of interest,dissertation title,expected date of completion,references,school,year,department
0,0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics
1,1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics
2,2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics


UCBerkeley_HASS_jm_placements:


Unnamed: 0,unnamed: 0,name,position,past_institution,entry_time,school,department,corrected gender
0,0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,Female
1,1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,Male
2,2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,Male


UChicago_BS_jm_candidates:


Unnamed: 0,unnamed: 0,department,name,job market paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time
0,0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025


UChicago_Business_jm_candidates:


Unnamed: 0,unnamed: 0,department,name,jm_paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time
0,0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025


UChicago_ECON_jm_candidates:


Unnamed: 0,unnamed: 0.1,unnamed: 0,name,primary research focus,secondary research focus,references,email,website,website link,job market paper,job market paper link,school,year,department
0,0,0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,1,1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,2,2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics


In [54]:
UCBerkeley_ARE_jm_placements = UCBerkeley_ARE_jm_placements.loc[:, ~UCBerkeley_ARE_jm_placements.columns.str.contains('^unnamed')]
UCBerkeley_ARE_jm_placements.to_csv('data/UCBerkeley_ARE_jm_placements.csv', index=False)

UCBerkeley_ECON_jm_candidates = UCBerkeley_ECON_jm_candidates.loc[:, ~UCBerkeley_ECON_jm_candidates.columns.str.contains('^unnamed')]
UCBerkeley_ECON_jm_candidates.to_csv('data/UCBerkeley_ECON_jm_candidates.csv', index=False)

UCBerkeley_Econ_jm_placements = UCBerkeley_Econ_jm_placements.loc[:, ~UCBerkeley_Econ_jm_placements.columns.str.contains('^unnamed')]
UCBerkeley_Econ_jm_placements.to_csv('data/UCBerkeley_Econ_jm_placements.csv', index=False)

UCBerkeley_HASS_jm_placements = UCBerkeley_HASS_jm_placements.loc[:, ~UCBerkeley_HASS_jm_placements.columns.str.contains('^unnamed')]
UCBerkeley_HASS_jm_placements.to_csv('data/UCBerkeley_HASS_jm_placements.csv', index=False)

UChicago_BS_jm_candidates = UChicago_BS_jm_candidates.loc[:, ~UChicago_BS_jm_candidates.columns.str.contains('^unnamed')]
UChicago_BS_jm_candidates.to_csv('data/UChicago_BS_jm_candidates.csv', index=False)

UChicago_Business_jm_candidates = UChicago_Business_jm_candidates.loc[:, ~UChicago_Business_jm_candidates.columns.str.contains('^unnamed')]
UChicago_Business_jm_candidates.to_csv('data/UChicago_Business_jm_candidates.csv', index=False)

UChicago_ECON_jm_candidates = UChicago_ECON_jm_candidates.loc[:, ~UChicago_ECON_jm_candidates.columns.str.contains('^unnamed')]
UChicago_ECON_jm_candidates.to_csv('data/UChicago_ECON_jm_candidates.csv', index=False)

### Gender Guesser
First let's load all the datasets


In [4]:
import pandas as pd
import gender_guesser.detector as gender

In [7]:
UCBerkeley_ARE_jm_placements = pd.read_csv('data/UCBerkeley_ARE_jm_placements.csv')
print('UCBerkeley_ARE_jm_placements:')
display(UCBerkeley_ARE_jm_placements.head(3))

UCBerkeley_Econ_jm_placements = pd.read_csv('data/UCBerkeley_Econ_jm_placements.csv')
print('UCBerkeley_Econ_jm_placements:')
display(UCBerkeley_Econ_jm_placements.head(3))

UCBerkeley_HASS_jm_placements = pd.read_csv('data/UCBerkeley_HASS_jm_placements.csv')
print('UCBerkeley_HASS_jm_placements:')
display(UCBerkeley_HASS_jm_placements.head(3))

UChicago_Business_jm_candidates = pd.read_csv('data/UChicago_Business_jm_candidates.csv')
print('UChicago_Business_jm_candidates:')
display(UChicago_Business_jm_candidates.head(3))

UChicago_ECON_jm_candidates = pd.read_csv('data/UChicago_ECON_jm_candidates.csv')
print('UChicago_ECON_jm_candidates:')
display(UChicago_ECON_jm_candidates.head(3))

UCBerkeley_ARE_jm_placements:


Unnamed: 0,name,position,department,entry year,school,year,corrected gender,is academic,is postdoc,employment type
0,Daniel Agness,Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,Unknown,1,0,Academic
1,Pierre Biscaye,Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Other
2,Jesse Buchsbaum,Fellow,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Fellow


UCBerkeley_Econ_jm_placements:


Unnamed: 0,name,degree,primary desired research and fields of interest,dissertation title,expected date of completion,references,school,year,department
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics


UCBerkeley_HASS_jm_placements:


Unnamed: 0,name,position,past_institution,entry_time,school,department,corrected gender,is_academic,is_postdoc,employment_type
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,Female,0,1,Postdoc
1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,Male,0,1,Postdoc
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,Male,1,0,Academic


UChicago_Business_jm_candidates:


Unnamed: 0,department,name,jm_paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time
0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025
1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025
2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025


UChicago_ECON_jm_candidates:


Unnamed: 0,name,primary research focus,secondary research focus,references,email,website,website link,job market paper,job market paper link,school,year,department
0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics
1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics
2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics


In [11]:
UCBerkeley_ARE_jm_placements.dropna(inplace=True)


In [12]:

detector = gender.Detector()

def get_gender_from_name(name):
    first_name = name.split()[0]
    return detector.get_gender(first_name)

UCBerkeley_ARE_jm_placements['Gender'] = UCBerkeley_ARE_jm_placements['name'].apply(get_gender_from_name)
UCBerkeley_Econ_jm_placements['Gender'] = UCBerkeley_Econ_jm_placements['name'].apply(get_gender_from_name)
UCBerkeley_HASS_jm_placements['Gender'] = UCBerkeley_HASS_jm_placements['name'].apply(get_gender_from_name)
UChicago_Business_jm_candidates['Gender'] = UChicago_Business_jm_candidates['name'].apply(get_gender_from_name)
UChicago_ECON_jm_candidates['Gender'] = UChicago_ECON_jm_candidates['name'].apply(get_gender_from_name)

print('UCBerkeley_ARE_jm_placements with Gender:')
display(UCBerkeley_ARE_jm_placements.head(3))

print('UCBerkeley_Econ_jm_placements with Gender:')
display(UCBerkeley_Econ_jm_placements.head(3))

print('UCBerkeley_HASS_jm_placements with Gender:')
display(UCBerkeley_HASS_jm_placements.head(3))

print('UChicago_Business_jm_candidates with Gender:')
display(UChicago_Business_jm_candidates.head(3))

print('UChicago_ECON_jm_candidates with Gender:')
display(UChicago_ECON_jm_candidates.head(3))




UCBerkeley_ARE_jm_placements with Gender:


Unnamed: 0,name,position,department,entry year,school,year,corrected gender,is academic,is postdoc,employment type,Gender
0,Daniel Agness,Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,Unknown,1,0,Academic,male
1,Pierre Biscaye,Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Other,male
2,Jesse Buchsbaum,Fellow,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Fellow,male


UCBerkeley_Econ_jm_placements with Gender:


Unnamed: 0,name,degree,primary desired research and fields of interest,dissertation title,expected date of completion,references,school,year,department,Gender
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics,male
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics,unknown
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics,male


UCBerkeley_HASS_jm_placements with Gender:


Unnamed: 0,name,position,past_institution,entry_time,school,department,corrected gender,is_academic,is_postdoc,employment_type,Gender
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,Female,0,1,Postdoc,mostly_female
1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,Male,0,1,Postdoc,male
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,Male,1,0,Academic,unknown


UChicago_Business_jm_candidates with Gender:


Unnamed: 0,department,name,jm_paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time,Gender
0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025,male
1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025,unknown
2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025,unknown


UChicago_ECON_jm_candidates with Gender:


Unnamed: 0,name,primary research focus,secondary research focus,references,email,website,website link,job market paper,job market paper link,school,year,department,Gender
0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male
1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics,unknown
2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male


In [14]:
def clean_gender_column(df):
    def clean_gender(gender):
        if gender.startswith('mostly_'):
            return gender.replace('mostly_', '')
        elif gender in ['male', 'female', 'unknown']:
            return gender
        else:
            return 'unknown'
    
    df['Gender'] = df['Gender'].apply(clean_gender)
    return df

UCBerkeley_ARE_jm_placements = clean_gender_column(UCBerkeley_ARE_jm_placements)
UCBerkeley_Econ_jm_placements = clean_gender_column(UCBerkeley_Econ_jm_placements)
UCBerkeley_HASS_jm_placements = clean_gender_column(UCBerkeley_HASS_jm_placements)
UChicago_Business_jm_candidates = clean_gender_column(UChicago_Business_jm_candidates)
UChicago_ECON_jm_candidates = clean_gender_column(UChicago_ECON_jm_candidates)

print('UCBerkeley_ARE_jm_placements with cleaned Gender:')
display(UCBerkeley_ARE_jm_placements.head(3))

print('UCBerkeley_Econ_jm_placements with cleaned Gender:')
display(UCBerkeley_Econ_jm_placements.head(3))

print('UCBerkeley_HASS_jm_placements with cleaned Gender:')
display(UCBerkeley_HASS_jm_placements.head(3))

print('UChicago_Business_jm_candidates with cleaned Gender:')
display(UChicago_Business_jm_candidates.head(3))

print('UChicago_ECON_jm_candidates with cleaned Gender:')
display(UChicago_ECON_jm_candidates.head(3))

print('Gender value counts for UCBerkeley_ARE_jm_placements:')
print(UCBerkeley_ARE_jm_placements['Gender'].value_counts())

print('Gender value counts for UCBerkeley_Econ_jm_placements:')
print(UCBerkeley_Econ_jm_placements['Gender'].value_counts())

print('Gender value counts for UCBerkeley_HASS_jm_placements:')
print(UCBerkeley_HASS_jm_placements['Gender'].value_counts())

print('Gender value counts for UChicago_Business_jm_candidates:')
print(UChicago_Business_jm_candidates['Gender'].value_counts())

print('Gender value counts for UChicago_ECON_jm_candidates:')
print(UChicago_ECON_jm_candidates['Gender'].value_counts())


UCBerkeley_ARE_jm_placements with cleaned Gender:


Unnamed: 0,name,position,department,entry year,school,year,corrected gender,is academic,is postdoc,employment type,Gender
0,Daniel Agness,Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,Unknown,1,0,Academic,male
1,Pierre Biscaye,Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Other,male
2,Jesse Buchsbaum,Fellow,ARE,2024.0,UC Berkeley,2023-2024,Unknown,0,0,Fellow,male


UCBerkeley_Econ_jm_placements with cleaned Gender:


Unnamed: 0,name,degree,primary desired research and fields of interest,dissertation title,expected date of completion,references,school,year,department,Gender
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics,male
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics,unknown
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics,male


UCBerkeley_HASS_jm_placements with cleaned Gender:


Unnamed: 0,name,position,past_institution,entry_time,school,department,corrected gender,is_academic,is_postdoc,employment_type,Gender
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,Female,0,1,Postdoc,female
1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,Male,0,1,Postdoc,male
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,Male,1,0,Academic,unknown


UChicago_Business_jm_candidates with cleaned Gender:


Unnamed: 0,department,name,jm_paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time,Gender
0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025,male
1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025,unknown
2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025,unknown


UChicago_ECON_jm_candidates with cleaned Gender:


Unnamed: 0,name,primary research focus,secondary research focus,references,email,website,website link,job market paper,job market paper link,school,year,department,Gender
0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male
1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics,unknown
2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male


Gender value counts for UCBerkeley_ARE_jm_placements:
Gender
male       79
female     45
unknown    34
Name: count, dtype: int64
Gender value counts for UCBerkeley_Econ_jm_placements:
Gender
male       18
unknown     5
female      2
Name: count, dtype: int64
Gender value counts for UCBerkeley_HASS_jm_placements:
Gender
male       61
unknown    32
female     26
Name: count, dtype: int64
Gender value counts for UChicago_Business_jm_candidates:
Gender
unknown    8
male       7
female     4
Name: count, dtype: int64
Gender value counts for UChicago_ECON_jm_candidates:
Gender
male       10
unknown     7
female      1
Name: count, dtype: int64


In [15]:
UCBerkeley_ARE_jm_placements.to_csv('data/UCBerkeley_ARE_jm_placements.csv', index=False)

UCBerkeley_Econ_jm_placements.to_csv('data/UCBerkeley_Econ_jm_placements.csv', index=False)

UCBerkeley_HASS_jm_placements.to_csv('data/UCBerkeley_HASS_jm_placements.csv', index=False)

UChicago_Business_jm_candidates.to_csv('data/UChicago_Business_jm_candidates.csv', index=False)

UChicago_ECON_jm_candidates.to_csv('data/UChicago_ECON_jm_candidates.csv', index=False)

### Personal Website

Tried A more complete list, didn't work
Now try to get all personal websites.

In [3]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [6]:
UCBerkeley_ARE_jm_placements = pd.read_csv('data/UCBerkeley_ARE_jm_placements.csv')
print('UCBerkeley_ARE_jm_placements:')
display(UCBerkeley_ARE_jm_placements.head(3))

UCBerkeley_Econ_jm_placements = pd.read_csv('data/UCBerkeley_Econ_jm_placements.csv')
print('UCBerkeley_Econ_jm_placements:')
display(UCBerkeley_Econ_jm_placements.head(3))

UCBerkeley_HASS_jm_placements = pd.read_csv('data/UCBerkeley_HASS_jm_placements.csv')
print('UCBerkeley_HASS_jm_placements:')
display(UCBerkeley_HASS_jm_placements.head(3))

UChicago_Business_jm_candidates = pd.read_csv('data/UChicago_Business_jm_candidates.csv')
print('UChicago_Business_jm_candidates:')
display(UChicago_Business_jm_candidates.head(3))

UChicago_ECON_jm_candidates = pd.read_csv('data/UChicago_ECON_jm_candidates.csv')
print('UChicago_ECON_jm_candidates:')
display(UChicago_ECON_jm_candidates.head(3))

UCBerkeley_ARE_jm_placements:


Unnamed: 0,name,position,department,entry year,school,year,is academic,is postdoc,employment type,Gender
0,Daniel Agness,Assistant Professor,ARE,2024.0,UC Berkeley,2023-2024,1,0,Academic,male
1,Pierre Biscaye,Chaire Professeur Junior,ARE,2024.0,UC Berkeley,2023-2024,0,0,Other,male
2,Jesse Buchsbaum,Fellow,ARE,2024.0,UC Berkeley,2023-2024,0,0,Fellow,male


UCBerkeley_Econ_jm_placements:


Unnamed: 0,name,degree,primary desired research and fields of interest,dissertation title,expected date of completion,references,school,year,department,Gender
0,Felipe Arteaga,"B.S. (Industrial Engineering), Pontificial Uni...",Industrial Organization Labor Economics,Essays on Empirical Market Design,May 2024,"Ben Handel*, Christopher Walters, Christopher ...",UC Berkeley,2023-2024,Economics,male
1,Hadar Avivi,"B.A. (Economics), Tel Aviv University",Labor Economics Applied Econometrics,Essays in Intergenerational Mobility and Inequ...,May 2024,"Patrick Kline*, Christopher Walters, Hilary Ho...",UC Berkeley,2023-2024,Economics,unknown
2,Martin Caruso-Bloeck,"B.A. (Economics), Universidad Nacional de La P...",Macroeconomics International Economics,Essays in Macroeconomics,May 2024,"Yuriy Gorodnichenko*, David Romer, Jon Steinsson",UC Berkeley,2023-2024,Economics,male


UCBerkeley_HASS_jm_placements:


Unnamed: 0,name,position,past_institution,entry_time,school,department,is_academic,is_postdoc,employment_type,Gender
0,Sandy Campbell,Postdoc,"University of California, Los Angeles",2024.0,UC Berkeley,HASS,0,1,Postdoc,female
1,Andrew Choi,Postdoc,University College London,2024.0,UC Berkeley,HASS,0,1,Postdoc,male
2,Tianyu Han,Assistant Professor,Hong Kong University of Science and Technology,2024.0,UC Berkeley,HASS,1,0,Academic,Male


UChicago_Business_jm_candidates:


Unnamed: 0,department,name,jm_paper,research interests,dissertation committee,website,email,cv,school,institution,entry_time,Gender
0,Business,Jonas Piero Dalmazzo,"""The Effects of Alternative Data on Misreporting""","Disclosure, Capital Markets, Misreporting, Reg...","Phillip B. Berger (Chair), Christian Leuz, Del...",https://www.jonasdalmazzo.com/,jdalmazzo@chicagobooth.edu,https://uchicago.box.com/s/4hin6486up3kn4rpdf6...,UC Chicago,Booth,2024-2025,male
1,Business,Jizhou Liu,"""Inference for Two-stage Experiments under Cov...","Econometrics, Causal Inference, Design and Ana...","Christian Hansen (Co-chair), Azeem Shaikh (Co-...",https://ljz0.github.io/,jliu32@chicagobooth.edu,https://ljz0.github.io/CV.pdf,UC Chicago,Booth,2024-2025,unknown
2,Business,Boxin Zhao,"""Statistical Learning and Optimization under D...","Distribution Shift, Transfer Learning, Probabi...","Mladen Kolar (Co-Chair), Cong Ma (Co-Chair),Te...",https://voices.uchicago.edu/boxinzhao/,boxinz0@chicagobooth.edu,https://uchicago.box.com/s/qzsed2bqddaa6g57nl1...,UC Chicago,Booth,2024-2025,unknown


UChicago_ECON_jm_candidates:


Unnamed: 0,name,primary research focus,secondary research focus,references,email,website,website link,job market paper,job market paper link,school,year,department,Gender
0,Scott Behmer,Public Finance,"Political Economy, Energy and Environmental Ec...","Leonardo Bursztyn (Chair), Mikhail Golosov, Wi...",Email: behmer@uchicago.edu,Behmer Personal Website,https://sites.google.com/uchicago.edu/scott-be...,Sticks vs Carrots: Optimal Climate Policy with...,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male
1,Sulagna Dasgupta,"Microeconomic Theory, Mechanism and Informatio...",Experimental and Behavioral Economics,"Ben Brooks, Doron Ravid, Emir Kamenica, Phil Reny",sulagna@uchicago.edu,Dasgupta Personal Website,https://sites.google.com/view/sulagna,Screening Knowledge,https://drive.google.com/file/d/1sdNCkHavbxnyI...,UChicago,2023-2024,Economics,unknown
2,Santiago Franco,"Primary Research Focus: Macroeconomics, Spatia...","Secondary Research Focus: International Trade,...","References: Esteban Rossi-Hansberg (Chair), Uf...",sfranco@uchicago.edu,Franco Personal Website,https://www.santiago-franco.com/,Output Market Power and Spatial Misallocation,https://socialsciences.uchicago.edu/sites/defa...,UChicago,2023-2024,Economics,male


In [9]:
UChicago_Business_jm_candidates['cv'].tolist()


['https://uchicago.box.com/s/4hin6486up3kn4rpdf603s42g1m37tit',
 'https://ljz0.github.io/CV.pdf',
 'https://uchicago.box.com/s/qzsed2bqddaa6g57nl12rpks5wwhb43f',
 'https://uchicago.box.com/s/oc1sis2xmovdhtm9safok50jrxa5qmh7',
 'https://uchicago.box.com/s/v0njdu1veyy14am5v9nz6x04re748nmi',
 'https://uchicago.box.com/s/z2c6s83ly39thubovn98pjm2oyvxbnyn',
 'https://uchicago.box.com/s/g1npy8lklskxkljx514xhgjidznucvih',
 'https://rmmomin.github.io/cv/rayhan_momin_cv.pdf',
 'https://uchicago.box.com/s/pp04y04e4b70ke32ty7wb5qne0uhqbt2',
 'https://uchicago.box.com/s/qr78cn8ubw6nf64hxys1vbkgwypsaxa2',
 'https://uchicago.box.com/s/ev7zcd1sgeuhwsartwbzzzcru3fkt5ke',
 'https://uchicago.box.com/s/zkem81y3w6sa1o9u2i9j8elzxb210fs2',
 'https://uchicago.box.com/s/r3puyrhcfwck76c55t517hmtk2vpxx3m',
 'https://uchicago.box.com/s/n89k10pahixz872yz5nr0va6ok218d8m',
 'https://uchicago.box.com/s/xigmnyikkz3ri7x1s04g538b3w5wvcfi',
 'https://www.rafaelmbatista.com/cv',
 'https://uchicago.box.com/s/veaqhm14ulif2s

In [12]:
names = [name.replace(' ', '_') for name in UChicago_Business_jm_candidates['name'].tolist()]
cv_dict = dict(zip(names, UChicago_Business_jm_candidates['cv'].tolist()))
cv_dict

{'Jonas_Piero_Dalmazzo': 'https://uchicago.box.com/s/4hin6486up3kn4rpdf603s42g1m37tit',
 'Jizhou_Liu': 'https://ljz0.github.io/CV.pdf',
 'Boxin_Zhao': 'https://uchicago.box.com/s/qzsed2bqddaa6g57nl12rpks5wwhb43f',
 'Cong_Zhang': 'https://uchicago.box.com/s/oc1sis2xmovdhtm9safok50jrxa5qmh7',
 'Lucy_Msall': 'https://uchicago.box.com/s/v0njdu1veyy14am5v9nz6x04re748nmi',
 'Fern_Ramoutar': 'https://uchicago.box.com/s/z2c6s83ly39thubovn98pjm2oyvxbnyn',
 'Yixin_Sun': 'https://uchicago.box.com/s/g1npy8lklskxkljx514xhgjidznucvih',
 'Rayhan_Momin': 'https://rmmomin.github.io/cv/rayhan_momin_cv.pdf',
 'Manav_Chaudhary': 'https://uchicago.box.com/s/pp04y04e4b70ke32ty7wb5qne0uhqbt2',
 'Jingoo_Kwon': 'https://uchicago.box.com/s/qr78cn8ubw6nf64hxys1vbkgwypsaxa2',
 'Federico_Mainardi': 'https://uchicago.box.com/s/ev7zcd1sgeuhwsartwbzzzcru3fkt5ke',
 'Ben_Marrow': 'https://uchicago.box.com/s/zkem81y3w6sa1o9u2i9j8elzxb210fs2',
 'Ebru_Kasikaralar': 'https://uchicago.box.com/s/r3puyrhcfwck76c55t517hmtk2vpx

In [14]:
names = [name.replace(' ', '_') for name in UChicago_ECON_jm_candidates['name'].tolist()]
cv_dict = dict(zip(names, UChicago_ECON_jm_candidates['website link'].tolist()))
cv_dict

{'Scott_Behmer': 'https://sites.google.com/uchicago.edu/scott-behmer/',
 'Sulagna_Dasgupta': 'https://sites.google.com/view/sulagna',
 'Santiago_Franco': 'https://www.santiago-franco.com/',
 'Zhiyu_Fu': 'https://fuzhiyu.me',
 'Michael_Galperin': 'https://michaelgalperin.github.io',
 'Esperanza_Johnson': 'https://www.esperanzajohnson.com/',
 'Toshiaki_Komatsu': '#main-content',
 'Nadav_Kunievsky': 'https://nkunievsky.github.io/',
 'Ivan_Chit_Pan_Kwok': '#main-content',
 'Marco_Loseto': 'https://mloseto.github.io/',
 'Sangmin_(Simon)_Oh': 'https://sangmino.github.io/',
 'Aleksei_Oskolkov': 'https://alekseioskolkov.com/',
 'Estéfano_Rubio': 'https://www.estefanorubio.com',
 'Sidharth_Sah': 'https://sidharthsah.github.io/',
 'Harshil_Sahai': '#main-content',
 'Marcos_Sora': 'https://www.marcossora.com',
 'Michael_Varley': 'https://www.michaelvarley.com/',
 'Jingtao_Zheng': '#main-content'}

Now let's try to get the undergraduate institution

In [19]:
UCBerkeley_ARE_jm_placements.rename(columns={'is academic': 'academic'}, inplace=True)
UCBerkeley_ARE_jm_placements.rename(columns={'is postdoc': 'postdoc'}, inplace=True)

UCBerkeley_HASS_jm_placements.rename(columns={'is_academic': 'academic'}, inplace=True)
UCBerkeley_HASS_jm_placements.rename(columns={'is_postdoc': 'postdoc'}, inplace=True)


Index(['name', 'position', 'past_institution', 'entry_time', 'school',
       'department', 'academic', 'postdoc', 'employment_type', 'Gender'],
      dtype='object')

In [22]:
UCBerkeley_ARE_jm_placements.to_csv('data/UCBerkeley_ARE_jm_placements.csv')
UCBerkeley_HASS_jm_placements.to_csv('data/UCBerkeley_HASS_jm_placements.csv')

In [16]:
import os
from PyPDF2 import PdfReader

institution = "UCHICAGO_Buisness"
directory = "data/raw/CVs" + "/" + institution

for filename in os.listdir(directory):
    if filename.endswith(".pdf"):
        filepath = os.path.join(directory, filename)
        
        with open(filepath, "rb") as file:
            pdf = PdfReader(file)
            text = ""
            for page in pdf.pages:
                text += page.extract_text()
            
            print(f"Content of {filename}:")
            print(text)

Content of Yixin_Sun.pdf:
Yixin Sun
ysun9@chicagobooth.edu +14016622015
EDUCATION Ph.D, Economics
Research Fields: Environmental and Development Economics
The University of Chicago Booth School of Business, 2019 to present
Bachelor of Arts, Economics-Statistics
Columbia University, New York, NY, May 2016
WORKS IN
PROGRESSLearning is the in Air, Clearn Air as an Experience Good
(with Budy Resosudarmo and Jeanne Sorin)
Accepted via pre-results review, Journal of Development Economics , eventual publi-
cation venue TBD; AEA RCT Registry No. 0013110
Impact of Agricultural Fires in Indonesia on Childhood Development
(with Budy Resosudarmo)
Worker Incentives and Scalable Waste-Management Solutions: Evidence from Delhi
(with Ashton Pallottini, Jun Wong, and Emma Zhang)
GRANTS AND
FELLOWSHIPS2023-2024 JPAL King Climate Action Initiative Pilot Grant (with Budy Resosudarmo
and Jeanne Sorin)
2023, EPIC-India Development Grant (with Ashton Pallottini, Jun Wong, and Emma
Zhang)
2022-2023, BFI Devel