In [51]:
import json
import pandas as pd
from pathlib import Path

In [52]:
# Defining paths
GROUP_DATA_DIR = Path("/Users/harshul/projects/kgwebsite/group-data")

MEMBERS_DIR_PATH = GROUP_DATA_DIR / 'members/'
WEBSITE_DATA_PATH = GROUP_DATA_DIR / 'website_data/'
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / 'content'


In [53]:

def parse_education_experience_df(member_dir):
    member_json_dir = member_dir / 'jsons'
    education_experience_list = []
    
    for json_file in ['experiences.json', 'education.json']:
        json_path = member_json_dir / json_file
        if json_path.exists():
            with open(json_path, 'r') as f:
                education_experience_list += json.load(f)
    
    return pd.DataFrame(education_experience_list)


In [None]:

def extract_member_academic_role(df):
    role = df.iloc[0].get('role', None)
    degree = df.iloc[0].get('degree', None)

    if role in ['Assistant Professor', 'Professor']:
        return 'Professor'
    elif role in ['Visiting Researcher', 'Postdoctoral Researcher']:
        return 'Postdoctoral Researcher'
    elif degree in ['Masters', 'PhD']:
        return 'Graduate Student'
    elif degree == 'Bachelors':
        return 'Graduate Student' if df.iloc[0].get('degree') is None else 'Undergraduate Student'
    else:
        return ""



In [None]:

current_people_page_list = []
alumni_people_page_list = []

for member_dir in MEMBERS_DIR_PATH.glob('*'):
    member_info_fname = member_dir / 'info.json'
    if not member_info_fname.exists():
        continue

    with open(member_info_fname, 'r') as f:
        member_info = json.load(f)

    education_experience_df = parse_education_experience_df(member_dir)

    # Ensure 'end_date' and 'group' columns are present in the dataframe
    if 'end_date' not in education_experience_df.columns:
        education_experience_df['end_date'] = None
    if 'group' not in education_experience_df.columns:
        education_experience_df['group'] = ''

    current_members = education_experience_df[
        education_experience_df['group'].isin(['Kerzendorf Group', 'DTI', 'ICER']) & 
        education_experience_df['end_date'].isna()
    ]
    alumni_members = education_experience_df[
        education_experience_df['group'].isin(['Kerzendorf Group', 'DTI', 'ICER']) & 
        ~education_experience_df['end_date'].isna()
    ]

    member_id = member_info['id']
    first_name = member_info['first_name']
    last_name = member_info['last_name']
    nickname = member_info.get('nick_name', None)
    name = f'{nickname if nickname else first_name} {last_name}'

    if not current_members.empty:
        current_academic_role = extract_member_academic_role(current_members)
        current_people_page_list.append({"Name": name, "academic_role": current_academic_role, "ID": member_id})
    if not alumni_members.empty:
        alumni_academic_role = extract_member_academic_role(alumni_members)
        alumni_people_page_list.append({"Name": name, "academic_role": alumni_academic_role, "ID": member_id})


In [54]:
current_people_page_list


[{'Name': 'Josh Shields', 'academic_role': '', 'ID': 'josh_shields'},
 {'Name': 'Anirban  Dutta',
  'academic_role': 'Postdoctoral Researcher',
  'ID': 'anirban_dutta'},
 {'Name': 'Erin Visser', 'academic_role': '', 'ID': 'erin_visser'},
 {'Name': 'Vicente  Amado Olivo', 'academic_role': '', 'ID': 'vicente_amado'},
 {'Name': 'Yuki Matsumura', 'academic_role': '', 'ID': 'yuki_matsumura'},
 {'Name': 'Andrew Fullard', 'academic_role': '', 'ID': 'andrew_fullard'},
 {'Name': 'Hayden Monk', 'academic_role': '', 'ID': 'hayden_monk'},
 {'Name': 'Atharva Arya', 'academic_role': '', 'ID': 'atharva_arya'},
 {'Name': 'Sona Chitchyan', 'academic_role': '', 'ID': 'sona_chitchyan'},
 {'Name': 'Iliomar Rodriguez-Ramos',
  'academic_role': '',
  'ID': 'iliomar_rodriguez_ramos'},
 {'Name': 'Jaladh Singhal', 'academic_role': '', 'ID': 'jaladh_singhal'},
 {'Name': "Jack O'Brien", 'academic_role': '', 'ID': 'jack_o_brien'},
 {'Name': 'Cecelia Powers', 'academic_role': '', 'ID': 'cecelia_powers'},
 {'Name':

In [55]:
alumni_people_page_list


[{'Name': 'Josh Shields', 'academic_role': '', 'ID': 'josh_shields'},
 {'Name': 'Isaac Smith', 'academic_role': '', 'ID': 'isaac_smith'},
 {'Name': 'Tripp Dow', 'academic_role': '', 'ID': 'richard_dow'},
 {'Name': 'Kevin Cawley', 'academic_role': '', 'ID': 'kevin_cawley'},
 {'Name': 'Sofia Biriouk', 'academic_role': '', 'ID': 'sofia_biriouk'}]