In [27]:
import json
import os
from pathlib import Path

import pandas as pd

In [28]:
# Defining paths
# Root of the group-data checkout. The GROUP_DATA_DIR environment variable
# overrides the default, so the notebook is not tied to one machine's
# absolute path; when the variable is unset the original location is used.
GROUP_DATA_DIR = Path(
    os.environ.get("GROUP_DATA_DIR", "/Users/harshul/projects/kgwebsite/group-data")
)

# Directory that is globbed for per-member sub-directories further down.
MEMBERS_DIR_PATH = GROUP_DATA_DIR / 'members'
WEBSITE_DATA_PATH = GROUP_DATA_DIR / 'website_data'
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / 'content'


In [29]:
def parse_education_experience_df(member_dir):
    """Load a member's experience and education records into one DataFrame.

    Reads ``experiences.json`` and then ``education.json`` from
    ``member_dir / 'jsons'``. A file that does not exist is skipped.

    Parameters
    ----------
    member_dir : pathlib.Path
        Directory of a single member.

    Returns
    -------
    pandas.DataFrame
        One row per record, experience records first. Empty when neither
        file exists.

    Raises
    ------
    TypeError
        If the top-level JSON value of a file is neither a list nor an object.
    """
    member_json_dir = member_dir / 'jsons'
    education_experience_list = []

    for json_file in ['experiences.json', 'education.json']:
        json_path = member_json_dir / json_file
        if not json_path.exists():
            continue

        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(json_path, 'r', encoding='utf-8') as f:
            records = json.load(f)

        if isinstance(records, dict):
            # A lone object is one record. Extending a list with a dict would
            # silently add its *keys* as rows instead.
            records = [records]
        elif not isinstance(records, list):
            raise TypeError(
                f"{json_path} must contain a JSON list or object, "
                f"got {type(records).__name__}"
            )

        education_experience_list.extend(records)

    return pd.DataFrame(education_experience_list)


In [30]:
def extract_member_academic_role(people_data_frame):
    """Derive a display role from the first record of a member's entries.

    Only the first row of ``people_data_frame`` is inspected.

    Parameters
    ----------
    people_data_frame : pandas.DataFrame
        Education/experience records; the ``role``, ``degree`` and
        ``end_date`` columns are read when present.

    Returns
    -------
    str
        ``'Undergraduate Student'``, ``'Graduate Student'``, ``'Professor'``,
        ``'Postdoctoral Researcher'``, or ``""`` when no rule matches or the
        frame has no rows.
    """
    if people_data_frame.empty:
        return ""

    first_record = people_data_frame.iloc[0]
    role = first_record.get('role', None)
    degree = first_record.get('degree', None)

    if degree == 'Bachelors':
        # Test the *value*, not the presence of the column label: a frame that
        # has an 'end_date' column holding only missing values must still
        # count as an unfinished Bachelors degree.
        has_end_date = pd.notna(first_record.get('end_date', None))
        return 'Graduate Student' if has_end_date else 'Undergraduate Student'
    if role in ['Assistant Professor', 'Professor']:
        return 'Professor'
    if role in ['Visiting Researcher', 'Postdoctoral Researcher']:
        return 'Postdoctoral Researcher'
    if degree in ['Masters', 'PhD']:
        return 'Graduate Student'
    return ""


In [31]:
# Build the "current" and "alumni" people lists: one dict per member with the
# display name, the derived academic role and the member id.
current_people_page_list = []
alumni_people_page_list = []

# sorted() keeps the list order reproducible; glob order depends on the filesystem.
for member_dir in sorted(MEMBERS_DIR_PATH.glob('*')):
    member_info_fname = member_dir / 'info.json'
    if not member_info_fname.exists():
        continue

    with open(member_info_fname, 'r', encoding='utf-8') as f:
        member_info = json.load(f)

    education_experience_df = parse_education_experience_df(member_dir)

    # Ensure the columns used for filtering are present in the dataframe
    for column, default in (('end_date', None), ('group', ''), ('institution', '')):
        if column not in education_experience_df.columns:
            education_experience_df[column] = default

    # `|` binds tighter than `==`, so the institution comparison needs its own
    # parentheses; without them the expression is parsed as
    # `(isin(...) | institution) == '...'`.
    is_group_entry = (
        education_experience_df['group'].isin(['kerzendorf', 'DTI', 'ICER'])
        | (education_experience_df['institution'] == 'Michigan State University')
    )
    has_ended = education_experience_df['end_date'].notna()

    # Entries without an end date are current; entries with one are alumni.
    current_members = education_experience_df[is_group_entry & ~has_ended]
    alumni_members = education_experience_df[is_group_entry & has_ended]

    member_id = member_info['id']
    first_name = member_info['first_name']
    last_name = member_info['last_name']
    nickname = member_info.get('nick_name', None)
    # Prefer the nickname over the first name when one is set.
    name = f'{nickname if nickname else first_name} {last_name}'

    # A member with both open and closed matching entries lands in both lists.
    if not current_members.empty:
        current_academic_role = extract_member_academic_role(current_members)
        current_people_page_list.append({"Name": name, "academic_role": current_academic_role, "ID": member_id})
    if not alumni_members.empty:
        alumni_academic_role = extract_member_academic_role(alumni_members)
        alumni_people_page_list.append({"Name": name, "academic_role": alumni_academic_role, "ID": member_id})

In [32]:
# Display the collected current-member entries (dicts with Name, academic_role, ID).
current_people_page_list


[]

In [33]:
# Display the collected alumni entries (dicts with Name, academic_role, ID).
alumni_people_page_list


[]