In [30]:
import json
import pandas as pd
from pathlib import Path
from datetime import datetime


In [31]:
# Defining paths
GROUP_DATA_DIR = Path("/Users/harshul/projects/kgwebsite/group-data")

MEMBERS_DIR_PATH = GROUP_DATA_DIR / 'members/'
WEBSITE_DATA_PATH = GROUP_DATA_DIR / 'website_data/'
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / 'content'


In [32]:
def parse_education_experience_df(member_dir):
    member_json_dir = member_dir / 'jsons'
    education_experience_list = []

    if (member_experiences_dir := (member_json_dir / 'experiences.json')).exists():
        with open(member_experiences_dir, 'r') as f:
            education_experience_list += json.load(f)
    else:
        print(member_experiences_dir)
    
    if (member_education_dir := (member_json_dir / 'education.json')).exists():
        with open(member_education_dir, 'r') as f:
            education_experience_list += json.load(f)
    
    return pd.DataFrame(education_experience_list)

In [33]:
for member_dir in MEMBERS_DIR_PATH.glob('*'):
    if not (member_info_fname := member_dir / 'info.json').exists():
      continue
    else:
        member_info = json.load(open(member_info_fname, 'r'))
    print(member_dir, type(member_dir))

    education_experience_df = parse_education_experience_df(member_dir)



/Users/harshul/projects/kgwebsite/group-data/members/josh_shields <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/anirban_dutta <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/erin_visser <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/vicente_amado <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/vicente_amado/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/yuki_matsumura <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/yuki_matsumura/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/andrew_fullard <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/isaac_smith <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/hayden_monk <class 'pathlib.PosixPath'>
/Users/harshul/projects/kgwebsite/group-data/members/hayden_monk

In [34]:
member_dir = Path("/Users/harshul/projects/kgwebsite/group-data/members/isaac_smith")
parse_education_experience_df(member_dir)

Unnamed: 0,organisation,city,state,country,role,start_date,end_date,degree,subject,institution
0,Michigan State University,East Lansing,Michigan,USA,Professorial Assistant,2020-01-01,2023-12-31,,,
1,,East Lansing,Michigan,USA,,2020-01-01,2024-12-31,Bachelors,Physics and Mathematics,Michigan State University


In [35]:
def extract_member_academic_role(education_experience_df):
    current_academic_role = ""
    is_current_member = False
    
    if 'end_date' not in education_experience_df.columns:
        education_experience_df['end_date'] = None
    if 'group' not in education_experience_df.columns:
        education_experience_df['group'] = None
    if 'institution' not in education_experience_df.columns:
        education_experience_df['institution'] = None    
    # Filter based on 'institution' or 'group'
    condition_institution = (education_experience_df['institution'] == 'Michigan State University')
    condition_group = education_experience_df['group'].isin(['kerzendorf', 'DTI', 'ICER'])
    
    filtered_e_e_df = education_experience_df[condition_institution | condition_group]
    
    if len(filtered_e_e_df) == 0: 
       pass
    
    elif len(filtered_e_e_df) >= 1:
        role = filtered_e_e_df.iloc[0].get('role', None)  
        degree = filtered_e_e_df.iloc[0].get('degree', None)  
        
        if role in ['Assistant Professor', 'Professor']:
            current_academic_role = 'Professor'
        if role == "Professorial Assistant":
            current_academic_role = 'Professorial Assistant'
        elif role in ['Visiting Researcher', 'Postdoctoral Researcher']:
            current_academic_role = 'Postdoctoral Researcher' 
        elif degree in ['Masters', 'PhD']:
            current_academic_role = 'Graduate Student' 
        elif degree == 'Bachelors':
            current_academic_role = 'Graduate Student' if filtered_e_e_df.iloc[0].get('end_date') is None else 'Undergraduate Student'
        
    has_end_date = any(not pd.isna(date) for date in filtered_e_e_df['end_date'])
    is_current_member = not has_end_date

    return current_academic_role, is_current_member

In [36]:

# for creating two lists

current_people_page_list = []
alumni_people_page_list = []


for member_dir in MEMBERS_DIR_PATH.glob('*'):
    print(member_dir)
    if not (member_info_fname := member_dir / 'info.json').exists():
        continue
    else:
        member_info = json.load(open(member_info_fname, 'r'))
    education_experience_df = parse_education_experience_df(member_dir)
    current_academic_role, is_current_member = extract_member_academic_role(education_experience_df)

    
    first_name = member_info['first_name']
    print(first_name)
    last_name = member_info['last_name']
    nickname = member_info.get('nick_name', None)
    id = member_info['id']
    image_path = member_info['image_path']  
    cover_image_path = member_info['cover_image_path']   

    name = f'{nickname if nickname else first_name} {last_name}'

    if is_current_member:
        current_people_page_list.append({"Name": name, "academic_role": current_academic_role, "ID": id})
    else:
        alumni_people_page_list.append({"Name": name, "academic_role": current_academic_role, "ID": id})




/Users/harshul/projects/kgwebsite/group-data/members/josh_shields
Joshua
/Users/harshul/projects/kgwebsite/group-data/members/anirban_dutta
Anirban 
/Users/harshul/projects/kgwebsite/group-data/members/erin_visser
Erin
/Users/harshul/projects/kgwebsite/group-data/members/.DS_Store
/Users/harshul/projects/kgwebsite/group-data/members/vicente_amado
/Users/harshul/projects/kgwebsite/group-data/members/vicente_amado/jsons/experiences.json
Vicente
/Users/harshul/projects/kgwebsite/group-data/members/yuki_matsumura
/Users/harshul/projects/kgwebsite/group-data/members/yuki_matsumura/jsons/experiences.json
Yuki
/Users/harshul/projects/kgwebsite/group-data/members/andrew_fullard
Andrew
/Users/harshul/projects/kgwebsite/group-data/members/isaac_smith
Isaac
/Users/harshul/projects/kgwebsite/group-data/members/hayden_monk
/Users/harshul/projects/kgwebsite/group-data/members/hayden_monk/jsons/experiences.json
Hayden
/Users/harshul/projects/kgwebsite/group-data/members/atharva_arya
Atharva
/Users/ha

In [37]:
current_people_page_list

[{'Name': 'Josh Shields',
  'academic_role': 'Graduate Student',
  'ID': 'josh_shields'},
 {'Name': 'Anirban  Dutta',
  'academic_role': 'Postdoctoral Researcher',
  'ID': 'anirban_dutta'},
 {'Name': 'Erin Visser', 'academic_role': '', 'ID': 'erin_visser'},
 {'Name': 'Yuki Matsumura',
  'academic_role': 'Graduate Student',
  'ID': 'yuki_matsumura'},
 {'Name': 'Andrew Fullard', 'academic_role': '', 'ID': 'andrew_fullard'},
 {'Name': 'Hayden Monk',
  'academic_role': 'Graduate Student',
  'ID': 'hayden_monk'},
 {'Name': 'Atharva Arya', 'academic_role': '', 'ID': 'atharva_arya'},
 {'Name': 'Tripp Dow', 'academic_role': '', 'ID': 'richard_dow'},
 {'Name': 'Sona Chitchyan', 'academic_role': '', 'ID': 'sona_chitchyan'},
 {'Name': 'Iliomar Rodriguez-Ramos',
  'academic_role': '',
  'ID': 'iliomar_rodriguez_ramos'},
 {'Name': 'Jaladh Singhal', 'academic_role': '', 'ID': 'jaladh_singhal'},
 {'Name': "Jack O'Brien",
  'academic_role': 'Graduate Student',
  'ID': 'jack_o_brien'},
 {'Name': 'Templ

In [38]:
alumni_people_page_list

[{'Name': 'Vicente  Amado Olivo',
  'academic_role': 'Graduate Student',
  'ID': 'vicente_amado'},
 {'Name': 'Isaac Smith',
  'academic_role': 'Undergraduate Student',
  'ID': 'isaac_smith'},
 {'Name': 'Bea Lu', 'academic_role': 'Professorial Assistant', 'ID': 'bea_lu'},
 {'Name': 'Kevin Cawley',
  'academic_role': 'Professorial Assistant',
  'ID': 'kevin_cawley'},
 {'Name': 'Alexander Grunewald',
  'academic_role': 'Undergraduate Student',
  'ID': 'alexander_grunewald'}]

In [39]:
member_dir

PosixPath('/Users/harshul/projects/kgwebsite/group-data/members/wolfgang_kerzendorf')

In [40]:
filtered_e_e_df = filtered_e_e_df[filtered_e_e_df['institution'] == 'Michigan State University']

NameError: name 'filtered_e_e_df' is not defined

In [None]:
filtered_e_e_df