In [31]:

import json
import pandas as pd
from pathlib import Path


In [32]:
# Defining paths
group_data = Path("/Users/harshul/website clone/harshul/test/group-data/")

MEMBERS_DIR_PATH = group_data / 'members/'
WEBSITE_DATA_PATH = group_data / 'website_data/'
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / 'content'


In [34]:
def generate_education_experience_df(top_experience_role, top_education_degree, top_education_end_date):
    data = {
        'type': ['experience', 'education'],
        'role': [top_experience_role, None],
        'degree': [None, top_education_degree], 
        'end_date': [None, top_education_end_date]
    }
    return pd.DataFrame(data)

def determine_academic_role(df):
    for _, row in df.iterrows():
        if row['type'] == 'experience':
            if 'Assistant Professor' in row['role'] or 'Professor' in row['role']:
                return 'Professor'
            elif row['role'] in ['Visiting Researcher', 'Postdoctoral Researcher']:
                return 'Postdoctoral researcher'
        elif row['type'] == 'education':
            if row['degree'] == 'Bachelors':
                return 'Graduate Student' if row['end_date'] is not None else 'Undergraduate Student'
            elif row['degree'] in ['Masters', 'PhD']:
                return 'Graduate Student'

    return 'Other'

def extract_member_data(member_dir):
    if member_dir.name.startswith('.') or not (member_dir / 'info.json').exists():
        return None, None, None, None

    with open(member_dir / 'info.json', 'r') as f:
        member_record = json.load(f)
    member_json_dir = member_dir / 'jsons'

    top_experience_role = ""
    top_education_degree = ""
    top_education_end_date = None
  
    if (member_experiences := (member_json_dir / 'experiences.json')).exists():
        with open(member_experiences, 'r') as f:
            experiences = json.load(f)
            if experiences:
                top_experience_role = experiences[0].get('role', "")
    
    if (member_education := (member_json_dir / 'education.json')).exists():
        with open(member_education, 'r') as f:
            education = json.load(f)
            if education:
                top_education_degree = education[0].get('degree', "")
                top_education_end_date = education[0].get('end_date')

    return member_record, top_experience_role, top_education_degree, top_education_end_date

member_records = []

for member_dir in MEMBERS_DIR_PATH.glob('*'):
    member_record, top_experience_role, top_education_degree, top_education_end_date = extract_member_data(member_dir)
    
    if not member_record:
        continue
    
    create_edu_ex_df = generate_education_experience_df(top_experience_role, top_education_degree, top_education_end_date)
    academic_role = determine_academic_role(create_edu_ex_df)

    nick_name = member_record.get('nick_name', None)
    second_name = member_record.get('last_name', "")
    if not nick_name:
        first_name = member_record.get('first_name', "")
        last_name = member_record.get('last_name', "")
        name = f"{first_name} {last_name}"
    else:
        name = f"{nick_name} {second_name}"

    member_records.append({"Name": name, "Academic Role": academic_role})

education_experience_df = pd.DataFrame(member_records)
print(education_experience_df)

                       Name            Academic Role
0              Josh Shields         Graduate Student
1            Anirban  Dutta  Postdoctoral researcher
2               Erin Visser    Undergraduate Student
3      Vicente  Amado Olivo         Graduate Student
4            Yuki Matsumura         Graduate Student
5            Andrew Fullard         Graduate Student
6               Isaac Smith         Graduate Student
7               Hayden Monk    Undergraduate Student
8              Atharva Arya         Graduate Student
9                 Tripp Dow    Undergraduate Student
10                   Bea Lu    Undergraduate Student
11           Sona Chitchyan                    Other
12  Iliomar Rodriguez-Ramos    Undergraduate Student
13           Jaladh Singhal         Graduate Student
14             Jack O'Brien         Graduate Student
15          Template Person                    Other
16           Cecelia Powers    Undergraduate Student
17             Kevin Cawley         Graduate S