### This notebook consist of code for creating the html files for the website each time data is updated.

##### Set-up

In [317]:
#Importing classes from the Jinja2 library to load and render templates.
import json
import os
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import UndefinedError
from pathlib import Path

In [318]:
# Defining paths
GROUP_DATA_DIR = Path("/Users/harshul/projects/kgwebsite/group-data")

MEMBERS_DIR_PATH = GROUP_DATA_DIR / 'members/'
WEBSITE_DATA_PATH = GROUP_DATA_DIR / 'website_data/'
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / 'content'
TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / 'groupwebsite_generator' / 'templates'
HOSTING_PATH = GROUP_DATA_DIR.parent / 'kerzendorf-group.github.io' 


In [319]:
#Function for creating proper html file names
def page_link(a):
    if ' ' in a:
        return a.replace(' ', '_')
    else:
        return a

In [320]:
# creating dataframe for education and experiences
def parse_education_experience_df(member_dir):
    member_json_dir = member_dir / 'jsons'
    education_experience_list = []

    if (member_experiences_dir := (member_json_dir / 'experiences.json')).exists():
        with open(member_experiences_dir, 'r') as f:
            education_experience_list += json.load(f)
    else:
        print(member_experiences_dir)
    
    if (member_education_dir := (member_json_dir / 'education.json')).exists():
        with open(member_education_dir, 'r') as f:
            education_experience_list += json.load(f)
    
    return pd.DataFrame(education_experience_list)


In [321]:
# To see if member is current member and their roles
member_records = []

def extract_member_academic_role(education_experience_df):
    current_academic_role = ""  
    is_current_member = False
    if 'end_date' not in education_experience_df.columns:
        education_experience_df['end_date'] = None
    filtered_e_e_df = education_experience_df[education_experience_df.end_date.isna()]
    if 'institution' in filtered_e_e_df.columns:
        filtered_e_e_df = filtered_e_e_df[filtered_e_e_df['institution'] == 'Michigan State University']
    else:

        print("No 'institution' column found in dataframe.")
        filtered_e_e_df = pd.DataFrame()

    
    

    if len(filtered_e_e_df) == 0: 
       pass
        
    
    elif len(filtered_e_e_df) == 1:
        role = filtered_e_e_df.iloc[0].get('role', None)  
        degree = filtered_e_e_df.iloc[0].get('degree', None)  
        if role in ['Assistant Professor', 'Professor']:
            current_academic_role = 'Professor'
        elif role in ['Visiting Researcher', 'Postdoctoral Researcher']:
                current_academic_role = 'Postdoctoral Researcher'  
        elif degree in ['Masters', 'PhD']:
                current_academic_role = 'Graduate Student'     
        elif degree == 'Bachelors':

                current_academic_role = 'Graduate Student' if filtered_e_e_df.iloc[0].get('degree')   is None else 'Undergraduate Student'
        is_current_member = True
         

    else:
        raise ValueError('Not sure what happened multiple positions at MSU')
    
    return current_academic_role, is_current_member



In [322]:
# for creating two lists

current_people_page_list = []
alumni_people_page_list = []


for member_dir in MEMBERS_DIR_PATH.glob('*'):
    if not (member_info_fname := member_dir / 'info.json').exists():
        continue
    else:
        member_info = json.load(open(member_info_fname, 'r'))
    education_experience_df = parse_education_experience_df(member_dir)
    current_academic_role, is_current_member = extract_member_academic_role(education_experience_df)

    
    first_name = member_info['first_name']
    last_name = member_info['last_name']
    nickname = member_info.get('nick_name', None)

    name = f'{nickname if nickname else first_name} {last_name}'

    if is_current_member:
        current_people_page_list.append({"Name": name, "academic_role": current_academic_role})
    else:
        alumni_people_page_list.append({"Name": name, "academic_role": current_academic_role})




/Users/harshul/projects/kgwebsite/group-data/members/erin_visser/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/vicente_amado/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/yuki_matsumura/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/isaac_smith/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/hayden_monk/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/atharva_arya/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/bea_lu/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/sona_chitchyan/jsons/experiences.json
No 'institution' column found in dataframe.
/Users/harshul/projects/kgwebsite/group-data/members/iliomar_rodriguez_ramos/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/jaladh_singhal/jsons/experiences.json
/Users/harshul/projects/kgwebsite/group-data/members/jack

In [323]:
current_people_page_list

[{'Name': 'Josh Shields', 'academic_role': 'Graduate Student'},
 {'Name': 'Anirban  Dutta', 'academic_role': 'Postdoctoral Researcher'},
 {'Name': 'Erin Visser', 'academic_role': 'Undergraduate Student'},
 {'Name': 'Vicente  Amado Olivo', 'academic_role': 'Graduate Student'},
 {'Name': 'Yuki Matsumura', 'academic_role': 'Graduate Student'},
 {'Name': 'Andrew Fullard', 'academic_role': ''},
 {'Name': 'Hayden Monk', 'academic_role': 'Undergraduate Student'},
 {'Name': 'Bea Lu', 'academic_role': 'Undergraduate Student'},
 {'Name': "Jack O'Brien", 'academic_role': 'Graduate Student'},
 {'Name': 'Cecelia Powers', 'academic_role': 'Undergraduate Student'},
 {'Name': 'Sofia Biriouk', 'academic_role': 'Undergraduate Student'},
 {'Name': 'Jing Lu', 'academic_role': 'Postdoctoral Researcher'},
 {'Name': 'Wolfgang Kerzendorf', 'academic_role': 'Professor'}]

In [324]:
alumni_people_page_list

[{'Name': 'Isaac Smith', 'academic_role': ''},
 {'Name': 'Atharva Arya', 'academic_role': ''},
 {'Name': 'Tripp Dow', 'academic_role': ''},
 {'Name': 'Sona Chitchyan', 'academic_role': ''},
 {'Name': 'Iliomar Rodriguez-Ramos', 'academic_role': ''},
 {'Name': 'Jaladh Singhal', 'academic_role': ''},
 {'Name': 'Template Person', 'academic_role': ''},
 {'Name': 'Kevin Cawley', 'academic_role': ''},
 {'Name': 'Alexander Grunewald', 'academic_role': ''},
 {'Name': 'Harshul Gupta', 'academic_role': ''}]

In [325]:
#Creating an instance of the Environment class that looks for templates. Page_link is set to the global variable so that it can be accessed by all templates
environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH), extensions=['jinja2.ext.loopcontrols'])
environment.globals['page_link'] = page_link


In [326]:
content_id_data = {"article_id": [], "category": [], "date": [], "tags": []}


for json_file in os.listdir(CONTENT_DIR_PATH):
    if json_file.endswith('.json'):
        json_path = os.path.join(CONTENT_DIR_PATH, json_file)
        with open(json_path, 'r') as file:
            info = json.load(file)
            if info.get('display'):
                content_id_data['article_id'].append(info.get('article_id'))
                content_id_data['category'].append(info.get('category'))
                content_id_data['date'].append(info.get('article_date'))
                content_id_data['tags'].append(info.get('tags'))

content_df = pd.DataFrame(content_id_data)
content_df['date'] = pd.to_datetime(content_df['date'], format='%m-%d-%Y')
content_df = content_df.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

In [327]:
research_content_unsorted = content_df[content_df['tags'].apply(lambda x: any('research' in tag for tag in x))]
research_content = research_content_unsorted.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  research_content = research_content_unsorted.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)


In [328]:
news_content_unsorted = content_df[content_df['tags'].apply(lambda x: any('news' in tag for tag in x))]
news_content = news_content_unsorted.sort_values(by="date", ascending=False)

In [329]:
latest_content_df = pd.DataFrame()

for category in content_df.category.unique():
    latest_data = pd.Series(content_df[content_df.category == category].iloc[0])
    latest_content_df = latest_content_df._append(latest_data, ignore_index=True)

latest_content_df['date'] = pd.to_datetime(latest_content_df['date'], format='%m-%d-%Y')
latest_content_df = latest_content_df.sort_values(by='date', ascending=False)

In [330]:
json_files = ['general', 'homepage', 'research', 'support', 'contact']
data = {}

for json_file in json_files:
   
    json_file_path = WEBSITE_DATA_PATH / f"{json_file}.json"

    try:
        with open(json_file_path, 'r') as json_var:
            data[json_file] = json.load(json_var)
    except (FileNotFoundError, json.JSONDecodeError):
        pass

In [331]:
!pwd

/Users/harshul/projects/kgwebsite/groupwebsite_generator/notebooks


##### Homepage

In [332]:
homepage_template = environment.get_template('homepage.html.j2')

In [333]:
homepage_content = \
    homepage_template.render(general=data['general'],
                             homepage=data['homepage'],
                             recent_content=latest_content_df.to_dict(orient='records'))

UndefinedError: 'ContentData' is undefined

In [None]:
with open('../kerzendorf-group.github.io/index.html', mode='w', encoding='utf-8') as Homepage:
    Homepage.write(homepage_content)

##### People Page

In [337]:
people_template = environment.get_template("people.html.j2")

In [338]:
people_content = people_template.render(general=data["general"], 
                                        current_members=current_people_page_list)

In [339]:
people_file_path = HOSTING_PATH / "People.html"

with open(people_file_path, mode="w", encoding="utf-8") as people:
    people.write(people_content)

##### Individual People Page

In [None]:
ind_person_template = environment.get_template("individual_person.html.j2")

In [None]:
for person in people_df['id']:
            filename = f"../kerzendorf-group.github.io/members/{ person }/{ person }.html"
            ind_person_content = ind_person_template.render(general=data["general"], 
                                                            member_id=person, 
                                                            content=content_df.to_dict(orient='records'))
            with open(filename, mode="w", encoding="utf-8") as page:
                page.write(ind_person_content)

##### Research Page

In [None]:
research_template = environment.get_template("research.html.j2")

In [None]:
main_page_research_content = research_template.render(general=data["general"],
                                            content=research_content)

In [None]:
with open("../kerzendorf-group.github.io/Research.html", mode="w", encoding="utf-8") as research:
        research.write(main_page_research_content)

In [None]:
sub_research_template = environment.get_template("sub_research_frontpage.html.j2")

In [None]:
for category in content_df.loc[content_df.category != "News", "category"].unique():
        sub_research_content = sub_research_template.render(general=data["general"], 
                                                            research_general=data["research"], 
                                                            content = research_content,
                                                            category = category
                                                            )
        folder_path = f"../kerzendorf-group.github.io/sub_research/{page_link(category.lower())}"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"../kerzendorf-group.github.io/sub_research/{page_link(category.lower())}.html", mode="w", encoding="utf-8") as sub_research:
            sub_research.write(sub_research_content)

##### Individual Research Page

In [None]:
template_no_twitter = environment.get_template("research_page_no_twitter.html.j2")

In [None]:
for ind_research_keys, ind_research_values in research_content.iterrows():
    if "news" not in ind_research_values.category.lower():
        ind_research_content = template_no_twitter.render(general=data["general"], 
                                                          member_ids = people_df['id'],
                                                          nonmem_ids = non_mem_df['id'],
                                                          content = ind_research_values
                                                          
                                                            )
        folder_path = f"../kerzendorf-group.github.io/sub_research/{page_link(ind_research_values.category.lower())}"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"{ folder_path }/{page_link(ind_research_values.article_id.lower())}.html", mode="w", encoding="utf-8") as ind_research_page:
            ind_research_page.write(ind_research_content)

##### News Page

In [None]:
news_content

In [None]:
news_template = environment.get_template("news.html.j2")

In [None]:
news_page_content = news_template.render(general=data["general"],
                                         content=news_content,
                                         member_ids=people_df['id'],
                                         nonmem_ids = non_mem_df['id'],
                                         category="News")

In [None]:
with open("../kerzendorf-group.github.io/News.html", mode="w", encoding="utf-8") as news:
        news.write(news_page_content)

##### Individual News Pages

In [None]:
news_template_no_twitter = environment.get_template("news_page_no_twitter.html.j2")
#news_template_twitter = environment.get_template("news_page_twitter.html.j2")

In [None]:
for ind_news_keys, ind_news_values in news_content.iterrows():
        ind_news_content = news_template_no_twitter.render(general=data["general"], 
                                                          member_ids = people_df['id'],
                                                          nonmem_ids = non_mem_df['id'],
                                                          content = ind_news_values
                                                            )
        folder_path = f"../kerzendorf-group.github.io/news/"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"{ folder_path }/{page_link(ind_news_values.article_id.lower())}.html", mode="w", encoding="utf-8") as ind_news_page:
            ind_news_page.write(ind_news_content)

##### Support Page

In [None]:
support_template = environment.get_template('support.html.j2')

In [None]:
support_content = support_template.render(general=data["general"], support=data["support"])

In [None]:
with open('../kerzendorf-group.github.io/Support.html', mode='w', encoding='utf-8') as support:
    support.write(support_content)

##### Contact

In [None]:
contact_template = environment.get_template('contact.html.j2')

In [None]:
contact_content = contact_template.render(general=data["general"], contact=data["contact"])

In [None]:
with open('../kerzendorf-group.github.io/Contact.html', mode='w', encoding='utf-8') as contact:
    contact.write(contact_content)