### This notebook consists of the code that creates the HTML files for the website each time the data is updated.

# Set-up

Importing libraries

In [358]:
import json
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import shutil
from datetime import datetime, date
import numpy as np
from PIL import Image

Defining paths

In [359]:
# All locations are relative paths, so they resolve against the notebook's working directory.
# Root of the group-data checkout; most other paths below are derived from it.
GROUP_DATA_DIR = Path("../../group-data")
# Jinja2 templates live in the sibling "groupwebsite_generator" directory.
TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / "groupwebsite_generator" / "templates"
# JSON files describing the site content (read by loading_website_data).
WEBSITE_DATA_PATH = GROUP_DATA_DIR / "website_data/"
# Output directory: every generated HTML page and copied asset is written below it.
HOSTING_PATH = GROUP_DATA_DIR.parent / "kerzendorf-lab.github.io"
# Article sources; searched recursively for info.json files.
ARTICLE_DIR_PATH = Path("../../research_news/articles")
# Destination for article images copied into the hosted site.
ARTICLE_IMAGE_DESTINATION_DIR = (HOSTING_PATH / "website_files" / "images" / "article_content")
# One sub-directory per member, each holding an info.json.
MEMBERS_DIR_PATH = GROUP_DATA_DIR / "members/"
SUB_RESEARCH_PATH = HOSTING_PATH / "sub_research"
OPPORTUNITIES_PATH = WEBSITE_DATA_PATH / "content" / "opportunities.json"
# JSON mapping used to rank member roles when sorting the current-members table.
ROLE_HIERARCHY_PATH = WEBSITE_DATA_PATH / "role_hierarchy.json"
# NOTE(review): GENERAL_TAGS is not referenced anywhere else in this notebook - presumably
# consumed by templates or kept for reference; confirm before removing.
GENERAL_TAGS = [
    "Paper",
    "Poster", 
    "Talk",
    "Award",
    "New Team Member",
    "PhD",
    "Conference",
    "Undergraduate",
    "Event",
    "Achievement"
]

# Define tag colors mapping
# Keys are lower-case because get_tag_color lower-cases the tag before the lookup.
TAG_COLORS = {
    'paper': '#FF6B6B',  # Coral red
    'poster': '#4ECDC4', # Turquoise
    'talk': '#45B7D1',   # Light blue
    'award': '#96CEB4',  # Sage green
    'new team member': '#FFBE0B', # Golden yellow
    'phd': '#9B5DE5',    # Purple
    'conference': '#FF006E', # Pink
    'undergraduate': '#8338EC', # Violet
    'event': '#3A86FF',  # Royal blue
    'achievement': '#FB5607', # Orange
    'astrophysics': '#2EC4B6', # Teal
    'machine learning': '#FF9F1C', # Light orange
    'software': '#E71D36', # Bright red
    'research': '#011627', # Dark blue
    'news': '#41EAD4'    # Cyan
}

Setting up jinja environment

In [360]:
# Function to create proper HTML file names by replacing spaces with underscores
def page_link(a):
    """
    Return ``a`` with every blank space (" ") replaced by an underscore ("_").

    Parameters:
    ----------
    a : str
        Text to turn into a file-name fragment (e.g. a category or article id).

    Returns:
    -------
    str
        The same text with spaces replaced; text without spaces is returned unchanged.
    """
    # str.replace is already a no-op when there is nothing to replace.
    return a.replace(" ", "_")

# Look up the display color of a tag; unknown tags fall back to a neutral gray
def get_tag_color(tag):
    """Return the hex color registered for ``tag`` (case-insensitive), or gray if unknown."""
    fallback_color = '#6c757d'
    normalized_tag = tag.lower()
    if normalized_tag in TAG_COLORS:
        return TAG_COLORS[normalized_tag]
    return fallback_color



In [361]:
# Jinja2 environment used to render every page template.
environment = Environment(
    loader=FileSystemLoader(TEMPLATE_DIR_PATH),
    extensions=["jinja2.ext.loopcontrols", "jinja2.ext.do"],
)
# Helpers and lookup tables made available to all templates as globals.
environment.globals.update(
    page_link=page_link,
    tag_colors=TAG_COLORS,
    get_tag_color=get_tag_color,
)

# Data Processing Setup

Data Processing Parameters

In [362]:
# Needed columns for articles
# NOTE(review): not referenced elsewhere in this notebook - confirm whether it is still needed.
ARTICLE_METADATA_FIELDS = [
    "article_id",
    "category",
    "date",
    "tags",
    "title",
    "cover_image",
    "short_description"
]
# Groups and institution used in filtering data
# GROUP_FILTER is matched against the "group" field of member experiences;
# INSTITUTION_FILTER is compared with the "institution" field of member education entries.
GROUP_FILTER = ["DTI", "TARDIS", "ICER", "kerzendorf"]
INSTITUTION_FILTER = "Michigan State University"

# Map roles to standardized roles for consistency
# Applied to the final "current_role" column; roles not listed here are kept as they are.
ROLE_MAP = {
    "Assistant Professor": "Professor",
    "Professorial Assistant": "Undergraduate Student",
    "Visiting Researcher": "Postdoctoral Researcher"
}

# Map degrees to standardized academic levels
# NOTE(review): not referenced elsewhere in this notebook; the education cell hard-codes
# its own degree-to-role rules - confirm whether this mapping is still needed.
DEGREE_MAP = {
    "Masters": "Graduate Student",
    "PhD": "Postdoctorate",  # intended only when the degree has an end_date (i.e. is completed)
    "Bachelors": "Undergraduate Student",
}

# Section key -> heading text, passed to the individual member page template
# as "section_headings".
INDIVIDUAL_MEMBER_SECTION_MAP = {
    "education": "Education",
    "experiences": "Experience",
    "projects": "Projects",
    "awards": "Awards & Recognition",
    "outreach": "Outreach Programs",
}

# Functions for Data Handling

In [363]:
def loading_website_data(file_to_load):
    """
    Load one JSON file from the website data directory.

    Parameters:
    ----------
    file_to_load : str
        Name of the file to load, without the ``.json`` extension. It is looked up
        directly inside ``WEBSITE_DATA_PATH``.

    Returns:
    -------
    dict
        The decoded JSON content. An empty dictionary is returned (and a message is
        printed) when the file exists but does not contain valid JSON.

    Raises:
    ------
    FileNotFoundError:
        If the requested file does not exist.

    """
    json_path = WEBSITE_DATA_PATH / f"{file_to_load}.json"
    try:
        # A Path object is always truthy, so existence cannot be tested with `if path:`;
        # a missing file surfaces as FileNotFoundError from open().
        with open(json_path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
    except json.JSONDecodeError:
        print(f"Error decoding JSON in '{json_path}'.")
        return {}

In [364]:
def read_member_data_jsons(file_to_read):
    """
    Collect one kind of per-member JSON file into a single DataFrame.

    For every ``<member>/info.json`` below ``MEMBERS_DIR_PATH`` the file
    ``<member>/jsons/<file_to_read>`` is read if it exists. Each entry of that file is
    tagged with the member's ``id`` taken from ``info.json``.

    Parameters:
    ----------
    file_to_read : str
        File name inside each member's ``jsons`` directory (e.g. ``"education.json"``).
        The file is expected to hold a list of dictionaries.

    Returns:
    -------
    pandas.DataFrame
        All entries of all members, indexed by member ``id``. An empty DataFrame is
        returned when no member provides any entry.
    """
    member_frames = []
    for info_path in MEMBERS_DIR_PATH.glob("*/info.json"):
        with open(info_path, "r", encoding="utf-8") as f_info:
            member_unique_id = json.load(f_info)["id"]

        data_path = info_path.parent / "jsons" / file_to_read
        if not data_path.exists():
            continue
        with data_path.open("r", encoding="utf-8") as f_data:
            member_entries = json.load(f_data)
        if not member_entries:
            # An empty list would become a frame without an "id" column, which breaks
            # set_index("id") below when no member has data.
            continue

        for entry in member_entries:
            entry["id"] = member_unique_id
        member_frames.append(pd.DataFrame(member_entries))

    if not member_frames:
        return pd.DataFrame([])
    return pd.concat(member_frames, ignore_index=True).set_index("id")

In [365]:
def set_new_image_path(source_dir, old_image_path):
    """
    Copy an article image into the hosted site and return its site-relative path.

    Parameters:
    ----------
    source_dir : pathlib.Path
        Path whose parent directory contains ``media/images`` (callers pass the path of
        the article's ``info.json``).
    old_image_path : pathlib.Path
        Image path as stored in the article data; only its file name is used.

    Returns:
    -------
    str
        Path of the copied image starting at the ``website_files`` directory.

    Raises:
    ------
    ValueError:
        If ``ARTICLE_IMAGE_DESTINATION_DIR`` has no ``website_files`` component.
    FileNotFoundError:
        If the source image does not exist.
    """
    image_name = old_image_path.name
    image_source = source_dir.parent / "media" / "images" / image_name
    image_destination = ARTICLE_IMAGE_DESTINATION_DIR / image_name
    website_files_index = image_destination.parts.index("website_files")
    new_image_path = Path(*image_destination.parts[website_files_index:])
    # shutil.copy2 does not create missing directories, so make sure the target exists
    # (e.g. on a fresh checkout of the hosting repository).
    image_destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(image_source, image_destination)
    return str(new_image_path)

# DataFrame Creation and Processing

Creating dataframes for articles which can be updated further 

In [366]:
# Reading all articles
article_content_list = []
today = date.today()
# Midnight of today; computed once because it does not change between articles.
today_datetime = datetime.combine(today, datetime.min.time())
for content_file_name in ARTICLE_DIR_PATH.rglob('info.json'):
    with open(content_file_name, "r", encoding="utf-8") as fcontent:
        article_content = json.load(fcontent)
    article_date = datetime.strptime(article_content["date"], "%m-%d-%Y")
    # Keep only articles flagged for the "kg" platform that are not dated in the future.
    if "kg" not in article_content["platforms"] or article_date > today_datetime:
        continue

    # Copy the cover image and every image referenced in the content to the hosted site,
    # replacing the stored paths with the site-relative ones.
    image_path = Path(article_content["cover_image"])
    article_content["cover_image"] = set_new_image_path(content_file_name, image_path)
    for content_key, content_value in article_content["content"].items():
        if "img" in content_key:
            new_content_value = set_new_image_path(content_file_name, Path(content_value))
            article_content["content"][content_key] = new_content_value
    article_content_list.append(article_content)
article_content_df = pd.DataFrame(article_content_list)

article_content_df["date"] = pd.to_datetime(
    article_content_df["date"], format="%m-%d-%Y"
)

# Default cover image dimensions. The column itself is missing when no article defines
# the key, so create it before filling missing and blank values.
for dimension_column, default_dimension in (
    ("cover_image_height", "330px"),
    ("cover_image_width", "520px"),
):
    if dimension_column not in article_content_df.columns:
        article_content_df[dimension_column] = default_dimension
    article_content_df[dimension_column] = (
        article_content_df[dimension_column]
        .fillna(default_dimension)
        .replace("", default_dimension)
    )

# This line is only for the kerzendorf lab site and is not needed on DTI
article_content_df["category"] = article_content_df["category"].replace(
    "Overview", "Computational Metascience"
)

article_content_df['image_name'] = article_content_df['cover_image'].apply(lambda x: Path(x).name)

In [367]:
# News articles: category "News" or a "news" entry in the tag list, newest first.
is_news_category = article_content_df["category"] == "News"
has_news_tag = article_content_df["tags"].apply(
    lambda tags: isinstance(tags, list) and "news" in tags
)
news_content_df = article_content_df[is_news_category | has_news_tag].sort_values(
    by=["date"], ascending=[False]
)

# Everything outside the "News" category, grouped by category and newest first within it.
research_content_df = article_content_df[~is_news_category].sort_values(
    by=["category", "date"], ascending=[True, False]
)

In [368]:
# Gather every member's info.json into one frame indexed by the member id.
info_json_list = []
for single_info_file_path in MEMBERS_DIR_PATH.glob("*/info.json"):
    with open(single_info_file_path, "r") as f_info:
        member_data = json.load(f_info)
    info_json_list.append(member_data)


def _display_name(member_row):
    """Build "<nick or first name> <last name>", preferring the nick name when present."""
    if pd.notna(member_row["nick_name"]):
        given_name = member_row["nick_name"]
    else:
        given_name = member_row["first_name"]
    return given_name + " " + member_row["last_name"]


info_json_df = pd.DataFrame(info_json_list)
info_json_df.set_index("id", inplace=True)
info_json_df["full_name"] = info_json_df.apply(_display_name, axis=1)
# Plain-dict view ({member id: {field: value}}) handed to the templates.
info_json_dict = info_json_df.to_dict("index")

In [369]:
# Keep only the columns needed downstream. The explicit .copy() makes both frames
# independent objects, so the column assignments in later cells do not trigger
# pandas' chained-assignment (SettingWithCopy) warnings.
exp_df = read_member_data_jsons("experiences.json")[
    [
        "role",
        "start_date",
        "end_date",
        "institution",
        "group",
    ]
].copy()
edu_df = read_member_data_jsons("education.json")[
    ["start_date", "end_date", "institution", "subject", "degree"]
].copy()

In [370]:
# Parse the education dates (ISO "YYYY-MM-DD" strings) into datetimes.
for date_column in ("end_date", "start_date"):
    edu_df[date_column] = pd.to_datetime(edu_df[date_column], format='%Y-%m-%d')


def most_recent_row(group):
    """Return the row(s) of ``group`` whose ``start_date`` equals the group's latest one."""
    latest_start = group["start_date"].max()
    started_latest = group["start_date"] == latest_start
    return group[started_latest]


# Most recently started education entry per member.
edu_df_most_recent = (
    edu_df.groupby("id").apply(most_recent_row).droplevel(0)
)

# Degrees pursued at INSTITUTION_FILTER translate into an academic role;
# everything else keeps the empty default.
degree_to_role = {
    "Bachelors": "Undergraduate Student",
    "PhD": "Graduate Student",
    "Masters": "Graduate Student",
}
edu_df_most_recent['academic_role'] = ""
for edu_mem_id, edu_mem_value in edu_df_most_recent.iterrows():
    if edu_mem_value['institution'] != INSTITUTION_FILTER:
        continue
    role_for_degree = degree_to_role.get(edu_mem_value['degree'])
    if role_for_degree is not None:
        edu_df_most_recent.at[edu_mem_id, 'academic_role'] = role_for_degree
# Suffix the columns so they stay distinguishable after merging with the experience frame.
edu_df_most_recent_diff_suffix = edu_df_most_recent.add_suffix("_edu")

In [371]:
# One row of social links per member, indexed by the id found in the member's info.json.
social_link_list = []
for single_member_file_path in MEMBERS_DIR_PATH.rglob("social_links.json"):
    with open(single_member_file_path, "r") as fname:
        member_social_link = json.load(fname)
    # info.json sits two directory levels above social_links.json.
    member_root_dir = single_member_file_path.parent.parent
    with open(member_root_dir / "info.json", "r") as file_info:
        mem_id = json.load(file_info)["id"]
    member_social_link["id"] = mem_id
    social_link_list.append(member_social_link)
# Missing links become empty strings.
social_links_df = pd.DataFrame(social_link_list).set_index("id").fillna("")

In [372]:
# Newest article of every category: sort newest-first within each category,
# then keep the first row per category.
recent_content = (
    article_content_df
    .sort_values(by=["category", "date"], ascending=[True, False])
    .groupby("category")
    .head(1)
    .copy()
)

# Page Creation

Function to create a page

In [373]:
def create_page(template, html, **kwargs):
    """
    Render a Jinja2 template and write the result below ``HOSTING_PATH``.

    Parameters:
    ----------
    template : str
        File name of the Jinja2 template to render.
    html : str
        Output file path relative to ``HOSTING_PATH``; missing parent directories
        are created.
    **kwargs : dict
        Variables made available to the template. In addition the template receives
        ``TEMPLATE_LEVEL``, the number of "/" characters in ``html``.

    Returns:
    -------
    None

    """
    page_template = environment.get_template(template)
    output_path = HOSTING_PATH / html
    output_path.parent.mkdir(parents=True, exist_ok=True)
    rendered_page = page_template.render(TEMPLATE_LEVEL=html.count("/"), **kwargs)
    output_path.write_text(rendered_page, encoding="utf-8")

# Processing List Of JSON files

In [374]:
# Load the site-wide JSON documents (file names without the .json extension).
general, homepage, contact, research, support = (
    loading_website_data(data_file_name)
    for data_file_name in (
        "general",
        "homepage",
        "contact",
        "research_categories",
        "support",
    )
)

# Homepage

Rendering the homepage with the most recent article of each category

In [375]:
# Landing page: general site data, homepage content and the newest article per category.
create_page(
    template="homepage.html.j2",
    html="index.html",
    general=general,
    homepage=homepage,
    recent_content=recent_content.to_dict(orient="records"),
)

# Current Members Page

In [376]:
# Parse the experience dates; missing dates become NaT.
exp_df = exp_df.assign(
    end_date=pd.to_datetime(exp_df["end_date"], format='%Y-%m-%d'),
    start_date=pd.to_datetime(exp_df["start_date"], format='%Y-%m-%d'),
)
# Blank out missing values in the text columns only. Filling the date columns with ""
# as well would turn them into object columns: .isna() would then never match an
# open-ended experience and the .dt accessor below would raise.
text_columns = [
    column for column in exp_df.columns if column not in ("start_date", "end_date")
]
exp_df[text_columns] = exp_df[text_columns].fillna("")

# Experiences that are open-ended or end today or later.
filtered_exp_df = exp_df[
    exp_df["end_date"].isna() | (exp_df["end_date"].dt.date >= datetime.now().date())
]


def most_recent_row(group):
    """
    Return the most recently started experience of ``group`` within the filtered groups.

    Rows are ordered by ``start_date`` (newest first) and ``end_date`` (earliest first);
    only rows whose ``group`` text matches one of the ``GROUP_FILTER`` names are kept.
    The result has at most one row.
    """
    sorted_group = group.sort_values(by=['start_date', 'end_date'], ascending=[False, True])
    # The GROUP_FILTER names are joined into one regular expression, so a row is kept
    # when any of the names occurs in its group text.
    relevant_group = sorted_group[sorted_group['group'].str.contains('|'.join(GROUP_FILTER))]
    return relevant_group.head(1)


exp_df_most_recent = exp_df.groupby("id").apply(most_recent_row).droplevel(0)
# NOTE(review): the original computed this from exp_df rather than filtered_exp_df;
# the values are kept unchanged here. Confirm whether the filtered frame was intended.
filtered_exp_df_most_recent = exp_df_most_recent.copy()
# Suffix the columns so they stay distinguishable after merging with the education frame.
exp_df_most_recent_diff_suffix = exp_df_most_recent.add_suffix('_exp')

In [377]:
# Outer merge on the member id: members with only an experience row or only an
# education row are kept, with missing values on the other side.
merged_edu_exp_df = pd.merge(
    exp_df_most_recent_diff_suffix,
    edu_df_most_recent_diff_suffix,
    on='id',
    how='outer',
)

In [378]:
# Classify every member as current or former and derive the role shown on the site.
# Each row is edited as a copy (iterrows yields copies) and written back with .loc at
# the end of the iteration.
merged_edu_exp_df['isCurrent'] = False
merged_edu_exp_df['current_role'] = ""
for merged_mem_id, merged_mem_value in merged_edu_exp_df.iterrows():
    # Case 1: the most recent education entry is at INSTITUTION_FILTER.
    if merged_mem_value['institution_edu'] == INSTITUTION_FILTER:
        # Education is ongoing: no end date, or an end date that has not passed yet.
        if pd.isna(merged_mem_value['end_date_edu']) or merged_mem_value['end_date_edu'] >= datetime.now():
            merged_mem_value['isCurrent'] = True
            # Any non-missing experience end date - even a future one - marks the member
            # as not current again.
            # NOTE(review): confirm that future end dates are meant to count as "left".
            if pd.notna(merged_mem_value['end_date_exp']):
                merged_mem_value['isCurrent'] = False
            acad_role = merged_mem_value.get('academic_role_edu')
            # No fallback to role_exp in this branch: current_role stays "" when the
            # academic role is empty.
            if acad_role:
                merged_mem_value['current_role'] = merged_mem_value["academic_role_edu"]
        else:
            # Education at the institution has ended: former member; prefer the academic
            # role and fall back to the experience role.
            merged_mem_value['isCurrent'] = False
            acad_role = merged_mem_value.get('academic_role_edu')
            if acad_role:
                merged_mem_value['current_role'] = merged_mem_value["academic_role_edu"]
            else:
                merged_mem_value['current_role'] = merged_mem_value["role_exp"]
    # Case 2: no education at the institution, but an ongoing experience in one of the groups.
    # NOTE(review): this is an exact membership test against GROUP_FILTER, whereas
    # most_recent_row selects experiences by substring/regex match - confirm which is intended.
    elif merged_mem_value['group_exp'] in GROUP_FILTER and (pd.isna(merged_mem_value['end_date_exp']) or merged_mem_value['end_date_exp'] >= datetime.now()):
            merged_mem_value['isCurrent'] = True
            merged_mem_value['current_role'] = merged_mem_value["role_exp"]
    # Case 3: everything else is treated as a former member.
    else:
        merged_mem_value['isCurrent'] = False
        acad_role = merged_mem_value.get('academic_role_edu')
        # NOTE(review): a NaN academic_role_edu (member without an education row after the
        # outer merge) is truthy, so current_role becomes NaN here instead of role_exp.
        if acad_role:
            merged_mem_value['current_role'] = merged_mem_value["academic_role_edu"]
        else:
            merged_mem_value['current_role'] = merged_mem_value["role_exp"]
    # Write the edited copy back into the frame.
    merged_edu_exp_df.loc[merged_mem_id] = merged_mem_value
# Standardize role names; roles without an entry in ROLE_MAP are left unchanged.
merged_edu_exp_df['current_role'] = merged_edu_exp_df['current_role'].replace(ROLE_MAP)

In [379]:
# Current members: role column only, then enriched with the info.json fields.
is_current_member = merged_edu_exp_df['isCurrent'].eq(True)
current_member_df = merged_edu_exp_df.loc[is_current_member, ["current_role"]]
current_member_df_with_info = current_member_df.merge(info_json_df, on='id', how='inner')

In [380]:
# Former members: role column only, plus the full name for the alumni page.
is_alumni_member = merged_edu_exp_df['isCurrent'].eq(False)
alumni_member_df = merged_edu_exp_df.loc[is_alumni_member, ["current_role"]]
alumni_member_df_with_info = alumni_member_df.merge(info_json_df, on='id', how='inner')[
    ['current_role', 'full_name']
]

In [381]:
# Projects of all members, ordered by end date (latest first) while the dates are still
# ISO strings; rows without an end date are placed last by sort_values.
# NOTE(review): open-ended (presumably ongoing) projects therefore come after finished
# ones - confirm this is the intended order for picking a member's "current" project.
projects_sorted = read_member_data_jsons("projects.json").sort_values(
    by=["end_date"], ascending=False
)
projects_df = projects_sorted.assign(
    end_date=pd.to_datetime(projects_sorted['end_date'], format='%Y-%m-%d'),
    start_date=pd.to_datetime(projects_sorted['start_date'], format='%Y-%m-%d'),
).fillna("")

In [None]:
# Attach the title of the first listed project to every current member.
for mem_key in current_member_df.index:
    # Reset per member so a title can never be carried over from the previous iteration.
    current_project_title = ""
    if mem_key in projects_df.index:
        mem_projects = projects_df.loc[mem_key]
        if isinstance(mem_projects, pd.Series):
            # Exactly one project: .loc returns that single row as a Series.
            current_project_title = mem_projects["project_title"]
        elif not mem_projects.empty:
            # Several projects: take the first one in projects_df order.
            current_project_title = mem_projects.iloc[0]["project_title"]
    current_member_df_with_info.loc[mem_key, "current_project_title"] = current_project_title

Sorting the members on the basis of their roles

In [None]:
# Order the current members by the rank their role has in role_hierarchy.json.
with open(ROLE_HIERARCHY_PATH, "r") as file_name:
    role_hierarchy = json.load(file_name)

# The helper "rank" column only exists while sorting.
current_member_df_with_info = (
    current_member_df_with_info
    .assign(rank=current_member_df_with_info['current_role'].map(role_hierarchy))
    .sort_values(by='rank')
    .drop(columns='rank')
)
current_member_df_with_info[['current_role', 'full_name', 'image_path', 'cover_image_path','current_project_title']]

Unnamed: 0_level_0,current_role,full_name,image_path,cover_image_path,current_project_title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
wolfgang_kerzendorf,Professor,Wolfgang Kerzendorf,media/images/wolfgang.jpg,media/images/cover.jpg,Supernovae & Computational Metaresearch
anirban_dutta,Postdoctoral Researcher,Anirban Dutta,media/images/anirban_dutta.jpg,media/images/cover.jpg,Non-LTE modeling of supernova spectra
jing_lu,Postdoctoral Researcher,Jing Lu,media/images/jing.jpg,media/images/cover.jpg,Explore the hidden Helium in Type Ic Supernovae
vicente_amado,Graduate Student,Vicente Amado Olivo,media/images/ESD_headshot.jpg,media/images/cover.jpg,Development Of A Global Registry For Peer Revi...
erin_visser,Undergraduate Student,Erin Visser,media/images/erin_visser_website_pic.jpg,media/images/cover.jpg,
alexander_grunewald,Undergraduate Student,Alexander Grunewald,media/images/alexander.jpg,media/images/cover.jpg,Emulator Project
cecelia_powers,Undergraduate Student,Cecelia Powers,media/images/cecelia_powers_profilepic.jpeg,media/images/cover.jpg,
bea_lu,Undergraduate Student,Bea Lu,media/images/bea_lu.jpg,media/images/cover.jpg,Natural Language Processing
ryan_groneck,Undergraduate Student,Ryan Groneck,media/images/Ryan_Groneck_Website_Picture.jpg,media/images/cover.jpg,
logan_mcclellan,Undergraduate Student,Logan McClellan,media/images/logan_mcclellan.jpg,media/images/cover.jpg,


Current Members Page

In [None]:
# Current members page: the ranked member table plus each member's social links.
create_page(
    template="current_members.html.j2",
    html="current_members.html",
    general=general,
    current_members=current_member_df_with_info,
    socials=social_links_df.to_dict("index"),
)

Alumni Members Page

In [None]:
# Alumni page: former members with their role and full name.
create_page(
    template="alumni_members.html.j2",
    html="alumni_members.html",
    general=general,
    alumni_members=alumni_member_df_with_info,
)

## Individual People Page

In [None]:
def group_df(df):
    """
    Group the rows of ``df`` by member id into plain dictionaries.

    Missing values are replaced by empty strings first. The result maps each id to
    ``{"info": [row dictionaries of that member]}``.
    """
    records_per_member = (
        df.fillna("")
        .groupby("id")
        .apply(lambda member_rows: member_rows.to_dict(orient="records"))
    )
    return records_per_member.to_frame(name="info").to_dict(orient="index")

In [None]:
# Per-member documents, indexed by member id.
document_df = read_member_data_jsons(file_to_read="documents.json")

In [None]:
outreach_df = read_member_data_jsons("outreach.json")
# Always bind outreach_grouped, so it also exists (as an empty mapping) when no member
# has outreach data; group_df itself cannot handle an empty frame.
outreach_grouped = group_df(outreach_df) if not outreach_df.empty else {}

In [None]:
awards_df = read_member_data_jsons("awards.json")

# Per-member record lists ({member id: {"info": [...]}}) for the individual pages.
awards_grouped, exp_grouped, edu_grouped, projects_grouped = (
    group_df(member_frame)
    for member_frame in (awards_df, exp_df, edu_df, projects_df)
)

In [None]:
# Create the derived columns up front with empty strings. Letting the loop create them
# would leave NaN for every member it does not touch, and that NaN would reach the
# templates.
for derived_column in ("academic_role", "current_project_title"):
    if derived_column not in info_json_df.columns:
        info_json_df[derived_column] = ""

# Copy role and project title from the current/alumni tables onto the member info.
for member_id in info_json_df.index:
    if member_id in current_member_df_with_info.index:
        info_json_df.at[member_id, "academic_role"] = current_member_df_with_info.loc[
            member_id, "current_role"
        ]
        info_json_df.at[member_id, "current_project_title"] = current_member_df_with_info.loc[
            member_id, "current_project_title"
        ]
    elif member_id in alumni_member_df.index:
        info_json_df.at[member_id, "academic_role"] = alumni_member_df.loc[
            member_id, "current_role"
        ]

# Fill after the loop so that missing values copied in above are blanked as well.
info_json_df = info_json_df.fillna("")
# Treat the literal string "nan" like a missing value, then blank all missing values.
alumni_member_df = alumni_member_df.replace("nan", np.nan).fillna("")
current_member_df_with_info = current_member_df_with_info.fillna("")

In [None]:
# One page per member at members/<id>/<id>.html.
for person_id, person_data in info_json_df.iterrows():
    # NOTE(review): "outreach" receives the raw DataFrame while the other sections get
    # the grouped dictionaries - confirm against the template.
    person_page_context = {
        "general": general,
        "member_id": person_id,
        "member_data": person_data,
        "socials": social_links_df.to_dict("index"),
        "documents": document_df.to_dict("index"),
        "education": edu_grouped,
        "experience": exp_grouped,
        "projects": projects_grouped,
        "awards": awards_grouped,
        "outreach": outreach_df,
        "section_headings": INDIVIDUAL_MEMBER_SECTION_MAP,
        "content": article_content_df.to_dict("index"),
    }
    create_page(
        "individual_person.html.j2",
        f"members/{person_id}/{person_id}.html",
        **person_page_context,
    )

# Contact Page

In [None]:
# Contact page rendered from contact.json.
create_page(
    template="contact.html.j2",
    html="Contact.html",
    general=general,
    contact=contact,
)

# Support Page

In [None]:
# Support page rendered from support.json.
create_page(
    template="support.html.j2",
    html="Support.html",
    general=general,
    support=support,
)

# Research Front Page

For adding more columns in dataframe to render front pages and individual article pages

In [None]:
# Research front page: all non-news articles together with the research category data.
create_page(
    template="research.html.j2",
    html="Research.html",
    general=general,
    content=research_content_df,
    research=research,
    current_members=info_json_dict,
)

In [None]:
SUB_RESEARCH_PATH.mkdir(parents=True, exist_ok=True)

# One front page per research category (every category except "News").
is_research_article = article_content_df.category != "News"
research_categories = article_content_df.loc[is_research_article, "category"].unique()
for category in research_categories:
    category_page_path = f"sub_research/{page_link(category.lower())}.html"
    create_page(
        template="sub_research_frontpage.html.j2",
        html=category_page_path,
        general=general,
        research=research,
        content=research_content_df,
        category=category,
        current_members=info_json_dict,
    )

Individual Research Page


In [None]:
# One page per research article.
for ind_research_keys, ind_research_values in research_content_df.iterrows():
    category_slug = page_link(ind_research_values.category.lower())
    article_slug = page_link(ind_research_values.article_id.lower())

    # "Software" articles are written directly into sub_research/; all other articles
    # go into a sub-directory named after their category.
    if ind_research_values['category'] == "Software":
        destination_research_path = f"sub_research/{article_slug}.html"
    else:
        destination_research_path = f"sub_research/{category_slug}/{article_slug}.html"

    # The category directory is created for every article, "Software" included.
    folder_path = SUB_RESEARCH_PATH / category_slug
    folder_path.mkdir(parents=True, exist_ok=True)
    create_page(
        template="research_page_no_twitter.html.j2",
        html=destination_research_path,
        general=general,
        content=ind_research_values,
        member_data=info_json_dict,
        article_id=ind_research_values["article_id"],
    )

# News Page

In [None]:
import re

def urlize_content(content):
    """
    Replace member ids written as ``[member_id]`` with the member's name.

    Current members are turned into a link to their individual page, other known
    members into their plain full name. Ids that are not in ``info_json_df`` are
    turned into title-cased text with underscores replaced by spaces.

    Args:
        content (str): The text content containing IDs in square brackets.

    Returns:
        str: The updated content with every bracketed ID replaced.
    """

    def replace_id(match):
        member_id = match.group(1)
        if member_id not in info_json_df.index:
            # Unknown id: fall back to a readable version of the id itself.
            return member_id.replace('_', ' ').title()

        name = info_json_df.loc[member_id, 'full_name']
        if member_id not in current_member_df_with_info.index:
            # Known member who is not current: plain name, no link.
            return name
        return f'<a href="../members/{member_id}/{member_id}.html" target="_blank">{name}</a>'

    return re.sub(r'\[(\w+)\]', replace_id, content)


In [None]:
# Replace the [member_id] markers in every paragraph ("para" keys) of the news articles.
# The content dictionaries are edited in place.
for news_article_content in news_content_df['content']:
    paragraph_keys = [key for key in news_article_content if "para" in key]
    for content_key in paragraph_keys:
        news_article_content[content_key] = urlize_content(news_article_content[content_key])

In [None]:
# News overview page listing all news articles.
create_page(
    template="news.html.j2",
    html="News.html",
    general=general,
    content=news_content_df,
    category="News",
    member_data=info_json_dict,
)

Individual News Page

In [None]:
# One page per news article at news/<article id>.html. create_page creates the "news"
# directory itself, so no separate folder path is needed here.
for ind_news_keys, ind_news_values in news_content_df.iterrows():
    news_page_name = page_link(ind_news_values.article_id.lower())
    create_page(
        "news_page_no_twitter.html.j2",
        f"news/{news_page_name}.html",
        general=general,
        content=ind_news_values,
        member_data=info_json_dict,
        category="News"
    )

# Join Us Page

In [None]:
# Open positions shown on the "Join Us" page.
OPPORTUNITIES = json.loads(OPPORTUNITIES_PATH.read_text())

In [None]:
# "Join Us" page rendered from the opportunities data.
create_page(
    template="join_us.html.j2",
    html="Join_Us.html",
    general=general,
    opportunities=OPPORTUNITIES,
)

# New Research

In [None]:
# all_research_data = []
# for json_file in RESEARCH_CONTENT_SOURCE.rglob("info.json"):
#     sub_research = []
#     relative_path = json_file.relative_to(RESEARCH_CONTENT_SOURCE.parent).with_suffix("")
#     for sub_dir in json_file.parent.iterdir():
#         if sub_dir.is_dir():
#             if sub_dir.name != "media":
#                 sub_dir_name = sub_dir.name
#                 sub_research.append(sub_dir_name)
#             else:
#                 dest_path = HOSTING_PATH / relative_path.parent
#                 shutil.copytree(sub_dir, dest_path / "media", dirs_exist_ok=True)
   
#     # Parse the JSON file
#     with open(json_file, "r") as f_research:
#         data = json.load(f_research)
#     if 'research_id' in data:
#         data['sub_research'] = sub_research
#         data['url'] = f"{relative_path}.html"
#         all_research_data.append(data)
# all_research_df = pd.DataFrame(all_research_data)
# indexed_research_df = all_research_df.set_index('research_id')
# # Fill all NaN values with empty strings in the DataFrame
# indexed_research_df = indexed_research_df.fillna("")


In [None]:
# d = {}

# for index, article in article_content_df.iterrows():
#     res_articles, news_articles = [], []
#     if pd.notna(article["research_id"]):
#         res_id = article["research_id"]
#         article_id = article["article_id"]
#         if article['category'] == 'Research':
#             res_articles.append((article_id, article['date']))
#         if article['category'] == 'News':
#             news_articles.append((article_id, article['date']))

#         if res_id not in d:
#             d[res_id] = {"res_articles": [], "news_articles": []}
#         d[res_id]["res_articles"].extend(res_articles)
#         d[res_id]["news_articles"].extend(news_articles)

# def get_aggregated_articles(research_id, visited=None):
#     if visited is None:
#         visited = set()

#     # Avoid processing the same research_id multiple times
#     if research_id in visited:
#         return {"res_articles": [], "news_articles": []}
    
#     visited.add(research_id)

#     # Start with articles for the current research_id
#     aggregated_articles = d.get(research_id, {"res_articles": [], "news_articles": []}).copy()

#     # Get sub-research IDs from `indexed_research_df`
#     sub_researches = indexed_research_df.loc[research_id, "sub_research"] if research_id in indexed_research_df.index else []
#     if isinstance(sub_researches, list) and len(sub_researches) > 0:
#         for sub_research in sub_researches:
#             sub_articles = get_aggregated_articles(sub_research, visited)
#             aggregated_articles["res_articles"].extend(sub_articles["res_articles"])
#             aggregated_articles["news_articles"].extend(sub_articles["news_articles"])

#     return aggregated_articles

# f = {}
# for research_index in indexed_research_df.index:
#     f[research_index] = get_aggregated_articles(research_index)


In [None]:
# def sort_articles(articles):
#     # Sort by date in descending order
#     sorted_articles = sorted(articles, key=lambda x: x[1], reverse=True)
#     # Extract only article IDs
#     return [article[0] for article in sorted_articles]

# # Update `f` with sorted articles
# for research_index in f:
#     f[research_index]["res_articles"] = sort_articles(f[research_index]["res_articles"])
#     f[research_index]["news_articles"] = sort_articles(f[research_index]["news_articles"])

# # Add sorted articles to `indexed_research_df`
# indexed_research_df["res_articles"] = indexed_research_df.index.map(
#     lambda idx: f.get(idx, {}).get("res_articles", [])
# )
# indexed_research_df["news_articles"] = indexed_research_df.index.map(
#     lambda idx: f.get(idx, {}).get("news_articles", [])
# )

# # Display the updated DataFrame
# indexed_research_df

In [None]:
# indexed_article_df = article_content_df.set_index('article_id', inplace=False)
# for index, research in indexed_research_df.iterrows():
#     create_page(
#         "sub_research_frontpage.html.j2",
#         research['url'],
#         general=general,
#         data=research,
#         current_research_id=index,
#         indexed_research_df=indexed_research_df,
#         indexed_article_df=indexed_article_df,
#         member_data=info_json_dict
#     )

## Gallery page

In [None]:
GALLERY_CONTENT_SOURCE = WEBSITE_DATA_PATH / "content" / "gallery"
# Gallery images are displayed at 70% of their original size (reduced by 30%).
GALLERY_IMAGE_SCALE = 0.7
events = []

for event_file in GALLERY_CONTENT_SOURCE.rglob("info.json"):
    with open(event_file, "r", encoding="utf-8") as f_event:
        event_data = json.load(f_event)

    if "date" in event_data:
        event_data["date"] = pd.to_datetime(event_data["date"])
    event_id = event_data.get("event_id", "unknown_event")  # Default if event_id is missing
    # Directory that holds this event's info.json and media files.
    event_dir = event_file.parent

    # Destination inside the hosted site, structured by event_id.
    dest_image_dir = HOSTING_PATH / "website_files" / "images" / "gallery" / event_id / "media" / "images"
    dest_image_dir.mkdir(parents=True, exist_ok=True)

    # Copy the event's images into the hosted site.
    source_image_dir = event_dir / "media" / "images"
    if source_image_dir.exists():
        shutil.copytree(source_image_dir, dest_image_dir, dirs_exist_ok=True)

    for image in event_data.get("images", []):
        # Resolve the image relative to the directory the info.json was found in; rebuilding
        # the path from event_id breaks for nested event directories or a missing event_id.
        image_path = event_dir / image["image_path"]
        with Image.open(image_path) as img:
            width, height = img.size
        image["scaled_width"] = int(width * GALLERY_IMAGE_SCALE)
        image["scaled_height"] = int(height * GALLERY_IMAGE_SCALE)
    events.append(event_data)

create_page(
    "gallery.html.j2",
    "Gallery.html",
    general=general,
    member_data=info_json_dict,
    events=events
)

## Copy assets

In [None]:
# Mirror the generator's static assets into the hosted site; existing files are overwritten.
generator_root = GROUP_DATA_DIR.parent / "groupwebsite_generator"
source_assets = generator_root / "assets"
shutil.copytree(src=source_assets, dst=HOSTING_PATH / "assets", dirs_exist_ok=True)

PosixPath('../../kerzendorf-lab.github.io/assets')