### This notebook contains the code that generates the HTML files for the website each time the data is updated.

##### Set-up

In [None]:
# Importing classes
import json
import os
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import UndefinedError
from pathlib import Path

In [None]:
# Defining paths
# NOTE(review): the leading "/" makes this an absolute path, so the ".."
# components are taken from the filesystem root rather than from the notebook's
# working directory — confirm whether the relative "../../group-data" was intended.
GROUP_DATA_DIR = Path("/../../group-data") 
# Directory globbed for per-member folders (each is checked for an info.json)
MEMBERS_DIR_PATH = GROUP_DATA_DIR / "members/"
# Directory holding the page-level JSON files (general, homepage, research, ...)
WEBSITE_DATA_PATH = GROUP_DATA_DIR / "website_data/"
# Directory scanned for article JSON files
CONTENT_DIR_PATH = WEBSITE_DATA_PATH / "content"
# Directory the Jinja2 FileSystemLoader reads templates from
TEMPLATE_DIR_PATH = GROUP_DATA_DIR.parent / "groupwebsite_generator" / "templates"
# Directory the rendered HTML pages are written to
HOSTING_PATH = GROUP_DATA_DIR.parent / "kerzendorf-group.github.io"

In [None]:
# Function to create proper HTML file names by replacing spaces with underscores
def page_link(a):
    """Return ``a`` with every space replaced by an underscore.

    Values that contain no space are handed back unchanged.
    """
    if " " not in a:
        return a
    return a.replace(" ", "_")

In [None]:
# Build the Jinja2 environment that loads templates from TEMPLATE_DIR_PATH.
# page_link is registered as a template global so every template can call it.
template_loader = FileSystemLoader(TEMPLATE_DIR_PATH)
environment = Environment(
    loader=template_loader,
    extensions=["jinja2.ext.loopcontrols"],
)
environment.globals.update(page_link=page_link)

In [None]:
# Base names (without ".json") of the page-level data files to load
json_files = [
    "general",
    "homepage",
    "research",
    "support",
    "contact",
]

data = {}
# Load each file into `data`, keyed by its base name. Loading stays best-effort:
# a missing or malformed file does not stop the notebook, but it is reported so
# that a later KeyError on data[...] can be traced back to its cause.
for json_file in json_files:
    json_file_path = WEBSITE_DATA_PATH / f"{json_file}.json"

    try:
        with open(json_file_path, "r", encoding="utf-8") as json_var:
            data[json_file] = json.load(json_var)
    except FileNotFoundError:
        print(f"Warning: {json_file_path} not found; data['{json_file}'] was not loaded")
    except json.JSONDecodeError as error:
        print(f"Warning: {json_file_path} is not valid JSON ({error}); data['{json_file}'] was not loaded")

Helper functions that build the article DataFrames; later sections reload them with additional columns.

In [None]:
def load_content_from_files(columns):
    """Collect the requested fields of every displayed article into a DataFrame.

    Every file directly inside ``CONTENT_DIR_PATH`` whose name ends in ``.json``
    is read; articles whose ``display`` entry is missing or falsy are skipped.

    Parameters
    ----------
    columns : list of str
        Keys to pull from each article's JSON object. A key that is absent from
        an article yields ``None`` in that row.

    Returns
    -------
    pandas.DataFrame
        One row per displayed article and one column per entry of ``columns``.
        When ``"date"`` is among the columns it is parsed with the
        ``%m-%d-%Y`` format.
    """
    # eg { x:[a,b,c]}
    content_data = {col: [] for col in columns}

    # os.listdir yields entries in arbitrary order; sort so the row order is
    # the same on every run and every machine.
    for json_file in sorted(os.listdir(CONTENT_DIR_PATH)):
        if not json_file.endswith(".json"):
            continue
        json_path = os.path.join(CONTENT_DIR_PATH, json_file)
        with open(json_path, "r", encoding="utf-8") as file:
            info = json.load(file)
        # Only load those articles where display is True
        if info.get("display"):
            for col in columns:
                content_data[col].append(info.get(col))

    content_df = pd.DataFrame(content_data)
    # Callers may request a column set without "date"; only parse it when present.
    if "date" in content_df.columns:
        content_df["date"] = pd.to_datetime(content_df["date"], format="%m-%d-%Y")
    return content_df


def get_latest_content_df(content_df):
    """Return the most recent row of each category, ordered newest first.

    Rows are ordered by category (ascending) and date (descending), the first
    row of every category is kept, and the survivors are re-sorted by date in
    descending order. The input frame is not modified.
    """
    return (
        content_df.sort_values(by=["category", "date"], ascending=[True, False])
        .groupby("category")
        .head(1)
        .copy()
        .sort_values(by="date", ascending=False)
    )

##### Homepage

Storing selected columns for Homepage only

In [None]:
# Needed columns for homepage: the keys pulled from each article JSON file
article_columns_initial = [
    "article_id",
    "category",
    "date",
    "tags",
    "title",
    "cover_image",
    "short_description",
]
# Load every displayed article, then keep only the newest one per category
content_df = load_content_from_files(article_columns_initial)
latest_content_df = get_latest_content_df(content_df)

In [None]:
# Preview the newest article per category; this frame is passed to the homepage template
latest_content_df

In [None]:
# Render the homepage template and write the result into the hosting directory
homepage_template = environment.get_template("homepage.html.j2")
homepage_context = {
    "general": data["general"],
    "homepage": data["homepage"],
    "recent_content": latest_content_df.to_dict(orient="records"),
}
homepage_content = homepage_template.render(**homepage_context)

homepage_html_path = HOSTING_PATH / "Index.html"
homepage_html_path.write_text(homepage_content, encoding="utf-8")

##### People Page

In [None]:
def parse_member_data(member_dir):
    """Load a member's JSON records and return the pieces the people page needs.

    Reads ``experiences.json``, ``education.json``, ``social_links.json`` and
    ``projects.json`` from ``<member_dir>/jsons``. A missing file is reported
    with a printed message and contributes nothing to the result.

    Parameters
    ----------
    member_dir : str or pathlib.Path
        Directory of a single member.

    Returns
    -------
    tuple
        ``(education_experience_df, social_links, current_project_title)``

        * ``education_experience_df`` -- experience rows whose ``group`` is
          ``"ICER"`` or ``"kerzendorf"``, followed by education rows whose
          ``institution`` is ``"Michigan State University"``. An empty
          DataFrame when nothing matches.
        * ``social_links`` -- the JSON object stored in ``social_links.json``,
          or ``{}`` when the file is absent or does not hold an object.
        * ``current_project_title`` -- the first non-empty ``project_title``
          found in ``projects.json``, or ``""``.
    """
    member_json_dir = Path(member_dir) / "jsons"
    keys_map = {
        "experiences":  {
            "filter_key": "group",
            "filter_using": ["ICER", "kerzendorf"]
        },
        "education": {
            "filter_key": "institution",
            "filter_using": ["Michigan State University"]
        },
        "social_links": {},
        "projects": {}
    }
    filtered_frames = []
    social_links = {}
    current_project_title = ""

    for file_name, filter_info in keys_map.items():
        file_path = member_json_dir / f"{file_name}.json"
        if not file_path.exists():
            print(f"{file_path} does not exist")
            continue

        if file_name == "social_links":
            with open(file_path, "r", encoding="utf-8") as json_var:
                payload = json.load(json_var)
            # The caller merges this value with dict.update, so only a JSON
            # object is usable. NOTE(review): assumes social_links.json holds a
            # single object of link fields — confirm against the data files.
            if isinstance(payload, dict):
                social_links = payload
            continue

        if file_name == "projects":
            with open(file_path, "r", encoding="utf-8") as json_var:
                payload = json.load(json_var)
            # NOTE(review): assumes projects.json is a list of objects carrying a
            # "project_title" key and that the first titled entry is the current
            # project — confirm against the data files.
            projects = payload if isinstance(payload, list) else [payload]
            for project in projects:
                if isinstance(project, dict) and project.get("project_title"):
                    current_project_title = project["project_title"]
                    break
            continue

        common_dataframe = pd.read_json(file_path)
        filter_key = filter_info.get("filter_key")
        filter_values = filter_info.get("filter_using")
        if not (filter_key and filter_values):
            continue
        # A file without the filter column cannot contain a matching row.
        if filter_key not in common_dataframe.columns:
            continue
        filtered_df = common_dataframe[common_dataframe[filter_key].isin(filter_values)]
        # Skip empty frames so pd.concat only ever sees frames with rows.
        if not filtered_df.empty:
            filtered_frames.append(filtered_df)

    if filtered_frames:
        education_experience_df = pd.concat(
            filtered_frames, ignore_index=True, sort=False
        )
    else:
        education_experience_df = pd.DataFrame()

    return education_experience_df, social_links, current_project_title

In [None]:
# Spot-check one member's parsed records. The path is built from
# MEMBERS_DIR_PATH so the cell does not depend on a single user's home directory.
member_dir = MEMBERS_DIR_PATH / "sofia_biriouk"
parse_member_data(member_dir)[0]

In [None]:
# Function to extract academic roles from education and experience data


def extract_member_academic_role(education_experience_df):
    """Derive a member's displayed academic role and whether they are current.

    Rows are scanned in order and the first row that yields a non-empty role
    decides the result. A row's ``role`` is looked up in ``role_map``; a
    ``degree`` of ``"PhD"`` or ``"Bachelors"`` without an end date overrides
    that lookup, and otherwise ``degree_map`` is used when the role lookup
    found nothing.

    The input frame is only read; no columns are added to it.

    Parameters
    ----------
    education_experience_df : pandas.DataFrame
        Rows may carry ``role``, ``degree`` and ``end_date``; any of these
        columns may be absent.

    Returns
    -------
    tuple
        ``(current_academic_role, is_current_member)``. The role is ``None``
        for a frame without rows and ``""`` when no row produced a role.
        ``is_current_member`` is ``True`` when at least one row has a missing
        end date.
    """
    role_map = {
        "Assistant Professor": "Professor",
        "Professor": "Professor",
        "Visualization Consultant": "Visualization Consultant",
        "Research Consultant": "Research Consultant",
        "Research Software Engineer": "Research Software Engineer",
        "Professorial Assistant": "Undergraduate",
        "Visiting Researcher": "Postdoctoral Researcher",
        "Postdoctoral Researcher": "Postdoctoral Researcher",
    }

    degree_map = {
        "Masters": "Graduate Student",
        "PhD": "Postdoctorate",  # if end_date is present
        "Bachelors": "Graduate Student",
    }

    current_academic_role = None

    for _, row in education_experience_df.iterrows():
        role = row.get("role", None)
        degree = row.get("degree", None)
        # .get treats an absent end_date column as "no end date" without
        # having to add the column to the caller's frame.
        end_date_missing = pd.isna(row.get("end_date", None))

        current_academic_role = role_map.get(role, "")

        if degree == "PhD" and end_date_missing:
            current_academic_role = "Graduate Student"  # still enrolled
        elif degree == "Bachelors" and end_date_missing:
            current_academic_role = "Undergraduate Student"
        elif not current_academic_role and degree in degree_map:
            current_academic_role = degree_map[degree]

        # The first row that produces a role wins; later rows are irrelevant.
        if current_academic_role:
            break

    # A member is current as soon as one record has no end date.
    if "end_date" in education_experience_df.columns:
        has_end_date = all(
            not pd.isna(date) for date in education_experience_df["end_date"]
        )
    else:
        # Without the column every existing row counts as open-ended.
        has_end_date = len(education_experience_df) == 0
    is_current_member = not has_end_date

    return current_academic_role, is_current_member

In [None]:
# Lists to store data for current and alumni members

current_people_page_list = []
alumni_people_page_list = []

# Looping through member directories to fetch and process member data.
# glob yields entries in arbitrary order; sorting keeps the page order stable.
for member_dir in sorted(MEMBERS_DIR_PATH.glob("*")):
    print(member_dir)
    member_info_fname = member_dir / "info.json"
    # Entries without an info.json (stray files, incomplete folders) are skipped
    if not member_info_fname.exists():
        continue
    # Context manager so the file handle is closed right after parsing
    with open(member_info_fname, "r", encoding="utf-8") as member_info_file:
        member_info = json.load(member_info_file)

    education_experience_df, social_links, current_project_title = parse_member_data(
        member_dir
    )
    current_academic_role, is_current_member = extract_member_academic_role(
        education_experience_df
    )

    first_name = member_info["first_name"]
    last_name = member_info["last_name"]
    nickname = member_info.get("nick_name", None)
    # Named member_id so the builtin id() is not shadowed in the notebook namespace
    member_id = member_info["id"]
    image_path = member_info["image_path"]
    cover_image_path = member_info["cover_image_path"]

    # A nickname, when present, replaces the first name in the displayed name
    name = f"{nickname if nickname else first_name} {last_name}"

    member_data = {
        "name": name,
        "academic_role": current_academic_role,
        "id": member_id,
        "current_project_title": current_project_title,
        "image_path": image_path,
        "cover_image_path": cover_image_path,
    }

    # Social link entries become top-level keys of the member record
    member_data.update(social_links)

    if is_current_member:
        current_people_page_list.append(member_data)
    else:
        alumni_people_page_list.append(member_data)

In [None]:
# Inspect the collected current members (swap the comment below to inspect alumni instead)
current_people_page_list
# alumni_people_page_list

In [None]:
# Render the people page from the current/alumni member lists and save it
people_template = environment.get_template("people.html.j2")
people_context = {
    "general": data["general"],
    "current_members": current_people_page_list,
    "alumni_members": alumni_people_page_list,
}
people_content = people_template.render(**people_context)

people_html_path = HOSTING_PATH / "People.html"
people_html_path.write_text(people_content, encoding="utf-8")

### Contact Page

In [None]:
# Render the contact page and write it into the hosting directory
contact_template = environment.get_template("contact.html.j2")
contact_html_path = HOSTING_PATH / "Contact.html"
contact_content = contact_template.render(
    general=data["general"],
    contact=data["contact"],
)
contact_html_path.write_text(contact_content, encoding="utf-8")

### Support Page

In [None]:
# Render the support page and write it into the hosting directory
support_template = environment.get_template("support.html.j2")
support_html_path = HOSTING_PATH / "Support.html"
support_content = support_template.render(
    general=data["general"],
    support=data["support"],
)
support_html_path.write_text(support_content, encoding="utf-8")

### Research Front Page

Adding more columns to the DataFrame so that the front pages and the individual article pages can be rendered

In [None]:

# Reload the articles with the author column included
columns_extended = [*article_columns_initial, "author_id"]
content_df = load_content_from_files(columns_extended)

# Everything except News, grouped by category with the newest article first
is_research_row = content_df["category"] != "News"
research_content_df = content_df[is_research_row].sort_values(
    by=["category", "date"],
    ascending=[True, False],
)

# Newest article of every category (News included)
latest_content_df = get_latest_content_df(content_df)

In [None]:
# Render the research front page and write it into the hosting directory
research_template = environment.get_template("research.html.j2")
research_context = {
    "general": data["general"],
    "content": research_content_df,
    "research": data["research"],
    "current_members": current_people_page_list,
}
main_page_research_content = research_template.render(**research_context)

research_html_path = HOSTING_PATH / "Research.html"
research_html_path.write_text(main_page_research_content, encoding="utf-8")

In [None]:
sub_research_template = environment.get_template("sub_research_frontpage.html.j2")

# One front page per non-News category
research_categories = content_df.loc[content_df.category != "News", "category"].unique()

for category in research_categories:
    sub_research_content = sub_research_template.render(
        general=data["general"],
        research=data["research"],
        content=latest_content_df,
        category=category,
        current_members=current_people_page_list,
    )
    # A folder named after the category is created, and the page itself is
    # written next to it as "<folder>.html"
    folder_path = f"{HOSTING_PATH}/sub_research/{page_link(category.lower())}"
    os.makedirs(folder_path, exist_ok=True)
    Path(f"{folder_path}.html").write_text(sub_research_content, encoding="utf-8")