### This notebook consist of code for creating the html files for the website each time data is updated.

##### Set-up

In [5]:
# Change the working directory to the desired location
# temporary, only for local
import os
#os.chdir("/home/bumblebealu/groupwebsite_generator/")
os.chdir("/Users/harshul/website clone/harshul/test/groupwebsite_generator")
os.getcwd()

'/Users/harshul/website clone/harshul/test/groupwebsite_generator'

In [6]:
#Importing classes from the Jinja2 library to load and render templates.
import json
import os
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import UndefinedError
from pathlib import Path


In [7]:
#Function for creating proper html file names
def page_link(a):
    if ' ' in a:
        return a.replace(' ', '_')
    else:
        return a

In [8]:
MEMBERS_DIR_PATH = Path('../group-data/members/')
member_records = []

file_names = [
    'experiences.json',
    'education.json',
    'awards.json',
    'roles.json',
    'projects.json',
    'website_media.json',
    'posters.json',
    'publications.json',
    'outreach.json'
]

for member_dir in MEMBERS_DIR_PATH.glob('*'):
    # Check if the directory name starts with a dot and skip it if it does
    if member_dir.name.startswith('.'):
        continue
    
    member_record = json.load(open(member_dir / 'info.json'))
    member_json_dir = member_dir / 'jsons'
    
    current_position = {}
    for file_name in file_names:
        file_path = member_json_dir / file_name
        if file_path.exists():
            current_position = json.load(open(file_path))[-1]
            break

    member_record.update(current_position)
    member_records.append(member_record)


In [9]:
members_df=pd.DataFrame.from_records(member_records)

In [10]:
class ContentData:
    def __init__(self, article_id):
        self.article_id = article_id
        self.json_path = self._find_jsons_path(article_id)

    def _find_jsons_path(self, article_id):
        content_dir = "../group-data/website_data/content/"
        for file_name in os.listdir(content_dir):
            file_path = os.path.join(content_dir, file_name)
            if os.path.isfile(file_path):
                with open(file_path, "r") as f:
                    content_data = json.load(f)
                if content_data.get("article_id") == article_id:
                    return file_path

        raise ValueError(f"Article ID '{article_id}' not found.")

    def load_json(self):
        with open(self.json_path, "r") as f:
            data = json.load(f)
        return data


In [11]:
#Creating an instance of the Environment class that looks for templates. Page_link is set to the global variable so that it can be accessed by all templates
environment = Environment(loader=FileSystemLoader('templates/'),extensions=['jinja2.ext.loopcontrols'])
environment.globals['page_link'] = page_link
environment.globals['ContentData'] = ContentData

In [12]:
content_id_data = {"article_id": [], "category": [], "date": [], "tags": []}
content_directory = '../group-data/website_data/content'

for json_file in os.listdir(content_directory):
    if json_file.endswith('.json'):
        json_path = os.path.join(content_directory, json_file)
        with open(json_path, 'r') as file:
            info = json.load(file)
            if info.get('display'):
                content_id_data['article_id'].append(info.get('article_id'))
                content_id_data['category'].append(info.get('category'))
                content_id_data['date'].append(info.get('article_date'))
                content_id_data['tags'].append(info.get('tags'))

content_df = pd.DataFrame(content_id_data)
content_df['date'] = pd.to_datetime(content_df['date'], format='%m-%d-%Y')
content_df = content_df.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

In [13]:
research_content_unsorted = content_df[content_df['tags'].apply(lambda x: any('research' in tag for tag in x))]
research_content = research_content_unsorted.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  research_content = research_content_unsorted.groupby('category').apply(lambda x: x.sort_values('date', ascending=False)).reset_index(drop=True)


In [14]:
news_content_unsorted = content_df[content_df['tags'].apply(lambda x: any('news' in tag for tag in x))]
news_content = news_content_unsorted.sort_values(by="date", ascending=False)

In [15]:
latest_content_df = pd.DataFrame()

for category in content_df.category.unique():
    latest_data = pd.Series(content_df[content_df.category == category].iloc[0])
    latest_content_df = latest_content_df._append(latest_data, ignore_index=True)

latest_content_df['date'] = pd.to_datetime(latest_content_df['date'], format='%m-%d-%Y')
latest_content_df = latest_content_df.sort_values(by='date', ascending=False)

In [16]:
json_files = ['general', 'homepage', 'research', 'support', 'contact']
data = {}

for json_file in json_files:
    try:
        with open(f"../group-data/website_data/{json_file}.json") as json_var:
            data[json_file] = json.load(json_var)
    except (FileNotFoundError, json.JSONDecodeError):
        pass

##### Homepage

In [17]:
homepage_template = environment.get_template('homepage.html.j2')

In [18]:
homepage_content = \
    homepage_template.render(general=data['general'],
                             homepage=data['homepage'],
                             recent_content=latest_content_df.to_dict(orient='records'))

In [19]:
with open('../kerzendorf-group.github.io/index.html', mode='w', encoding='utf-8') as Homepage:
    Homepage.write(homepage_content)

In [20]:
members_df = pd.DataFrame.from_records(member_records)
members_df


Unnamed: 0,first_name,last_name,nick_name,image_path,cover_image_path,introduction,id,institution,city,state,country,role,start_date,end_date,degree,subject,start-date,entry2,stare
0,Joshua,Shields,Josh,media/images/josh_photo.jpg,media/images/cover.jpg,Josh is a senior graduate student in astrophys...,josh_shields,Ohio state University,,Ohio,USA,Research assistant (astronomy),2016-01-01,2019-12-31,,,,,
1,Anirban,Dutta,,media/images/anirban_dutta.jpg,media/images/cover.jpg,Hi there! This is Anirban.,anirban_dutta,Michigan State University,East Lansing,Michigan,USA,Visiting Researcher,2023-09-01,,,,,,
2,Erin,Visser,,media/images/erin_visser_website_pic.jpg,media/images/cover.jpg,,erin_visser,Michigan State University,East Lansing,Michigan,USA,,2023-09-01,,Bachelors,"Physics and Mathematics, Advanced",,,
3,Vicente,Amado Olivo,,media/images/ESD_headshot.jpg,media/images/cover.jpg,I am a graduate student at Michigan State Univ...,vicente_amado,Michigan State University,East Lansing,Michigan,USA,,2017-01-01,2021-12-31,Bachelors,"Computational Mathematics, Science, and Engine...",,,
4,Yuki,Matsumura,,media/images/yuki_face.png,media/images/cover.jpg,"Hello, I am a first-year physics graduate stud...",yuki_matsumura,California Polytechnic State University,,California,USA,,2020-01-01,2022-12-31,Bachelors,Physics,,,
5,Andrew,Fullard,,media/images/me.jpg,media/images/cover.jpg,I joined the TARDIS group in 2020 and worked o...,andrew_fullard,ICER,East Lansing,Michigan,USA,Research Consultant,2022-01-01,,,,,,
6,Isaac,Smith,,media/images/isaac_image.jpg,media/images/cover.jpg,,isaac_smith,Michigan State University,East Lansing,Michigan,USA,,2020-01-01,2024-12-31,Bachelors,Physics and Mathematics,,,
7,Hayden,Monk,,media/images/hayden.jpg,media/images/cover.jpg,,hayden_monk,Michigan State University,East Lansing,Michigan,USA,,2022-09-01,,Bachelors,Astrophysics,,,
8,Atharva,Arya,,media/images/atharva.jpg,media/images/cover.jpg,I joined TARDIS as a GSoC 2021 student and I h...,atharva_arya,RCOEM,Nagpur,Maharashtra,India,,2019-01-01,2023-12-31,Bachelors,Engineering,,,
9,Richard,Dow,Tripp,media/images/richard.jpg,media/images/cover.jpg,"Hi, my name is Tripp, and I'm a computer scien...",richard_dow,"University of Minnesota, Linguistics Department",,,,Research Assistant,2022,2022,,,,,


##### People Page

In [21]:
members_df = pd.DataFrame.from_records(member_records)
people_template = environment.get_template("people.html.j2")
people_content = people_template.render(general=data["general"], members=members_df)


In [22]:
people_content = people_template.render(general=data["general"], members=members_df)


In [23]:
with open("../kerzendorf-group.github.io/People.html", mode="w", encoding="utf-8") as people:
    people.write(people_content)

##### Individual People Page

In [24]:
ind_person_template = environment.get_template("individual_person.html.j2")

In [25]:
for person in people_df['id']:
            filename = f"../kerzendorf-group.github.io/members/{ person }/{ person }.html"
            ind_person_content = ind_person_template.render(general=data["general"], 
                                                            member_id=person, 
                                                            content=content_df.to_dict(orient='records'))
            with open(filename, mode="w", encoding="utf-8") as page:
                page.write(ind_person_content)

NameError: name 'people_df' is not defined

##### Research Page

In [None]:
research_template = environment.get_template("research.html.j2")

In [None]:
main_page_research_content = research_template.render(general=data["general"],
                                            content=research_content)

In [None]:
with open("../kerzendorf-group.github.io/Research.html", mode="w", encoding="utf-8") as research:
        research.write(main_page_research_content)

In [None]:
sub_research_template = environment.get_template("sub_research_frontpage.html.j2")

In [None]:
for category in content_df.loc[content_df.category != "News", "category"].unique():
        sub_research_content = sub_research_template.render(general=data["general"], 
                                                            research_general=data["research"], 
                                                            content = research_content,
                                                            category = category
                                                            )
        folder_path = f"../kerzendorf-group.github.io/sub_research/{page_link(category.lower())}"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"../kerzendorf-group.github.io/sub_research/{page_link(category.lower())}.html", mode="w", encoding="utf-8") as sub_research:
            sub_research.write(sub_research_content)

##### Individual Research Page

In [None]:
template_no_twitter = environment.get_template("research_page_no_twitter.html.j2")

In [None]:
for ind_research_keys, ind_research_values in research_content.iterrows():
    if "news" not in ind_research_values.category.lower():
        ind_research_content = template_no_twitter.render(general=data["general"], 
                                                          member_ids = people_df['id'],
                                                          nonmem_ids = non_mem_df['id'],
                                                          content = ind_research_values
                                                          
                                                            )
        folder_path = f"../kerzendorf-group.github.io/sub_research/{page_link(ind_research_values.category.lower())}"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"{ folder_path }/{page_link(ind_research_values.article_id.lower())}.html", mode="w", encoding="utf-8") as ind_research_page:
            ind_research_page.write(ind_research_content)

##### News Page

In [None]:
news_content

In [None]:
news_template = environment.get_template("news.html.j2")

In [None]:
news_page_content = news_template.render(general=data["general"],
                                         content=news_content,
                                         member_ids=people_df['id'],
                                         nonmem_ids = non_mem_df['id'],
                                         category="News")

In [None]:
with open("../kerzendorf-group.github.io/News.html", mode="w", encoding="utf-8") as news:
        news.write(news_page_content)

##### Individual News Pages

In [None]:
news_template_no_twitter = environment.get_template("news_page_no_twitter.html.j2")
#news_template_twitter = environment.get_template("news_page_twitter.html.j2")

In [None]:
for ind_news_keys, ind_news_values in news_content.iterrows():
        ind_news_content = news_template_no_twitter.render(general=data["general"], 
                                                          member_ids = people_df['id'],
                                                          nonmem_ids = non_mem_df['id'],
                                                          content = ind_news_values
                                                            )
        folder_path = f"../kerzendorf-group.github.io/news/"
        os.makedirs(folder_path, exist_ok=True)
        with open(f"{ folder_path }/{page_link(ind_news_values.article_id.lower())}.html", mode="w", encoding="utf-8") as ind_news_page:
            ind_news_page.write(ind_news_content)

##### Support Page

In [None]:
support_template = environment.get_template('support.html.j2')

In [None]:
support_content = support_template.render(general=data["general"], support=data["support"])

In [None]:
with open('../kerzendorf-group.github.io/Support.html', mode='w', encoding='utf-8') as support:
    support.write(support_content)

##### Contact

In [None]:
contact_template = environment.get_template('contact.html.j2')

In [None]:
contact_content = contact_template.render(general=data["general"], contact=data["contact"])

In [None]:
with open('../kerzendorf-group.github.io/Contact.html', mode='w', encoding='utf-8') as contact:
    contact.write(contact_content)