In [None]:
# Change the working directory to the desired location
#temporary, only for local
# import os
# os.chdir("/home/bumblebealu/groupwebsite_generator/")
# os.chdir("/Users/harshul/website clone/harshul/test/groupwebsite_generator")
# os.getcwd()

### Setup

In [None]:
import json
import os
import datetime

#### IDs

In [None]:
def process_member_jsons(members_directory, output_directory):
    """Build ``people_list.json`` from every displayable member's info file.

    Scans ``<members_directory>/<member>/jsons/`` for files whose name ends
    with ``info.json``. Each file whose ``display`` value is truthy adds one
    entry to both lists of the output document:

    * ``people``             -> ``members/<id>/<id>.json``
    * ``people_directories`` -> ``members/<id>/jsons``

    The document is assembled in memory and written once, so an error while
    reading a member file never leaves a half-written ``people_list.json``.

    Args:
        members_directory: Directory holding one sub-directory per member.
        output_directory: Directory that receives ``people_list.json``. It is
            created if it does not exist yet.
    """
    output_path = os.path.join(output_directory, 'people_list.json')
    output_data = {"people": [], "people_directories": []}

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated file identical from one run (and one machine) to the next.
    for member_dir in sorted(os.listdir(members_directory)):
        jsons_directory = os.path.join(members_directory, member_dir, 'jsons')
        # Also filters out plain files and member folders without "jsons".
        if not os.path.isdir(jsons_directory):
            continue

        for json_file in sorted(os.listdir(jsons_directory)):
            if not json_file.endswith('info.json'):
                continue

            with open(os.path.join(jsons_directory, json_file), 'r') as file:
                data = json.load(file)

            if not data.get('display'):
                continue

            id_key = data.get('id')
            if id_key is None:
                # Without an id the entries would read "members/None/...".
                # The emitted paths have to point at the member's own folder,
                # so fall back to that folder's name.
                id_key = member_dir

            output_data['people'].append(f"members/{id_key}/{id_key}.json")
            output_data['people_directories'].append(f"members/{id_key}/jsons")

    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    with open(output_path, 'w') as output:
        json.dump(output_data, output, indent='        ')
        output.write('\n')  # Keep the trailing newline of the original format

# Directory that holds one sub-directory per group member
members_directory = '../group-data/members/'

# Directory where the generated "people_list.json" file is written
output_directory = '../group-data/website_data/'

# Scan every member's info file and (re)build people_list.json
process_member_jsons(members_directory, output_directory)

#### Appending Individual Member's JSONs into one
###### Each member will have their own combined JSON that contains awards.json, basic_info.json, etc

In [None]:
# Merge each member's individual JSON files (members/<id>/jsons/*.json) into
# a single members/<id>/<id>.json file.

# Read the list of member directories from the people_list.json file
with open('../group-data/website_data/people_list.json') as f:
    file_list = json.load(f)

# Entries in people_list.json are relative to the parent of website_data,
# which is what dirname() of this path yields ('../group-data').
dir_path = os.path.dirname('../group-data/website_data')

for directory in file_list['people_directories']:
    member_jsons_path = os.path.join(dir_path, directory)

    # Flat key -> value mapping for this member. The FIRST file that defines
    # a key wins, so the files are visited in sorted order: os.listdir()
    # alone returns them in arbitrary order, which would make the winner of
    # a key conflict differ between runs.
    combined_data = {}
    for filename in sorted(os.listdir(member_jsons_path)):
        if not filename.endswith('.json'):
            continue

        with open(os.path.join(member_jsons_path, filename)) as f:
            json_data = json.load(f)

        # Normalise to a list of dicts: a dict contributes itself, a list
        # contributes its dict elements, anything else contributes nothing.
        if isinstance(json_data, dict):
            records = [json_data]
        elif isinstance(json_data, list):
            records = [item for item in json_data if isinstance(item, dict)]
        else:
            records = []

        for record in records:
            for key, value in record.items():
                combined_data.setdefault(key, value)

    # "members/<id>/jsons" -> "<dir_path>/members/<id>/<id>.json"
    path_parts = directory.split('/')
    combined_path = os.path.join(dir_path, path_parts[0], path_parts[1], path_parts[1] + '.json')

    # The brackets are written by hand (rather than dumping a one-element
    # list) so the file keeps its existing layout byte for byte.
    with open(combined_path, 'w') as f:
        f.write("[")  # Add opening square bracket
        json.dump(combined_data, f, indent=4)
        f.write("]")  # Add closing square bracket


#### Combining People Categories

In [None]:
# Create the "common" folder if it doesn't exist. It receives the
# per-category member files written by filter_people_by_category().
common_folder_path = '../group-data/members/common/'
os.makedirs(common_folder_path, exist_ok=True)

In [None]:
def filter_people_by_category(category, *, data_root='../group-data'):
    """Collect every member record of one category into a single JSON file.

    Reads ``<data_root>/website_data/people_list.json``, loads each file
    listed under its ``people`` key (paths are relative to ``data_root``) and
    keeps the records whose ``"category"`` value equals ``category``. The
    result is written to ``<data_root>/members/common/<name>.json`` where
    ``<name>`` is ``category`` lower-cased with spaces replaced by
    underscores.

    Args:
        category: Category label to match exactly, e.g. ``"Faculty"``.
        data_root: Root folder of the group data. Defaults to the location
            used by the rest of this notebook.

    Returns:
        None. The function only writes the output file.
    """
    people_list_path = os.path.join(data_root, 'website_data', 'people_list.json')
    with open(people_list_path) as f:
        file_list = json.load(f)

    people = []
    for file_name in file_list['people']:
        with open(os.path.join(data_root, file_name)) as f:
            data = json.load(f)
        # Single pass; entries that are not dicts cannot carry a category
        # and are skipped instead of raising a TypeError.
        people.extend(
            item for item in data
            if isinstance(item, dict) and item.get("category") == category
        )

    # Make sure the target folder exists so the function works on its own.
    common_dir = os.path.join(data_root, 'members', 'common')
    os.makedirs(common_dir, exist_ok=True)
    output_file = os.path.join(common_dir, f"{category.lower().replace(' ', '_')}.json")

    with open(output_file, 'w') as f:
        json.dump(people, f, indent=4)


In [None]:
# Write one combined JSON file per member category, in this order.
for member_category in (
    "Faculty",
    "Undergraduate Students",
    "Graduate Students",
    "Postdoctoral Researchers",
    "Researchers",
    "Research Software Engineer",
):
    filter_people_by_category(member_category)

#### Combining Research

In [None]:
# Create the folder that receives the combined per-category research files
# if it doesn't exist
combined_research_path = '../group-data/website_data/research/sub_research_data/combined'
os.makedirs(combined_research_path, exist_ok=True)

In [None]:
# For every research category, build one combined JSON file: the category's
# about.json (when present) followed by its displayable entries, newest first.

# Read the JSON file that lists the research categories
with open("../group-data/website_data/research_categories.json") as f:
    research_categories = json.load(f)["research_categories"]

for category_path in research_categories:
    # Listed paths are relative to website_data; prepend the base directory
    category_path = os.path.join("..", "group-data", "website_data", category_path)

    # (date, entry) pairs for every entry that is displayed and dated
    category_data = []

    # os.listdir() order is arbitrary. Visiting files in sorted order, plus
    # the stable sort below, makes the order of same-date entries
    # reproducible between runs.
    for filename in sorted(os.listdir(category_path)):
        # about.json is handled separately after sorting
        if not filename.endswith(".json") or filename == "about.json":
            continue

        with open(os.path.join(category_path, filename)) as f:
            json_data = json.load(f)

        # "display" is compared against True (not just tested for
        # truthiness) to keep the existing selection rule unchanged.
        # Entries without a date are left out.
        json_date = json_data.get("date")
        if json_data.get("display") == True and json_date:
            date_object = datetime.datetime.strptime(json_date, "%m-%d-%Y")
            category_data.append((date_object, json_data))

    # Newest first. Every pair carries a real date here, so no None fallback
    # is needed in the key.
    category_data.sort(key=lambda pair: pair[0], reverse=True)
    sorted_category_data = [entry for _, entry in category_data]

    # The category description, if any, always comes first
    about_file_path = os.path.join(category_path, "about.json")
    if os.path.exists(about_file_path):
        with open(about_file_path) as f:
            about_data = json.load(f)
        sorted_category_data.insert(0, about_data)

    # normpath() drops a trailing slash, which would otherwise make
    # basename() return an empty name and the output file be ".json".
    category_name = os.path.basename(os.path.normpath(category_path))
    with open(f"../group-data/website_data/research/sub_research_data/combined/{category_name}.json", "w") as f:
        json.dump(sorted_category_data, f, indent=4)

#### Appending News

In [None]:
# Combine all displayable news items into combined_news.json, newest first.

# Directory where the individual news JSON files are located
directory_path = "../group-data/website_data/news"

# (date, item) pairs for every news item that will be published
combined_data = []

# os.listdir() order is arbitrary. Visiting files in sorted order, plus the
# stable sort below, makes the order of same-date items reproducible.
for filename in sorted(os.listdir(directory_path)):
    # Only JSON files, and never the output of a previous run
    if not filename.endswith(".json") or filename == "combined_news.json":
        continue

    with open(os.path.join(directory_path, filename)) as f:
        json_data = json.load(f)

    if not json_data.get("display"):
        # Exclude data with a false or missing "display" value
        print(f"Skipping '{filename}' as display is False.")
        continue

    json_date = json_data.get("date")
    if not json_date:
        # Undated items cannot be placed in the timeline. Report them rather
        # than dropping them without a trace.
        print(f"Skipping '{filename}' as it has no date.")
        continue

    date_object = datetime.datetime.strptime(json_date, "%m-%d-%Y")
    combined_data.append((date_object, json_data))

# Sort the combined data based on the date in descending order
combined_data.sort(key=lambda pair: pair[0], reverse=True)

# Extract only the JSON data from the sorted list
sorted_combined_data = [item for _, item in combined_data]

# Write the combined and sorted data to a new JSON file in the same directory
combined_file_path = os.path.join(directory_path, "combined_news.json")
with open(combined_file_path, "w") as f:
    json.dump(sorted_combined_data, f, indent=4)
