In [1]:
import json
import os

In [None]:
import re

def extract_year(input_string):
    """Return the first standalone four-digit number found in a string.

    "Standalone" means the four digits are delimited by word boundaries, so
    a longer digit run such as "12345" does not match.

    Args:
        input_string: Text to search, e.g. an event date such as
            "Thu November 2nd 2023, 5:30 - 7:00pm".

    Returns:
        The matched digits as a string (e.g. "2023"), or the sentinel
        "Year not found" when there is no match or when ``input_string`` is
        not a string (for example ``None`` for a missing date).
    """
    # re.search raises TypeError on None / non-string input; treat that the
    # same way as "no year present" instead of crashing the caller.
    if not isinstance(input_string, str):
        return "Year not found"

    match = re.search(r'\b\d{4}\b', input_string)
    return match.group(0) if match else "Year not found"

# Example usage: run extract_year on a sample event date string and print the
# result. The only standalone four-digit token in this sample is "2023".
input_string = "Thu November 2nd 2023, 5:30 - 7:00pm"
year = extract_year(input_string)
print("Extracted Year:", year)


In [20]:
import json

def load_json_data(file_path):
    """Read a JSON file and return the parsed content.

    The file is opened explicitly as UTF-8 so that non-ASCII text decodes the
    same way on every platform, instead of depending on the locale's default
    encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (typically a dict or list).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def generate_graph(data):
    """Build a person -> event attendance graph from a raw node list.

    For every item in ``data["nodes"]`` labelled "person", a person node is
    emitted plus one "people_attended_event" link per entry in that person's
    ``properties["events"]`` list. Events are de-duplicated by title and get
    synthetic ids "event_0", "event_1", ... in order of first appearance.
    If ``data["nodes"]`` also contains an item labelled "event" whose
    ``properties["name"]`` equals the event title, the event node takes that
    item's properties (first match wins).

    Args:
        data: Mapping with a "nodes" list. Every item needs "labels"; person
            items also need "id" and "properties"; each attended event needs
            a "title" and may carry a "link".

    Returns:
        ``{"nodes": [...], "links": [...]}``. Every node has a "value" equal
        to the number of links whose source is that node, and every event
        node has a "year" property produced by ``extract_year`` ("Year not
        found" when the event has no date).

    Note:
        ``data`` is not modified: node properties are shallow-copied before
        "year" is added to them.
    """
    nodes = []
    links = []
    event_id_map = {}      # event title -> synthetic event id
    outgoing_counts = {}   # node id -> number of links starting at that node

    for item in data["nodes"]:
        if "person" not in item["labels"]:
            continue

        person_id = item["id"]
        nodes.append({
            "labels": ["person"],
            "id": person_id,
            # Copy so the returned graph does not alias the caller's dict.
            "properties": dict(item["properties"]),
        })

        # Create links from the person to each event they attended.
        for event in item["properties"].get("events", []):
            event_title = event["title"]
            if event_title not in event_id_map:
                # First time this event is seen: register it as a node.
                event_id_map[event_title] = f"event_{len(event_id_map)}"
                nodes.append({
                    "labels": ["event"],
                    "id": event_id_map[event_title],
                    "properties": {
                        "name": event_title,
                        "url": event.get("link", ""),
                    },
                })
            event_id = event_id_map[event_title]

            links.append({
                "type": "people_attended_event",
                "id": f"link_{len(links)}",
                "source": person_id,
                "target": event_id,
                "properties": {
                    "value": 1
                }
            })
            # Tally while building instead of rescanning all links per node.
            outgoing_counts[person_id] = outgoing_counts.get(person_id, 0) + 1

    # A node's value is the number of links that start at it.
    # NOTE(review): events only ever appear as link targets, so event nodes
    # get value 0 here — confirm whether attendee counts were intended.
    for node in nodes:
        node["value"] = outgoing_counts.get(node["id"], 0)

    # Index the event items of the original data by name; setdefault keeps
    # the first occurrence, matching a first-match linear search.
    event_details = {}
    for item in data["nodes"]:
        if "event" in item["labels"]:
            item_props = item.get("properties", {})
            if "name" in item_props:
                event_details.setdefault(item_props["name"], item_props)

    for node in nodes:
        if "event" not in node["labels"]:
            continue
        source_props = event_details.get(node["properties"]["name"])
        if source_props is not None:
            # Replace the stub properties with a copy of the full record.
            node["properties"] = dict(source_props)
        props = node["properties"]
        # Events without a matching record (or without a date) have no
        # "date" key; fall back to "" so extract_year reports its sentinel.
        props["year"] = extract_year(props.get("date") or "")

    return {"nodes": nodes, "links": links}

def save_graph_to_json(graph, file_path):
    """Serialize ``graph`` to ``file_path`` as JSON indented by 4 spaces.

    Any existing file at ``file_path`` is overwritten.
    """
    with open(file_path, mode='w') as out_fh:
        json.dump(obj=graph, fp=out_fh, indent=4)

# Example usage: load the input JSON and build the person/event graph from it.
file_path = "js/cesta_event_people.json"
data = load_json_data(file_path)
graph = generate_graph(data)
output_path = "js/graph_output.js"

# Write the graph as a JavaScript file that assigns the indented JSON to a
# global `graph` variable (`var graph = {...};`).
# NOTE(review): this writes a .js file directly rather than calling
# save_graph_to_json — presumably so a page can load it via <script>; confirm.
with open(output_path, 'w') as file:
    file.write(f"var graph = {json.dumps(graph, indent=4)};")


In [19]:

# Print the year of every event node, then summarize the distribution of
# events per year.

years = []
for node in graph["nodes"]:
    if "event" in node["labels"]:
        years.append(node["properties"]["year"])

print("Years of events:", years)
print("Distribution of years:")
# Sort first so the tally below is built, and therefore printed, in ascending
# order. Iterating a bare set() would list the years in arbitrary order.
years = sorted(years)
year_counts = {}
for year in years:
    year_counts[year] = year_counts.get(year, 0) + 1
for year, count in year_counts.items():
    print(f"{year}: {count}")

    


Years of events: ['2024', '2019', '2019', '2019', '2019', '2024', '2017', '2024', '2024', '2024', '2024', '2024', '2024', '2022', '2021', '2021', '2020', '2020', '2019', '2024', '2018', '2018', '2017', '2017', '2016', '2023', '2022', '2022', '2022', '2023', '2020', '2019', '2018', '2017', '2023', '2023', '2022', '2022', '2022', '2018', '2017', '2021', '2020', '2022', '2022', '2022', '2022', '2022', '2022', '2021', '2020', '2018', '2017', '2017', '2023', '2022', '2021', '2018', '2018', '2022', '2022', '2022', '2022', '2020', '2018', '2022', '2021', '2022', '2022', '2020', '2021', '2019', '2021', '2021', '2021', '2020', '2020', '2019', '2017', '2021', '2019', '2021', '2021', '2021', '2023', '2023', '2021', '2018', '2021', '2024', '2021', '2021', '2021', '2022', '2021', '2020', '2021', '2021', '2021', '2020', '2020', '2021', '2020', '2019', '2024', '2023', '2021', '2022', '2020', '2020', '2018', '2018', '2018', '2018', '2021', '2024', '2023', '2024', '2023', '2021', '2024', '2024', '2020'