In [76]:
import logging

from langchain.prompts import ChatPromptTemplate
import json
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
import re

from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()
import instructor
from openai import OpenAI


aclient = instructor.patch(OpenAI())

from typing import Optional, List
from pydantic import BaseModel, Field


In [143]:
input_article_one = """ n the nicest possible way, Britons have always been a bit silly about animals. “Keeping pets, for the English, is not so much a leisure activity as it is an entire way of life,” wrote the anthropologist Kate Fox in Watching the English, nearly 20 years ago. Our dogs, in particular, have been an acceptable outlet for emotions and impulses we otherwise keep strictly controlled – our latent desire to be demonstratively affectionate, to be silly and chat to strangers. If this seems like an exaggeration, consider the different reactions you’d get if you struck up a conversation with someone in a park with a dog, versus someone on the train.

Indeed, British society has been set up to accommodate these four-legged ambassadors. In the UK – unlike Australia, say, or New Zealand – dogs are not just permitted on public transport but often openly encouraged. Many pubs and shops display waggish signs, reading, “Dogs welcome, people tolerated”, and have treat jars on their counters. The other day, as I was waiting outside a cafe with a friend’s dog, the barista urged me to bring her inside.

For years, Britons’ non-partisan passion for animals has been consistent amid dwindling common ground. But lately, rather than bringing out the best in us, our relationship with dogs is increasingly revealing us at our worst – and our supposed “best friends” are paying the price.

As with so many latent traits in the national psyche, it all came unleashed with the pandemic, when many people thought they might as well make the most of all that time at home and in local parks with a dog. Between 2019 and 2022, the number of pet dogs in the UK rose from about nine million to 13 million. But there’s long been a seasonal surge around this time of year, substantial enough for the Dogs Trust charity to coin its famous slogan back in 1978: “A dog is for life, not just for Christmas.”

Green spaces, meanwhile, have been steadily declining, and now many of us have returned to the office, just as those “pandemic dogs” are entering their troublesome teens. It’s a combustible combination and we are already seeing the results: the number of dog attacks recorded by police in England and Wales rose by more than a third between 2018 and 2022.

At the same time, sites such as Pets4Homes.co.uk are replete with listings for dogs that, their owners accept “with deep regret”, are no longer suited to their lifestyles now that lockdown is over. It may have felt as if it would go on for ever, but was there ever any suggestion it was going to last the average dog’s lifespan of a decade?

Living beings are being downgraded to mere commodities. You can see it reflected the “designer” breeds currently in fashion, the French bulldogs and pugs that look cute but spend their entire lives in discomfort. American XL bully dogs, now so controversial, are often sought after as a signifier of masculinity: roping an entire other life in service of our egos. Historically, many of Britain’s most popular breeds evolved to hunt vermin, retrieve game, herd, or otherwise do a specific job alongside humans; these days we are breeding and buying them for their aesthetic appeal.

Underpinning this is a shift to what was long disdained as the “American” approach: treating pets as substitutes for children. In the past in Britain, dogs were treasured on their own terms, for the qualities that made them dogs, and as such, sometimes better than people: their friendliness and trustingness and how they opened up the world for us. They were indulged, certainly – by allowing them on to the sofa or in our beds, for instance, when we’d sworn we never would – but in ways that did not negate or deny their essential otherness.

Now we have more dogs of such ludicrous proportions, they struggle to function as dogs at all – and we treat them accordingly, indulging them as we would ourselves: by buying unnecessary things. The total spend on pets in the UK has more than doubled in the past decade, reaching nearly £10bn last year. That huge rise has not just come from essentials: figures from the marketing agency Mintel suggest that one in five UK owners like their pet to “keep up with the latest trends” in grooming or, heaven forbid, outfits.

These days pet “boutiques” – like the one that recently opened on my street in Norwich, selling “cold-pressed” dog treats, “paw and nose balms” and spa services – are a widespread sign of gentrification. But it’s not just wealthier areas: this summer in Great Yarmouth, one of the most deprived towns in the country, I noticed seaside stalls selling not one but two brands of ice-cream for dogs.

It suggests dog-lovers have become untethered from their companions’ desires, let alone their needs. Let’s be honest: most dogs would be thrilled to bits to be eating a paper bag, or even their own faeces. And although they are certainly delighted by ice-cream, they don’t need it. But the ways we ourselves find solace – in consumption, by indulging our simian “treat brain” with things that we don’t need and/or aren’t good for us – we have simply extended to our pets.

It’s hard not to see the rise in dog-friendly restaurants, cinema screenings and even churches as similar to the ludicrous expenditure: a way to placate the two-legged being on the end of the lead (regardless of the experience of others in the vicinity).

Meanwhile, many dogs suffer daily deprivation, their worlds made small and monotonous by our busy modern schedules. These are social animals: it’s not natural for them to live without other dogs, let alone in an empty house for eight hours a day, Monday to Friday. If we are besieged by badly behaved dogs, the cause isn’t hard to pinpoint. Many behavioural problems can be alleviated and even addressed by sufficient exercise, supervision and consistent routines, but instead of organising our lives so that our pets may thrive, we show our love with a Halloween-themed cookie, or a new outfit for Instagram likes.

It’s easy to forget that we are sharing our homes with a descendant of the wolf when it is dressed in sheep’s clothing; but the more we learn about animals, the clearer it becomes that our treatment of them, simultaneously adoring and alienated, means they are leading strange, unsatisfying simulacra of the lives they ought to lead.

But for as long as we choose to share our lives with pets, the bar should be the same as for any relationship we value: being prepared to make sacrifices for their wellbeing, prioritising quality time and care, and loving them as they are – not for how they reflect on us, or how we’d like them to be.


"""

In [144]:
input_article_two = """Lee Parkin had been the proud owner of his terrier-spaniel cross Izzy for nearly 10 years when he stepped out for what would be his last walk with his beloved pet.

He was walking Izzy near his home in Doncaster when an XL bully pounced on her, mounting a 20-minute attack and ultimately killing the dog in front of Parkin, who desperately intervened in vain.

“It was such a nice day,” he said. “We were walking a normal field where I go, and I saw this dog loose. It appeared wild by its demeanour.”

Parkin, 50, took his dog through a gate but found himself cornered. The dog approached and started circling them. And then, he says, “it just grabbed her”.

“I’ve never encountered a bigger, stronger dog before in my life,” he says. “I’ve dealt with dogs attacking another dog before.”

Lee Parkin and his dog Izzy
Lee Parkin and his dog Izzy. Photograph: Lee Parkin
Parkin did his best to fight the dog off. “I smashed both hands against it, I twisted its balls, I kicked it in its back end. It did nothing whatsoever. I just shouted for help.”

At first there were no other people around, but “all of a sudden” there were about three other men, possibly including the owner, attempting to remove the animal.

A passerby gave him a lift to the vet but Izzy was “bleeding so profusely” he could hear her choking on her own blood. Her bones had been crushed.

The owners were handed a caution and the dog remains alive and living nearby.

“It was dangerously out of control,” Parkin says of the XL bully. “I’ve been brought up with dogs all my life. There’s no place for this type of dog in society.”

He welcomes the incoming ban on XL bullies but says he does not think it is enough and it will not work.

He believes the majority of XL bully owners will not be fazed by the ban and will keep their dogs and ignore the new law and regulations.

“The only effective thing that I’ve seen the police doing is turning up and shooting these dogs, which is what I think they should be doing,” Parkin adds.

He was left with significant mental impacts from the attack and was subsequently diagnosed with post-traumatic stress disorder. He received counselling but still struggles with walking dogs, and often rises very early in the morning to avoid other owners. He also carries a dog spray.

Marie Hay’s siberian husky, Naevia, survived a savage attack on the beach in Redcar on the North Yorkshire coast by two XL bullies – but has been left with life-changing injuries. Hay, like Parkin, has also been left with mental scars.

The owner of the dog that attacked seven-year-old Naevia is facing a criminal trial next year.

“We must have only been three minutes and the guy pulls up and basically he’s just got his dogs out of the car. They run down to the bottom of the beach and one starts to run towards Naevia.

“The owner turned to me and said: ‘They’re friendly, don’t worry,’ because I must have pulled a face at the size of the dog.

skip past newsletter promotion
Sign up to First Edition

Free daily newsletter
Our morning email breaks down the key stories of the day, telling you what’s happening and why it matters

Enter your email address
Sign up
Privacy Notice: Newsletters may contain info about charities, online ads, and content funded by outside parties. For more information see our Privacy Policy. We use Google reCaptcha to protect our website and the Google Privacy Policy and Terms of Service apply.
after newsletter promotion
“But then the first one jumped on Naevia’s chest and just started tearing into her.

“So she was screaming, screaming like a baby. And then the other one just came out of nowhere. The attack lasted about 12 minutes.”

An American bully XL with cropped ears. The practice is illegal in England and Wales, but it is still carried out by unscrupulous owners.
Perfect pets or dangerous dogs? The sudden, surprising rise of American bully XLs
Read more
Hay said several people attempted to remove the dogs but were initially unsuccessful. They attempted to lift the dogs by the legs and her 20-year-old daughter was bitten, as were other people who intervened.

The owner eventually placed a harness on one of them and put it in the car, while Hay had to walk the other dog back to the car on a lead.

Naevia lost 83% of her blood. “She was bleeding to death on the beach … she had hundreds of bite marks all over, she had an incision that ripped her chest open.

“She had to have between eight and 10 operations. She’s now in kidney failure because of the stress that it caused on her kidneys. She had to have two blood transfusions.”

Hay said the vet bills were more than £30,000, which she has been able to cover through donations on a fundraising website.

Like Parkin, Hay struggles to go out for walks now, due to the stress caused by the incident.

“I carry a full kit that I’ve made myself, it’s got a rape alarm, a couple of extra dog leads … I’m constantly in fear.”

Hay says she is “100%” supportive of the new ban. She says she accepts that a dog’s behaviour is partly down to the owners but is confident the breed plays a part too."""

In [145]:
# We classify input based on the available document types.


def _category(data_type, *subclasses):
    """Build one classification entry: a media type plus its list of subclasses."""
    return {"type": data_type, "subclass": list(subclasses)}


# Maps each top-level content category to its media type ("type") and the
# finer-grained subclasses ("subclass") that belong to it.
classification = {
    "Natural Language Text": _category(
        "TEXT",
        "Articles, essays, and reports",
        "Books and manuscripts",
        "News stories and blog posts",
        "Research papers and academic publications",
        "Social media posts and comments",
        "Website content and product descriptions",
        "Personal narratives and stories",
    ),
    "Structured Documents": _category(
        "TEXT",
        "Spreadsheets and tables",
        "Forms and surveys",
        "Databases and CSV files",
    ),
    "Code and Scripts": _category(
        "TEXT",
        "Source code in various programming languages",
        "Shell commands and scripts",
        "Markup languages (HTML, XML)",
        "Stylesheets (CSS) and configuration files (YAML, JSON, INI)",
    ),
    "Conversational Data": _category(
        "TEXT",
        "Chat transcripts and messaging history",
        "Customer service logs and interactions",
        "Conversational AI training data",
    ),
    "Educational Content": _category(
        "TEXT",
        "Textbook content and lecture notes",
        "Exam questions and academic exercises",
        "E-learning course materials",
    ),
    "Creative Writing": _category(
        "TEXT",
        "Poetry and prose",
        "Scripts for plays, movies, and television",
        "Song lyrics",
    ),
    "Technical Documentation": _category(
        "TEXT",
        "Manuals and user guides",
        "Technical specifications and API documentation",
        "Helpdesk articles and FAQs",
    ),
    "Legal and Regulatory Documents": _category(
        "TEXT",
        "Contracts and agreements",
        "Laws, regulations, and legal case documents",
        "Policy documents and compliance materials",
    ),
    "Medical and Scientific Texts": _category(
        "TEXT",
        "Clinical trial reports",
        "Patient records and case notes",
        "Scientific journal articles",
    ),
    "Financial and Business Documents": _category(
        "TEXT",
        "Financial reports and statements",
        "Business plans and proposals",
        "Market research and analysis reports",
    ),
    "Advertising and Marketing Materials": _category(
        "TEXT",
        "Ad copies and marketing slogans",
        "Product catalogs and brochures",
        "Press releases and promotional content",
    ),
    "Emails and Correspondence": _category(
        "TEXT",
        "Professional and formal correspondence",
        "Personal emails and letters",
    ),
    "Metadata and Annotations": _category(
        "TEXT",
        "Image and video captions",
        "Annotations and metadata for various media",
    ),
    "Language Learning Materials": _category(
        "TEXT",
        "Vocabulary lists and grammar rules",
        "Language exercises and quizzes",
    ),
    "Audio Content": _category(
        "AUDIO",
        "Music tracks and albums",
        "Podcasts and radio broadcasts",
        "Audiobooks and audio guides",
        "Recorded interviews and speeches",
        "Sound effects and ambient sounds",
    ),
    "Image Content": _category(
        "IMAGE",
        "Photographs and digital images",
        "Illustrations, diagrams, and charts",
        "Infographics and visual data representations",
        "Artwork and paintings",
        "Screenshots and graphical user interfaces",
    ),
    "Video Content": _category(
        "VIDEO",
        "Movies and short films",
        "Documentaries and educational videos",
        "Video tutorials and how-to guides",
        "Animated features and cartoons",
        "Live event recordings and sports broadcasts",
    ),
    "Multimedia Content": _category(
        "MULTIMEDIA",
        "Interactive web content and games",
        "Virtual reality (VR) and augmented reality (AR) experiences",
        "Mixed media presentations and slide decks",
        "E-learning modules with integrated multimedia",
        "Digital exhibitions and virtual tours",
    ),
    "3D Models and CAD Content": _category(
        "3D_MODEL",
        "Architectural renderings and building plans",
        "Product design models and prototypes",
        "3D animations and character models",
        "Scientific simulations and visualizations",
        "Virtual objects for AR/VR environments",
    ),
    "Procedural Content": _category(
        "PROCEDURAL",
        "Tutorials and step-by-step guides",
        "Workflow and process descriptions",
        "Simulation and training exercises",
        "Recipes and crafting instructions",
    ),
}

# System prompt for the classification step: the instruction followed by the
# Python repr of the whole taxonomy.
system_prompt = " Classify content based on the following categories: " + str(classification)

In [146]:
class CognitiveCategorySubgroup(BaseModel):
    """One subgroup (subclass) of a general content category.

    Renamed from ``CognitiveLayerSubgroup``: a later cell in this notebook
    defines a different model under that name (with a ``description`` field
    instead of ``data_type``), which silently shadowed this one.
    """
    # Identifier of the subgroup within the returned category.
    id: int
    # Subgroup name; expected to be one of the "subclass" strings in `classification`.
    name: str
    # Media type of the subgroup; expected to be a "type" value from `classification`.
    data_type: str


# Backward-compatible alias for the name this cell used to export.
CognitiveLayerSubgroup = CognitiveCategorySubgroup


class CognitiveCategory(BaseModel):
    """Content category chosen for an input, together with its matching subgroups."""
    # Category name; expected to be one of the keys of `classification`.
    name: str
    # `default_factory` alone: combining it with `...` (required) is contradictory
    # and is rejected outright by some pydantic versions.
    cognitive_subgroups: List[CognitiveCategorySubgroup] = Field(default_factory=list)

In [147]:
def classify_input(input, model: str = "gpt-4-1106-preview") -> CognitiveCategory:
    """Classify a piece of content against the `classification` taxonomy.

    Args:
        input: Raw content to classify. The name shadows the builtin but is
            kept because callers pass it by keyword.
        model: Name of the OpenAI chat model to call.

    Returns:
        The response parsed into a ``CognitiveCategory`` (requested through
        the ``response_model`` argument of the instructor-patched client).
    """
    # Build the prompt here rather than reading the module-level `system_prompt`:
    # a later cell rebinds that name to a function, after which this call would
    # send a function object as the message content.
    classification_prompt = f""" Classify content based on the following categories: {str(classification)}"""
    user_prompt = f"Use the given format to extract information from the following input: {input}."

    # System instructions go first, followed by the user content.
    return aclient.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": classification_prompt,
            },
            {
                "role": "system",
                "content": "Make sure both values are returned. Incomplete results will result in termination",
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        response_model=CognitiveCategory,
    )

In [148]:
# Classify the first sample article; classify_input is annotated to return a CognitiveCategory.
required_layers_one = classify_input(input = input_article_one)
# Show the classification serialised as JSON.
print(required_layers_one.json())

{"name":"Natural Language Text","cognitive_subgroups":[{"id":1,"name":"News stories and blog posts","data_type":"TEXT"}]}


In [149]:
# Classify the second sample article; classify_input is annotated to return a CognitiveCategory.
required_layers_two = classify_input(input = input_article_two)
# Show the classification serialised as JSON.
print(required_layers_two.json())

{"name":"Natural Language Text","cognitive_subgroups":[{"id":1,"name":"News stories and blog posts","data_type":"TEXT"},{"id":2,"name":"Personal narratives and stories","data_type":"TEXT"}]}


In [84]:

def system_prompt_temp(required_layers):
    """Render the layer-extraction system prompt for a classified input.

    `required_layers` must expose ``.dict()`` returning a mapping with a
    ``name`` and a non-empty ``cognitive_subgroups`` list; only the first
    subgroup (its ``data_type`` and ``name``) is used in the prompt.
    """
    layer_info = required_layers.dict()
    primary_subgroup = layer_info['cognitive_subgroups'][0]
    data_type = primary_subgroup['data_type']
    context_name = layer_info['name']
    domain_name = primary_subgroup['name']

    # Every prompt line, including the blank ones, carries this indent.
    pad = "    "
    body_lines = [
        f"You are tasked with analyzing a {data_type} files, especially in a multilayer network context for tasks such as analysis, categorization, and feature extraction, various layers can be incorporated to capture the depth and breadth of information contained within the {data_type} ",
        f"These layers can help in understanding the content, context, and characteristics of the {data_type}",
        "Your objective is to extract meaningful layers of information that will contribute to constructing a detailed multilayer network or knowledge graph.",
        "Approach this task by considering the unique characteristics and inherent properties of the data at hand.",
        f"VERY IMPORTANT: The context you are working in is {context_name} and specific domain you are extracting data on is {domain_name}",
        "",
        "Guidelines for Layer Extraction:",
        "",
        f"Take into account: The content type that in this case is: {domain_name} should play a major role in how you decompose into layers.",
        "",
        "Based on your analysis, define and describe the layers you've identified, explaining their relevance and contribution to understanding the dataset. Your independent identification of layers will enable a nuanced and multifaceted representation of the data, enhancing applications in knowledge discovery, content analysis, and information retrieval.",
        "",
        ".",
    ]
    # The prompt opens with a newline, then one indented line per entry.
    return "\n" + "\n".join(pad + line for line in body_lines)

In [85]:
class CognitiveLayerSubgroup(BaseModel):
    """A single cognitive layer identified for a piece of content.

    NOTE(review): an earlier cell defines another model with this same class
    name (carrying `data_type` instead of `description`); this definition
    rebinds the name from here on.
    """
    # Identifier of the layer within the returned list.
    id: int
    # Short layer name.
    name:str
    # Free-text explanation of what the layer captures.
    description: str


class CognitiveLayer(BaseModel):
    """Set of cognitive layers extracted for one input, grouped under a category name."""
    # Name of the category the layers were extracted for.
    category_name: str
    # `default_factory` alone: combining it with `...` (required) is contradictory
    # and is rejected outright by some pydantic versions.
    cognitive_layers: List[CognitiveLayerSubgroup] = Field(default_factory=list)

In [86]:
def determine_layers(input, required_layers, model: str = "gpt-4-1106-preview") -> CognitiveLayer:
    """Ask the model which cognitive layers should be extracted from `input`.

    Args:
        input: Raw content to analyse. The name shadows the builtin but is
            kept because callers pass it by keyword.
        required_layers: Classification result for `input`; it is handed to
            ``system_prompt_temp`` to build the system prompt.
        model: Name of the OpenAI chat model to call.

    Returns:
        The response parsed into a ``CognitiveLayer`` (requested through the
        ``response_model`` argument of the instructor-patched client).
    """
    user_prompt = f"Use the given format to extract information from the following input: {input}."

    # System instructions go first, followed by the user content.
    return aclient.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": system_prompt_temp(required_layers),
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        response_model=CognitiveLayer,
    )

In [87]:
# Determine the cognitive layers for the first article, using its classification result.
cognitive_layers_one = determine_layers(input=input_article_one, required_layers= required_layers_one)

In [88]:
# Inspect the layers returned for the first article.
print(cognitive_layers_one)

category_name='Exploring British Attitudes Toward Pets' cognitive_layers=[CognitiveLayerSubgroup(id=1, name='Cultural Practices', description='Exploration of British cultural practices regarding pets, highlighting the affection and accommodations made for dogs in public spaces, and the sociocultural emphasis on pet ownership.'), CognitiveLayerSubgroup(id=2, name='Behavioral Shifts', description='Identification and analysis of changing behaviors and attitudes toward pets in Britain, particularly during and after the COVID-19 pandemic.'), CognitiveLayerSubgroup(id=3, name='Animal Welfare Concerns', description='Insights into the repercussions of current trends in pet ownership on animal welfare, including the impact of designer breeds and the commodification of pets.'), CognitiveLayerSubgroup(id=4, name='Human-Pet Dynamics', description='Discussion of the evolving relationship between humans and pets, focusing on the anthropomorphization of animals, and the appearance versus functionalit

In [89]:
# Determine the cognitive layers for the second article, using its classification result.
cognitive_layers_two = determine_layers(input=input_article_two, required_layers=required_layers_two)

In [90]:
# Inspect the layers returned for the second article.
print(cognitive_layers_two)

category_name='News stories and blog posts' cognitive_layers=[CognitiveLayerSubgroup(id=1, name='Topical Context', description='This layer encapsulates the main subject matter of the text, which involves animal attacks, specifically dog attacks, and their legal and emotional repercussions.'), CognitiveLayerSubgroup(id=2, name='Geographical Context', description='The layer that locates the events mentioned in the text geographically, including specific places like Doncaster, Redcar, and the North Yorkshire coast in England.'), CognitiveLayerSubgroup(id=3, name='Temporal Context', description='This layer provides temporal references pertinent to the stories related to the individual attacks and subsequent events, such as the upcoming criminal trial or the historical context of the pet ownership.'), CognitiveLayerSubgroup(id=4, name='Personal Testimony', description='A layer of direct quotes and personal experiences as recounted by the individuals involved in the incidents, emphasizing th

In [91]:
# Reduce the structured result to a plain list of layer names.
# The name `cognitive_layers_one` is kept because later cells consume it as a
# list of strings. The guard makes the cell safe to re-run: once the name holds
# the list, it no longer has a `.cognitive_layers` attribute to iterate over.
if hasattr(cognitive_layers_one, "cognitive_layers"):
    cognitive_layers_one = [
        layer_subgroup.name for layer_subgroup in cognitive_layers_one.cognitive_layers
    ]

print("Extracted Layer Names:", cognitive_layers_one)

Extracted Layer Names: ['Cultural Practices', 'Behavioral Shifts', 'Animal Welfare Concerns', 'Human-Pet Dynamics', 'Economic Impact', 'Social Commentary', 'Ethical Considerations', 'Comparative Analysis', 'Public Policy Implications']


In [92]:
# Reduce the structured result to a plain list of layer names.
# The name `cognitive_layers_two` is kept because later cells consume it as a
# list of strings. The guard makes the cell safe to re-run: once the name holds
# the list, it no longer has a `.cognitive_layers` attribute to iterate over.
if hasattr(cognitive_layers_two, "cognitive_layers"):
    cognitive_layers_two = [
        layer_subgroup.name for layer_subgroup in cognitive_layers_two.cognitive_layers
    ]

print("Extracted Layer Names:", cognitive_layers_two)

Extracted Layer Names: ['Topical Context', 'Geographical Context', 'Temporal Context', 'Personal Testimony', 'Legal and Policy Framework', 'Socioemotional Impact', 'Economic Impact', 'Community Reaction', 'Advocacy and Activism', 'Incident Details']


In [93]:
def system_prompt(layer:str=None)->str: 
    """Return the knowledge-graph extraction system prompt for one cognitive layer.

    `layer` is interpolated verbatim into the "YOU ARE ONLY EXTRACTING DATA FOR
    COGNITIVE LAYER ..." line; with the default, the literal text "None" appears there.

    NOTE(review): this function reuses the name of the module-level
    `system_prompt` string defined in the classification cell, so whichever
    cell ran last decides what the name refers to.
    NOTE(review): the prompt forbids integer node IDs, while the `Node` model
    in this notebook declares `id: int` - confirm which is intended.
    """
    return f"""You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- **Edges** represent relationships between concepts. They're akin to Wikipedia links.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
YOU ARE ONLY EXTRACTING DATA FOR COGNITIVE LAYER {layer}
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person,
   always label it as **"person"**.
  Avoid using more specific terms like "mathematician" or "scientist".
  - Include event, entity, time, or action nodes to the category.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
    Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
 Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
 In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
 so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination"""

In [100]:
import instructor
from openai import OpenAI

from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

aclient = instructor.patch(OpenAI())

from typing import Optional, List
from pydantic import BaseModel, Field


class Node(BaseModel):
    """Node in a knowledge graph."""
    # Integer identifier of the node; presumably what Edge.source / Edge.target refer to.
    # NOTE(review): the extraction prompt in this notebook says never to use
    # integers as node IDs - confirm which side is intended.
    id: int
    # Free-text description of the entity, event or concept.
    description: str
    # Node label; the prompt asks for basic types such as "person".
    category: str
    # The prompt asks for "episodic" or "semantic" here.
    memory_type: str
    # Optional metadata; the meaning of the float is not shown here (presumably a timestamp - TODO confirm).
    created_at: Optional[float] = None
    # Optional flag; its semantics are not shown in this notebook section.
    summarized: Optional[bool] = None


class Edge(BaseModel):
    """Edge in a knowledge graph."""
    # Integer endpoints; presumably Node.id values of the start and end nodes - TODO confirm.
    source: int
    target: int
    # Free-text description of the relationship.
    description: str
    # Optional metadata; the meaning of the float is not shown here (presumably a timestamp - TODO confirm).
    created_at: Optional[float] = None
    # Optional flag; its semantics are not shown in this notebook section.
    summarized: Optional[bool] = None


class KnowledgeGraph(BaseModel):
    """Knowledge graph: a list of nodes plus the edges between them."""
    # `default_factory` alone: combining it with `...` (required) is contradictory
    # and is rejected outright by some pydantic versions. Both lists default to empty.
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)


def generate_graph(input, layer: Optional[str] = None, model: str = "gpt-4-1106-preview") -> KnowledgeGraph:
    """Generate a knowledge graph from `input`, restricted to one cognitive layer.

    Args:
        input: Raw content to extract the graph from. The name shadows the
            builtin but is kept because callers pass it by keyword.
        layer: Name of the cognitive layer to extract; forwarded to
            ``system_prompt(layer=...)``.
        model: Name of the OpenAI chat model to call.

    Returns:
        The response parsed into a ``KnowledgeGraph`` (requested through the
        ``response_model`` argument of the instructor-patched client).

    Raises:
        TypeError: if the global ``system_prompt`` is not callable.
    """
    # The notebook also binds `system_prompt` to a plain string in the
    # classification cell; if that cell ran last, fail with a clear message
    # instead of "'str' object is not callable".
    if not callable(system_prompt):
        raise TypeError(
            "`system_prompt` is not callable; re-run the cell that defines "
            "system_prompt(layer) before calling generate_graph"
        )

    user_prompt = f"Use the given format to extract information from the following input: {input}."

    # System instructions go first, followed by the user content.
    return aclient.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": system_prompt(layer=layer),
            },
            {
                "role": "system",
                "content": "Must include both nodes and edges",
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        response_model=KnowledgeGraph,
    )

In [101]:

def create_layer_graph(input, layer_list, max_layers: Optional[int] = 3):
    """Generate one knowledge graph per cognitive layer.

    Args:
        input: Raw content handed to ``generate_graph`` for every layer.
        layer_list: Sequence of layer names, processed in order.
        max_layers: Upper bound on how many leading layers are processed.
            Defaults to 3 (the previously hard-coded limit); pass ``None`` to
            process every layer.

    Returns:
        List of the graphs returned by ``generate_graph``, in layer order.

    Raises:
        ValueError: if `max_layers` is negative.
    """
    if max_layers is not None and max_layers < 0:
        raise ValueError("max_layers must be non-negative or None")

    layer_graphs = []

    # Slicing with None as the stop value yields the whole list.
    for layer in layer_list[:max_layers]:
        print("Layer processed is:", str(layer))
        layer_graph = generate_graph(input=input, layer=layer)
        print("Layer graph is:", str(layer_graph))
        layer_graphs.append(layer_graph)
    return layer_graphs



In [102]:
# Build per-layer graphs for the first article. `cognitive_layers_one` is by now the
# list of layer-name strings; create_layer_graph returns a list of graphs, one per processed layer.
layer_1_graph = create_layer_graph(input_article_one, cognitive_layers_one)


Layer processed is: Cultural Practices
Layer graph is: nodes=[Node(id=1, description='In British culture, keeping pets is an integral way of life and an outlet for emotions, with a particular fondness for dogs', category='Cultural Practices', memory_type='semantic', created_at=None, summarized=None), Node(id=2, description='Dogs serve as an acceptable outlet for typically controlled British emotions, facilitating affection and sociability', category='Cultural Practices', memory_type='semantic', created_at=None, summarized=None), Node(id=3, description="Public spaces in the UK are accommodating of dogs, with signs such as 'Dogs welcome, people tolerated' and treats offered in establishments", category='Cultural Practices', memory_type='semantic', created_at=None, summarized=None), Node(id=4, description='Dogs act as social catalysts, making it more acceptable to converse with strangers in parks rather than more formal settings like trains', category='Cultural Practices', memory_type='se

In [103]:
# Build per-layer graphs for the second article. `cognitive_layers_two` is by now the
# list of layer-name strings; create_layer_graph returns a list of graphs, one per processed layer.
layer_2_graph = create_layer_graph(input_article_two, cognitive_layers_two)

Layer processed is: Topical Context
Layer graph is: nodes=[Node(id=1, description='Lee Parkin, 50, owner of the late terrier-spaniel cross Izzy, suffers from PTSD', category='person', memory_type='episodic', created_at=None, summarized=None), Node(id=2, description='Izzy, a terrier-spaniel cross owned by Lee Parkin, killed by an XL bully', category='animal', memory_type='episodic', created_at=None, summarized=None), Node(id=3, description='An XL bully attacked and killed Izzy during a walk with Lee Parkin in Doncaster', category='event', memory_type='episodic', created_at=None, summarized=None), Node(id=4, description='Doncaster, the location where Izzy was attacked and killed by an XL bully', category='location', memory_type='semantic', created_at=None, summarized=None), Node(id=5, description="Naevia, Marie Hay's Siberian husky, survived an attack by two XL bullies with life-changing injuries", category='animal', memory_type='episodic', created_at=None, summarized=None), Node(id=6, d

In [203]:
import networkx as nx
import uuid
from datetime import datetime

def create_user_content_graph(user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
    """Create, or extend, a user's content graph of categories and subclasses.

    The graph holds one node for the user, one node per content category
    (linked from the user by a 'created' edge) and one node per subclass
    (linked from its category by an 'includes' edge). Four default categories
    are always present; `required_layers` contributes one more.

    Args:
        user_id: Node ID of the user.
        custom_user_properties: Optional dict of extra attributes for the user
            node; these override `default_fields` on key clashes.
        required_layers: Optional object whose `.dict()` returns a mapping with
            a 'name' key and a 'cognitive_subgroups' list of mappings that each
            have a 'name'. When omitted, only the default categories are used.
        default_fields: Optional dict of attributes applied to every node
            created here. Defaults to 'created_at' / 'updated_at' set to the
            current local time.
        existing_graph: Optional graph to extend in place. A new
            `nx.MultiDiGraph` is created when omitted.

    Returns:
        The graph (the same object as `existing_graph` when one is given).
    """
    # Derive the extra category from `required_layers`, when one is supplied.
    additional_categories = {}
    if required_layers is not None:
        layer_spec = required_layers.dict()
        additional_categories[layer_spec['name']] = [
            subgroup['name'] for subgroup in layer_spec['cognitive_subgroups']
        ]

    # Define default fields for all nodes if not provided. The timestamp is
    # taken once so that 'created_at' and 'updated_at' always agree.
    if default_fields is None:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        default_fields = {
            'created_at': timestamp,
            'updated_at': timestamp
        }

    # Merge custom user properties with default properties; custom properties take precedence
    user_properties = {**default_fields, **(custom_user_properties or {})}

    # Default content categories
    content_categories = {
        "Temporal": ["Historical events", "Schedules and timelines"],
        "Positional": ["Geographical locations", "Spatial data"],
        "Propositions": ["Hypotheses and theories", "Claims and arguments"],
        "Personalization": ["User preferences", "User information"]
    }

    # A category from `required_layers` that shares a default category's name
    # replaces that default's subclass list.
    content_categories.update(additional_categories)

    # Compare against None rather than relying on truthiness: an empty graph is
    # falsy, and a caller-supplied empty graph must still be the one extended.
    G = existing_graph if existing_graph is not None else nx.MultiDiGraph()

    # Check if the user node already exists, if not, add the user node with properties
    if not G.has_node(user_id):
        G.add_node(user_id, **user_properties)

    # Add content category nodes and their edges (existing nodes are left untouched)
    for category, subclasses in content_categories.items():
        category_properties = {**default_fields, 'type': 'category'}

        if not G.has_node(category):
            G.add_node(category, **category_properties)
            G.add_edge(user_id, category, relationship='created')

        for subclass in subclasses:
            # Using both category and subclass names to ensure uniqueness within categories
            subclass_node_id = f"{category}:{subclass}"

            # A subclass is skipped when ANY node in the graph already carries
            # the same 'content', regardless of which category owns that node.
            if not any(subclass == data.get('content') for _, data in G.nodes(data=True)):
                subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
                G.add_node(subclass_node_id, **subclass_properties)
                G.add_edge(category, subclass_node_id, relationship='includes')

    return G

    # # Add content category nodes and their edges
    # for category, subclasses in content_categories.items():
    #     category_properties = {**default_fields, 'type': 'category'}
    #     G.add_node(category, **category_properties)
    #     G.add_edge(user_id, category, relationship='created')

    #     # Add subclass nodes and their edges
    #     for subclass in subclasses:
    #         unique_id = str(uuid.uuid4())
    #         subclass_node_id = f"{subclass} - {unique_id}"
    #         subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
    #         G.add_node(subclass_node_id, **subclass_properties)
    #         G.add_edge(category, subclass_node_id, relationship='includes')

    # return G



In [264]:
# Reset `G`; the example-usage cell below assigns a freshly built graph to it.
G = None

In [265]:
# Demo: build the content graph for a sample user.
user_id = 'user123'
custom_user_properties = dict(username='exampleUser', email='user@example.com')

# The category and subgroups of the first article are supplied through `required_layers_one`.
G = create_user_content_graph(
    user_id,
    custom_user_properties,
    required_layers_one,
)

# Dump every node and every edge together with its attribute dict.
print("Nodes in the graph:", G.nodes(data=True), sep="\n")
print("\nEdges in the graph:", G.edges(data=True), sep="\n")

Nodes in the graph:
[('user123', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'username': 'exampleUser', 'email': 'user@example.com'}), ('Temporal', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'category'}), ('Temporal:Historical events', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Historical events'}), ('Temporal:Schedules and timelines', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Schedules and timelines'}), ('Positional', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'category'}), ('Positional:Geographical locations', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Geographical locations'}), ('Positional:Spatial data', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type

In [273]:
required_layers_one

CognitiveCategory(name='Natural Language Text', cognitive_subgroups=[CognitiveLayerSubgroup(id=1, name='News stories and blog posts', data_type='TEXT')])

In [268]:
# Extend the graph with the category/subgroups of the second article. `G` is passed
# as `existing_graph` and is non-empty here, so it is updated in place and `B`
# refers to the same graph object as `G`.
B = create_user_content_graph(user_id, custom_user_properties, required_layers_two, existing_graph=G)

In [269]:
print(B)

MultiDiGraph with 16 nodes and 15 edges


In [272]:
import os

import graphistry
import pandas as pd

# Graphistry credentials are read from the environment (for example the .env file
# loaded by load_dotenv() in the first cell) so that no secret lives in the notebook.
# Set GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD before running this cell; a missing
# variable raises KeyError instead of attempting an unauthenticated call.
# NOTE(review): the password that used to be hardcoded here has been exposed and
# should be rotated.

# Convert NetworkX graph to a Pandas DataFrame
edges = nx.to_pandas_edgelist(B)
graphistry.register(
    api=3,
    username=os.environ["GRAPHISTRY_USERNAME"],
    password=os.environ["GRAPHISTRY_PASSWORD"],
)

# Visualize the graph
graphistry.edges(edges, 'source', 'target').plot()

In [231]:
print(G)

MultiDiGraph with 15 nodes and 14 edges


In [274]:
def append_data_to_graph(G, category_name, subclass_content, new_data, layer_uuid, layer_decomposition_uuid):
    """Attach the nodes and edges of one layer decomposition to a subclass node of `G`.

    Args:
        G: Graph to extend in place. It must provide `nodes(data=True)`,
            `add_node` and `add_edge`.
        category_name: Name of the category that owns the subclass. Used to
            prefer the node whose ID is "<category_name>:<subclass_content>"
            and in the not-found message.
        subclass_content: Value of the 'content' attribute of the target
            subclass node.
        new_data: Object with `.nodes` (items exposing `id`, `description`,
            `category`, `memory_type`) and `.edges` (items exposing `source`,
            `target`, `description`).
        layer_uuid: Identifier stored on every added node as 'layer_uuid'.
        layer_decomposition_uuid: Identifier stored on every added node as
            'layer_decomposition_uuid'.

    Returns:
        `G` itself. It is returned unchanged when no matching subclass node exists.
    """
    # Resolve the subclass node from its attributes rather than by substring
    # search on node IDs: IDs are not guaranteed to be strings, and the IDs of
    # previously appended detail nodes embed free text that may contain the
    # subclass name.
    candidates = [
        node
        for node, data in G.nodes(data=True)
        if data.get('type') == 'subclass' and data.get('content') == subclass_content
    ]
    canonical_id = f"{category_name}:{subclass_content}"
    if canonical_id in candidates:
        subclass_node_id = canonical_id
    elif candidates:
        subclass_node_id = candidates[0]
    else:
        subclass_node_id = None

    if subclass_node_id is None:
        print(f"Subclass '{subclass_content}' under category '{category_name}' not found in the graph.")
        return G

    print(subclass_node_id)

    # Mapping from the IDs used inside `new_data` to the graph-wide node IDs
    node_id_mapping = {}

    # Add nodes from the Pydantic object
    for node in new_data.nodes:
        unique_node_id = uuid.uuid4()
        # The uuid suffix keeps nodes with identical descriptions distinct.
        new_node_id = f"{node.description} - {str(layer_uuid)}  - {str(layer_decomposition_uuid)} - {str(unique_node_id)}"
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        G.add_node(new_node_id,
                   created_at=timestamp,
                   updated_at=timestamp,
                   description=node.description,
                   category=node.category,
                   memory_type=node.memory_type,
                   layer_uuid=str(layer_uuid),
                   layer_decomposition_uuid=str(layer_decomposition_uuid),
                   id=str(unique_node_id),
                   type='detail')
        G.add_edge(subclass_node_id, new_node_id, relationship='detail')

        node_id_mapping[node.id] = new_node_id

    # Add edges from the Pydantic object using the new node IDs
    for edge in new_data.edges:
        source_node_id = node_id_mapping.get(edge.source)
        target_node_id = node_id_mapping.get(edge.target)

        if source_node_id is not None and target_node_id is not None:
            G.add_edge(source_node_id, target_node_id, description=edge.description, relationship='relation')
        else:
            # An edge that references a node absent from `new_data.nodes` is reported and skipped.
            print(f"Could not find mapping for edge from {edge.source} to {edge.target}")

    return G




# Assuming `pydata` is your Pydantic model instance containing the nodes and edges information
# and `G` is your existing graph

# # Here's how you would call this function:
# category_name = list(additional_categories.keys())[0]
# subclass_content = list(additional_categories.values())[0][0]


In [275]:
required_layers_one

CognitiveCategory(name='Natural Language Text', cognitive_subgroups=[CognitiveLayerSubgroup(id=1, name='News stories and blog posts', data_type='TEXT')])

In [282]:
def append_to_graph(layer_graphs, required_layers, G):
    """Append every layer decomposition to every subgroup node of `required_layers`.

    Args:
        layer_graphs: Iterable of layer decompositions, each of which is passed
            on to `append_data_to_graph` as `new_data`.
        required_layers: Object whose `.dict()` returns a mapping with a 'name'
            key and a 'cognitive_subgroups' list of mappings that each have a
            'name'.
        G: Graph to extend.

    Returns:
        The graph returned by the last `append_data_to_graph` call, or `G`
        itself when there was nothing to append.
    """
    layer_uuid = uuid.uuid4()
    layer_spec = required_layers.dict()
    category_name = layer_spec['name']
    subgroup_names = [subgroup['name'] for subgroup in layer_spec['cognitive_subgroups']]

    # Materialise once so a one-shot iterable is not exhausted after the first subgroup.
    layer_graphs = list(layer_graphs)

    # Start from G so that the function still returns a graph when either
    # loop has nothing to iterate over.
    F = G
    for subgroup in subgroup_names:

        for layer_decomposition in layer_graphs:
            layer_decomposition_uuid = uuid.uuid4()
            # Pass the full category name (indexing it with [0] would pass only its first character).
            F = append_data_to_graph(G, category_name, subgroup, layer_decomposition, layer_uuid, layer_decomposition_uuid)

            # Print updated graph for verification
            print("Updated Nodes:", F)
    return F


In [283]:

# Attach the layer graphs of the first article. append_data_to_graph adds to the graph
# it is given and returns it, so `U` is the same object as `G` (which is now modified).
U = append_to_graph(layer_1_graph, required_layers_one, G)

Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 64 nodes and 101 edges
Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 75 nodes and 121 edges
Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 88 nodes and 145 edges


In [284]:
# Attach the layer graphs of the second article on top of `U`. The graph is again
# extended in place, so `R` and `U` refer to the same object.
R = append_to_graph(layer_2_graph, required_layers_two, U)

Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 98 nodes and 166 edges
Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 105 nodes and 179 edges
Natural Language Text:News stories and blog posts
Updated Nodes: MultiDiGraph with 113 nodes and 197 edges
Natural Language Text:Personal narratives and stories
Updated Nodes: MultiDiGraph with 123 nodes and 218 edges
Natural Language Text:Personal narratives and stories
Updated Nodes: MultiDiGraph with 130 nodes and 231 edges
Natural Language Text:Personal narratives and stories
Updated Nodes: MultiDiGraph with 138 nodes and 249 edges


In [285]:
import os

import graphistry
import pandas as pd

# Graphistry credentials are read from the environment (for example the .env file
# loaded by load_dotenv() in the first cell) so that no secret lives in the notebook.
# Set GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD before running this cell; a missing
# variable raises KeyError instead of attempting an unauthenticated call.
# NOTE(review): the password that used to be hardcoded here has been exposed and
# should be rotated.

# Convert NetworkX graph to a Pandas DataFrame
edges = nx.to_pandas_edgelist(R)
graphistry.register(
    api=3,
    username=os.environ["GRAPHISTRY_USERNAME"],
    password=os.environ["GRAPHISTRY_PASSWORD"],
)

# Visualize the graph
graphistry.edges(edges, 'source', 'target').plot()

In [286]:
print(R.nodes(data=True))

[('user123', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'username': 'exampleUser', 'email': 'user@example.com'}), ('Temporal', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'category'}), ('Temporal:Historical events', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Historical events'}), ('Temporal:Schedules and timelines', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Schedules and timelines'}), ('Positional', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'category'}), ('Positional:Geographical locations', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'content': 'Geographical locations'}), ('Positional:Spatial data', {'created_at': '2024-02-28 10:24:56', 'updated_at': '2024-02-28 10:24:56', 'type': 'subclass', 'cont

In [None]:
## Utility to check if relationships are as they should be

In [212]:
def list_graph_relationships_with_node_attributes(graph):
    """Print every edge of `graph` alongside the attributes of both of its endpoints.

    A header line is printed first, followed by one line per edge. Edges that
    have no 'relationship' attribute are reported as 'No relationship specified'.
    """
    def _render(attrs):
        # "key: value" pairs joined by commas, in the dict's own order.
        return ', '.join(f"{key}: {value}" for key, value in attrs.items())

    print("Graph Relationships with Node Attributes:")
    for src, dst, edge_data in graph.edges(data=True):
        src_attrs = graph.nodes[src]
        dst_attrs = graph.nodes[dst]
        label = edge_data.get('relationship', 'No relationship specified')

        src_text = _render(src_attrs)
        dst_text = _render(dst_attrs)
        print(f"Source [{src_text}] -> Target [{dst_text}]: Relationship [{label}]")

# Print every relationship of the graph `G`
list_graph_relationships_with_node_attributes(G)

Graph Relationships with Node Attributes:
Source [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, username: exampleUser, email: user@example.com] -> Target [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, type: category]: Relationship [created]
Source [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, username: exampleUser, email: user@example.com] -> Target [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, type: category]: Relationship [created]
Source [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, username: exampleUser, email: user@example.com] -> Target [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, type: category]: Relationship [created]
Source [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, username: exampleUser, email: user@example.com] -> Target [created_at: 2024-02-28 07:42:21, updated_at: 2024-02-28 07:42:21, type: category]: Relationship [created]
So

In [2]:
!pip install qdrant-client

Collecting qdrant-client
  Using cached qdrant_client-1.7.3-py3-none-any.whl.metadata (9.3 kB)
Collecting grpcio>=1.41.0 (from qdrant-client)
  Downloading grpcio-1.62.0-cp311-cp311-macosx_10_10_universal2.whl.metadata (4.0 kB)
Collecting grpcio-tools>=1.41.0 (from qdrant-client)
  Downloading grpcio_tools-1.62.0-cp311-cp311-macosx_10_10_universal2.whl.metadata (6.2 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Using cached portalocker-2.8.2-py3-none-any.whl.metadata (8.5 kB)
Collecting protobuf<5.0dev,>=4.21.6 (from grpcio-tools>=1.41.0->qdrant-client)
  Downloading protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)
Collecting h2<5,>=3 (from httpx[http2]>=0.14.0->qdrant-client)
  Using cached h2-4.1.0-py3-none-any.whl.metadata (3.6 kB)
Collecting hyperframe<7,>=6.0 (from h2<5,>=3->httpx[http2]>=0.14.0->qdrant-client)
  Using cached hyperframe-6.0.1-py3-none-any.whl.metadata (2.7 kB)
Collecting hpack<5,>=4.0 (from h2<5,>=3->httpx[http2]>=0.14.0-

In [294]:
# def extract_node_descriptions(data):
#     descriptions = []
#     for node_id, attributes in data:
#         # Check if both 'description' and 'layer_id' are in the attributes
#         if 'description' in attributes and 'layer_id' in attributes and 'layer_uuid' in attributes:
#             descriptions.append({
#                 'node_id': node_id, 
#                 'description': attributes['description'],
#                 'layer_uuid': attributes['layer_uuid']  # Include layer_id
#             })
#     return descriptions

# # Extract the node descriptions
# node_descriptions = extract_node_descriptions(R.nodes(data=True))

# # Display the results (displaying a subset for brevity)
# for item in node_descriptions[:5]:  # Adjust the slice as needed for display
#     print(item)

In [292]:
def extract_node_descriptions(data):
    """Collect a `{'node_id', 'description'}` record for each node that has a description.

    `data` is an iterable of `(node_id, attributes)` pairs; pairs whose
    attributes lack a 'description' key are left out.
    """
    return [
        {'node_id': node_id, 'description': attributes['description']}
        for node_id, attributes in data
        if 'description' in attributes
    ]

# Collect a {'node_id', 'description'} record for every node of `R` that has a
# 'description' attribute (category, subclass and user nodes are created without one).
node_descriptions = extract_node_descriptions(R.nodes(data=True))

# Show only the first five records to keep the cell output short
for item in node_descriptions[:5]:  # Change the slice to display more or fewer records
    print(item)

{'node_id': 'In British culture, keeping pets is an integral way of life and an outlet for emotions, with a particular fondness for dogs - 08b965ee-470a-4133-979b-0003edef15d0  - 97597864-a11a-4058-b854-8f21864c7e06 - 58fc3b86-be82-4d50-9578-2b157924ef23', 'description': 'In British culture, keeping pets is an integral way of life and an outlet for emotions, with a particular fondness for dogs'}
{'node_id': 'Dogs serve as an acceptable outlet for typically controlled British emotions, facilitating affection and sociability - 08b965ee-470a-4133-979b-0003edef15d0  - 97597864-a11a-4058-b854-8f21864c7e06 - e7929623-23e0-4b43-80b6-52fdbf4ce154', 'description': 'Dogs serve as an acceptable outlet for typically controlled British emotions, facilitating affection and sociability'}
{'node_id': "Public spaces in the UK are accommodating of dogs, with signs such as 'Dogs welcome, people tolerated' and treats offered in establishments - 08b965ee-470a-4133-979b-0003edef15d0  - 97597864-a11a-4058-b8

## HOW TO CONNECT INTERLAYERS WITH SEMANTIC SEARCH

In [295]:
## Idea: embed every node description and store it in the vector store, then run a semantic search from each description against the others and keep only the matches that connect nodes from different layers

In [27]:
from openai import OpenAI
client = OpenAI()

In [36]:
def get_embedding(text):
    """Return the embedding vector for `text`, as produced by the `client` embeddings endpoint."""
    result = client.embeddings.create(
        model="text-embedding-3-large",  # Choose an appropriate engine for your use case
        input=[text],
    )
    # Exactly one input was sent, so its embedding is the first (only) data item.
    first_item = result.data[0]
    return first_item.embedding

In [37]:
embedding = get_embedding("Example text to encode")

In [51]:
# Embed a couple of throwaway strings to have sample vectors to upload.
some_text = ["bla", "blwea"]
some_embeddings = [get_embedding(st) for st in some_text]

In [40]:
from qdrant_client import models, QdrantClient

In [41]:
qdrant = QdrantClient(":memory:")

In [None]:
from qdrant_client.http import models as rest

In [56]:
collection_name = 'Articles'

# Create a new collection in Qdrant
def create_collection(collection_name):
    """(Re)create the Qdrant collection `collection_name` with a single named vector, 'content'.

    The 'content' vector uses cosine distance and 3072 dimensions.
    NOTE(review): `recreate_collection` presumably drops an existing collection
    of the same name first - confirm against the qdrant-client documentation.
    """
    content_vector_params = rest.VectorParams(
        size=3072,
        distance=rest.Distance.COSINE,
    )
    qdrant.recreate_collection(
        collection_name=collection_name,
        vectors_config={'content': content_vector_params},
    )

# Function to upload embeddings to Qdrant
def upload_embedding(id, metadata, some_embeddings, collection_name):
    """Upload each vector in `some_embeddings` as one point of `collection_name`.

    Every point stores its vector under the name "content" and shares the same
    `metadata` payload. Point IDs are the positions of the vectors in
    `some_embeddings` (0, 1, 2, ...).

    NOTE(review): the `id` argument is not used, and because point IDs always
    start at 0, a second call presumably overwrites the points written by the
    first - confirm the intended ID scheme.
    """
    points = []
    for idx, doc in enumerate(some_embeddings):
        point = models.PointStruct(id=idx, vector={"content": doc}, payload=metadata)
        points.append(point)

    qdrant.upload_points(collection_name=collection_name, points=points)

# Example usage: the payload must be a dict literal, and upload_embedding
# requires the target collection name as its fourth argument.
upload_embedding(1, {"meta": 1}, some_embeddings, collection_name)

In [61]:
# Look up the three stored points closest to the embedding of "bla".
# `query_vector` is a (vector name, vector) pair: "content" is the named vector
# configured in create_collection and written by upload_embedding.
hits = qdrant.search(
    collection_name="Articles",
    query_vector=(
            "content", get_embedding("bla")
        ),
    limit=3,
)
# Show the payload and score of each hit
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'bv': 'bjdjdj'} score: 0.9999999947315471
{'bv': 'bjdjdj'} score: 0.497255529710952


In [47]:
f

In [213]:
# Gather only the node identifiers of `G` (no attribute dicts) and show them.
nodes = [node_id for node_id in G.nodes()]

print("Nodes in the graph:", nodes)

Nodes in the graph: ['user123', 'Temporal', 'Temporal:Historical events', 'Temporal:Schedules and timelines', 'Positional', 'Positional:Geographical locations', 'Positional:Spatial data', 'Propositions', 'Propositions:Hypotheses and theories', 'Propositions:Claims and arguments', 'Personalization', 'Personalization:User preferences', 'Personalization:User information', 'Natural Language Text', 'Natural Language Text:News stories and blog posts']
