## Environment Configuration
### Set up the required directories and environment variables.


In [1]:
import pathlib
import os
import cognee

# Everything this notebook writes lives in hidden folders under the current
# working directory (resolved to an absolute path).
notebook_dir = pathlib.Path().resolve()
data_directory_path = str(notebook_dir.joinpath(".data_storage"))
cognee_directory_path = str(notebook_dir.joinpath(".cognee_system"))

# Point Cognee at the notebook-local data and system directories.
cognee.config.data_root_directory(data_directory_path)
cognee.config.system_root_directory(cognee_directory_path)

# Root of the PokéAPI; resource paths are appended directly to it.
BASE_URL = "https://pokeapi.co/api/v2/"

# Environment settings for the dlt filesystem destination: where to write, and
# (presumably) to keep the emitted files as plain, uncompressed text so they
# can be read back line by line later in the notebook.
os.environ.update(
    {
        "BUCKET_URL": data_directory_path,
        "DATA_WRITER__DISABLE_COMPRESSION": "true",
    }
)


## Initialize DLT Pipeline
### Create the DLT pipeline to fetch Pokémon data.


In [2]:
import dlt
from pathlib import Path

# A single pipeline object, reused by the cell that runs the load. The
# "filesystem" destination writes files instead of loading into a database.
pipeline = dlt.pipeline(
    dataset_name="pokemon_data",
    destination="filesystem",
    pipeline_name="pokemon_pipeline",
)


##  Fetch Pokémon List
### Retrieve a list of Pokémon from the API.


In [3]:
@dlt.resource(write_disposition="replace")
def pokemon_list(limit: int = 50):
    """Yield the list of Pokémon references returned by the PokéAPI.

    Args:
        limit: Value sent as the ``limit`` query parameter of the
            ``pokemon`` endpoint.

    Yields:
        The ``results`` entry of the JSON response, as one item. Each element
        is expected to carry a ``url`` key, which ``pokemon_details`` reads.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    import requests

    # requests has no default timeout; without one a stalled connection would
    # block the whole notebook run indefinitely.
    request_timeout_seconds = 30

    response = requests.get(
        f"{BASE_URL}pokemon",
        params={"limit": limit},
        timeout=request_timeout_seconds,
    )
    response.raise_for_status()
    yield response.json()["results"]


##  Fetch Pokémon Details
### Fetch detailed information about each Pokémon.


In [4]:
@dlt.transformer(data_from=pokemon_list)
def pokemon_details(pokemons):
    """Fetch the detail document of every Pokémon in ``pokemons``.

    Args:
        pokemons: Iterable of mappings, each with a ``url`` key pointing at
            the detail endpoint of one Pokémon.

    Yields:
        The decoded JSON body of each detail response, one per Pokémon.

    Raises:
        requests.HTTPError: If any request answers with an error status.
        requests.Timeout: If a request does not answer within the timeout.
    """
    import requests

    # requests has no default timeout; without one a stalled connection would
    # block the whole notebook run indefinitely.
    request_timeout_seconds = 30

    # One session for all requests so the HTTP connection to the API host is
    # reused instead of being re-established for every Pokémon.
    with requests.Session() as session:
        for pokemon in pokemons:
            response = session.get(pokemon["url"], timeout=request_timeout_seconds)
            response.raise_for_status()
            yield response.json()


##  Run Data Pipeline
### Execute the pipeline and store Pokémon data.


In [5]:
# Run both resources in one load: pokemon_list supplies the references and
# pokemon_details (declared with data_from=pokemon_list) expands each of them.
info = pipeline.run([pokemon_list, pokemon_details])
# Show the load summary returned by the run.
print(info)


Pipeline pokemon_pipeline load step completed in 0.04 seconds
1 load package(s) were loaded to destination filesystem and into dataset pokemon_data
The filesystem destination used file:///Users/borisarzentar/Projects/Topoteretes/cognee/notebooks/.data_storage location to store data
Load package 1743589860.3306491 is LOADED and contains no failed jobs


##  Load Pokémon Abilities
### Load Pokémon ability data from stored files.


In [6]:
import json
from cognee.low_level import DataPoint
from uuid import uuid5, NAMESPACE_OID

class Abilities(DataPoint):
    """Root data point that every loaded ability record references via ``is_type``."""

    # Fixed label shared by the single root instance.
    name: str = "Abilities"
    # NOTE(review): presumably tells Cognee which fields to index — confirm
    # against the DataPoint documentation.
    metadata: dict = {"index_fields": ["name"]}

def load_abilities_data(jsonl_abilities):
    """Read ability records from JSON Lines files and attach them to a shared root.

    Each record is a plain ``dict`` (not yet a ``PokemonAbility``); this
    function adds three keys to it:

    * ``id`` - a UUIDv5 derived from the record's ``_dlt_id``,
    * ``name`` - a copy of ``ability__name``,
    * ``is_type`` - the shared ``Abilities`` root instance.

    Args:
        jsonl_abilities: Iterable of paths to ``.jsonl`` files with one JSON
            object per line.

    Returns:
        A ``(abilities_root, pokemon_abilities)`` tuple: the newly created
        ``Abilities`` root and the list of enriched ability dicts, in file order.

    Raises:
        KeyError: If a record lacks ``_dlt_id`` or ``ability__name``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    abilities_root = Abilities()
    pokemon_abilities = []

    for jsonl_ability in jsonl_abilities:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(jsonl_ability, "r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue
                ability = json.loads(line)
                ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"])
                ability["name"] = ability["ability__name"]
                ability["is_type"] = abilities_root
                pokemon_abilities.append(ability)

    return abilities_root, pokemon_abilities


##  Load Pokémon Data
### Load Pokémon details and associate them with abilities.


In [7]:
from typing import List, Optional

class Pokemons(DataPoint):
    """Root data point that every ``Pokemon`` references via ``is_type``."""

    # Fixed label shared by the single root instance.
    name: str = "Pokemons"
    # Link from the Pokémon root to the abilities root.
    have: Abilities
    # NOTE(review): presumably tells Cognee which fields to index — confirm
    # against the DataPoint documentation.
    metadata: dict = {"index_fields": ["name"]}

class PokemonAbility(DataPoint):
    """One ability record of one Pokémon, built from a row of the abilities files."""

    # Copy of ability__name, filled in by load_abilities_data.
    name: str
    # Double-underscore names look like nested JSON keys flattened by dlt
    # (ability.name, ability.url) — presumably; confirm against the dlt schema.
    ability__name: str
    ability__url: str
    is_hidden: bool
    slot: int
    # NOTE(review): if DataPoint is a pydantic model, annotations starting with
    # an underscore are treated as private attributes rather than validated
    # fields, so the types below may never be enforced — confirm this is intended.
    _dlt_load_id: str
    _dlt_id: str
    # Matched against a Pokémon's _dlt_id in load_pokemon_data.
    _dlt_parent_id: str
    _dlt_list_idx: str
    # Shared Abilities root, assigned in load_abilities_data.
    is_type: Abilities
    # NOTE(review): presumably tells Cognee which fields to index — confirm.
    metadata: dict = {"index_fields": ["ability__name"]}

class Pokemon(DataPoint):
    """One Pokémon, built from a row of the pokemon_details files.

    load_pokemon_data also passes an ``external_id`` keyword that is not
    declared here; whether it is kept depends on how DataPoint handles
    undeclared fields — TODO confirm.
    """

    name: str
    base_experience: int
    height: int
    weight: int
    is_default: bool
    order: int
    location_area_encounters: str
    # Double-underscore names look like nested JSON keys flattened by dlt
    # (species.name, cries.latest, ...) — presumably; confirm against the dlt schema.
    species__name: str
    species__url: str
    cries__latest: str
    cries__legacy: str
    sprites__front_default: str
    sprites__front_shiny: str
    # Optional only widens the accepted type to None; no default is given here.
    sprites__back_default: Optional[str]
    sprites__back_shiny: Optional[str]
    # NOTE(review): if DataPoint is a pydantic model, annotations starting with
    # an underscore are treated as private attributes rather than validated
    # fields — confirm this is intended.
    _dlt_load_id: str
    _dlt_id: str
    # Shared Pokemons root, assigned in load_pokemon_data.
    is_type: Pokemons
    # Ability records whose _dlt_parent_id equals this Pokémon's _dlt_id.
    abilities: List[PokemonAbility]
    # NOTE(review): presumably tells Cognee which fields to index — confirm.
    metadata: dict = {"index_fields": ["name"]}

def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
    """Build ``Pokemon`` data points from JSON Lines files and link their abilities.

    For every row the original numeric ``id`` is kept as ``external_id`` and
    replaced by a UUIDv5 derived from it; the row's abilities are the entries
    of ``pokemon_abilities`` whose ``_dlt_parent_id`` equals the row's
    ``_dlt_id``.

    Args:
        jsonl_pokemons: Iterable of paths to ``.jsonl`` files with one Pokémon
            JSON object per line.
        pokemon_abilities: List of ability dicts as produced by
            ``load_abilities_data``; each must contain ``_dlt_parent_id``.
        pokemon_root: The ``Pokemons`` root instance assigned to every
            Pokémon's ``is_type``.

    Returns:
        List of ``Pokemon`` instances in file order.

    Raises:
        KeyError: If an ability lacks ``_dlt_parent_id`` or a row lacks
            ``_dlt_id`` / ``id``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Index the abilities by parent id once, instead of rescanning the whole
    # list for every Pokémon. Insertion order within each group is preserved,
    # so each Pokémon gets its abilities in the same order as before.
    abilities_by_parent = {}
    for ability in pokemon_abilities:
        abilities_by_parent.setdefault(ability["_dlt_parent_id"], []).append(ability)

    pokemons = []

    for jsonl_pokemon in jsonl_pokemons:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(jsonl_pokemon, "r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue
                pokemon_data = json.loads(line)
                abilities = abilities_by_parent.get(pokemon_data["_dlt_id"], [])
                # Keep the API's numeric id before overwriting it with a UUID.
                pokemon_data["external_id"] = pokemon_data["id"]
                pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"]))
                pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities]
                pokemon_data["is_type"] = pokemon_root
                pokemons.append(Pokemon(**pokemon_data))

    return pokemons


##  Build Pokémon Data Points
### Load the stored JSONL files and link each Pokémon to its abilities.


In [8]:
# Derive the locations from data_directory_path (the directory the pipeline was
# configured to write to) instead of repeating a path relative to the current
# working directory, so this cell always reads what the pipeline wrote.
DATASET_PATH = Path(data_directory_path) / "pokemon_data"

STORAGE_PATH = DATASET_PATH / "pokemon_details"
jsonl_pokemons = sorted(STORAGE_PATH.glob("*.jsonl"))

ABILITIES_PATH = DATASET_PATH / "pokemon_details__abilities"
jsonl_abilities = sorted(ABILITIES_PATH.glob("*.jsonl"))

# Fail loudly here rather than silently building an empty dataset downstream.
if not jsonl_pokemons:
    raise FileNotFoundError(f"No .jsonl files found in {STORAGE_PATH}; run the data pipeline cell first.")
if not jsonl_abilities:
    raise FileNotFoundError(f"No .jsonl files found in {ABILITIES_PATH}; run the data pipeline cell first.")

abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)
pokemon_root = Pokemons(have=abilities_root)
pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)


##  Initialize Cognee
### Set up Cognee for data processing.


In [9]:
from cognee.low_level import setup as cognee_setup

async def initialize_cognee() -> None:
    """Prune Cognee's data and system state, then run its setup routine.

    The three calls are awaited strictly in this order: prune data, prune
    system (including metadata), then setup.
    NOTE(review): presumably this wipes everything stored by earlier runs so
    the notebook starts from a clean slate — confirm against the Cognee docs.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await cognee_setup()

# Top-level await: relies on the notebook kernel's running event loop.
await initialize_cognee()


  from .autonotebook import tqdm as notebook_tqdm


##  Process Pokémon Data
### Add Pokémon data points to Cognee.


In [10]:
from cognee.modules.pipelines.tasks.Task import Task
from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines import run_tasks

tasks = [Task(add_data_points)]
pipeline_run = run_tasks(
    tasks=tasks,
    data=pokemons,
    dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
    pipeline_name='pokemon_pipeline',
)

async for run_info in pipeline_run:
    print(run_info.__dict__)
print("Done")


{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x31b8c4230>, 'pipeline_run_id': UUID('32976ad1-847f-4c80-8eab-002bc28ba621'), 'pipeline_name': 'pokemon_pipeline', 'pipeline_id': UUID('fd2ed59d-b550-5b05-bbe6-7b708fe12483'), 'status': <PipelineRunStatus.DATASET_PROCESSING_STARTED: 'DATASET_PROCESSING_STARTED'>, 'dataset_id': UUID('dafbc434-f846-5ad8-8f28-143eb0e60ed5'), 'run_info': {'data': "[Pokemon(id=UUID('996ad860-2a9a-504f-8861-aeafd0b2ae29'), created_at=1743589870289, updated_at=1743589870289, ontology_valid=False, version=1, topological_rank=0, metadata={'index_fields': ['name']}, type='Pokemon', name='bulbasaur', base_experience=64, height=7, weight=69, is_default=True, order=1, location_area_encounters='https://pokeapi.co/api/v2/pokemon/1/encounters', species__name='bulbasaur', species__url='https://pokeapi.co/api/v2/pokemon-species/1/', cries__latest='https://raw.githubusercontent.com/PokeAPI/cries/main/cries/pokemon/latest/1.ogg', cries__legacy='https://

##  Search Pokémon Data
### Execute a search query using Cognee.


In [11]:
from cognee.api.v1.search import SearchType

search_results = await cognee.search(
    query_type=SearchType.GRAPH_COMPLETION,
    query_text="pokemons?"
)

print("Search results:")
for result_text in search_results:
    print(result_text)

Search results:
Pokemons have abilities. Examples of Pokemons include: 
- nidorino (ability: poison-point) 
- nidoqueen (ability: poison-point) 
- ninetales (ability: flash-fire) 
- vulpix (ability: flash-fire).
