In [1]:
import os
import sys
from getpass import getpass
import nest_asyncio

from dotenv import load_dotenv

nest_asyncio.apply()

load_dotenv()


True

In [106]:
import os
import json
from typing import Dict, List, Optional, Tuple
from notion_client import Client
import pandas as pd
from datetime import datetime

class NotionProcessor:
    """A class to process and extract structured data from Notion databases and pages."""
    
    def __init__(self, auth_token: str):
        """Initialize the Notion client with authentication token."""
        self.notion = Client(auth=auth_token)
    
    def get_database_pages(self, database_id: str, parent_properties: Dict = None) -> List[Dict]:
        """
        Retrieve all pages from a Notion database.
        
        Args:
            database_id: The ID of the Notion database to query
            parent_properties: Optional properties from parent pages to merge with results
            
        Returns:
            List of page objects from the database
        """
        pages = []
        cursor = None
        
        while True:
            response = self.notion.databases.query(
                database_id=database_id,
                start_cursor=cursor
            )
            
            # If parent properties exist, merge them with each page
            if parent_properties:
                for page in response['results']:
                    self._merge_parent_properties(page, parent_properties)
            
            pages.extend(response['results'])
            
            if not response.get('has_more'):
                break
                
            cursor = response['next_cursor']
            
        return pages
    
    def _merge_parent_properties(self, page: Dict, parent_properties: Dict):
        """
        Merge parent properties into page properties, avoiding duplicates.
        
        Args:
            page: The page object to merge properties into
            parent_properties: Properties from parent pages to merge
        """
        for key, value in parent_properties.items():
            if key not in page['properties']:
                # Create a new property of type 'rich_text' to store inherited values
                page['properties'][f'parent_{key}'] = {
                    'type': 'rich_text',
                    'rich_text': [{
                        'type': 'text',
                        'text': {'content': str(value)},
                        'plain_text': str(value)
                    }]
                }
    
    def extract_properties(self, page: Dict) -> Dict:
        """
        Extract properties from a Notion page into a simplified dictionary format.
        
        Args:
            page: The Notion page object
            
        Returns:
            Dictionary of extracted properties
        """
        properties = {}
        
        for prop_name, prop_data in page['properties'].items():
            prop_type = prop_data['type']
            
            if prop_type == 'title':
                properties[prop_name] = self._get_rich_text_content(prop_data['title'])
            elif prop_type == 'rich_text':
                properties[prop_name] = self._get_rich_text_content(prop_data['rich_text'])
            elif prop_type == 'select':
                if prop_data['select']:
                    properties[prop_name] = prop_data['select']['name']
            elif prop_type == 'multi_select':
                properties[prop_name] = [item['name'] for item in prop_data['multi_select']]
            elif prop_type == 'date':
                if prop_data['date']:
                    properties[prop_name] = prop_data['date']['start']
            elif prop_type in ['number', 'checkbox']:
                properties[prop_name] = prop_data[prop_type]
                
        return properties
    
    def _get_rich_text_content(self, rich_text: List) -> str:
        """
        Extract plain text content from Notion's rich text array format.
        
        Args:
            rich_text: List of rich text objects from Notion
            
        Returns:
            Concatenated plain text string
        """
        return ' '.join([text['plain_text'] for text in rich_text if text.get('plain_text')])
    
    def get_block_children(self, block_id: str, level: int = 0) -> List[Tuple[Dict, int]]:
        """
        Retrieve all child blocks of a given block with their nesting level.
        
        Args:
            block_id: ID of the parent block
            level: Current nesting level (used for recursion)
            
        Returns:
            List of tuples containing (block, nesting_level)
        """
        blocks = []
        cursor = None
        
        while True:
            response = self.notion.blocks.children.list(
                block_id=block_id,
                start_cursor=cursor
            )
            
            for block in response['results']:
                blocks.append((block, level))
                
                if block.get('has_children'):
                    # For child database blocks, don't process them here
                    if block['type'] != 'child_database':
                        child_blocks = self.get_block_children(block['id'], level + 1)
                        blocks.extend(child_blocks)
            
            if not response.get('has_more'):
                break
                
            cursor = response['next_cursor']
            
        return blocks
    
    def process_blocks(self, blocks: List[Tuple[Dict, int]]) -> Tuple[Dict, List[str]]:
        """
        Process blocks to extract headers and content sections.
        
        Args:
            blocks: List of (block, level) tuples to process
            
        Returns:
            Tuple of (headers_dict, content_sections) where headers_dict maps
            headers to content section indices
        """
        current_main_header = None
        current_sub_headers = []
        current_content = []
        headers = {}
        content_sections = []
        current_bullet_group = []
        
        def save_current_section():
            """Helper function to save current section's content."""
            nonlocal current_content, current_bullet_group, content_sections, current_main_header, current_sub_headers, headers
            
            # Save any remaining bullet group
            if current_bullet_group:
                current_content.append(self._merge_bullet_group(current_bullet_group))
                current_bullet_group = []
            
            # Save content if exists and we have a header
            if current_main_header is not None and current_content:
                content_sections.append('\n'.join(filter(None, current_content)))
                full_header = current_main_header
                if current_sub_headers:
                    full_header += ' - ' + ' - '.join(current_sub_headers)
                headers[full_header] = len(content_sections) - 1  # Point to the section we just added
        
        for block, level in blocks:
            block_type = block['type']
            
            # Handle headers
            if block_type.startswith('heading_'):
                # Save current section before starting new one
                save_current_section()
                
                # Reset content collection for new section
                current_content = []
                current_bullet_group = []
                
                # Process new header
                header_text = self._get_rich_text_content(block[block_type]['rich_text'])
                header_level = int(block_type[-1])
                
                if header_level == 1:
                    current_main_header = header_text
                    current_sub_headers = []
                else:
                    current_sub_headers.append(header_text)
            
            # Handle child database
            elif block_type == 'child_database':
                if current_bullet_group:
                    current_content.append(self._merge_bullet_group(current_bullet_group))
                    current_bullet_group = []
                current_content.append(f"[Database: {block['id']}]")
            
            # Handle bullet points and numbered lists
            elif block_type in ['bulleted_list_item', 'numbered_list_item']:
                text_content = self._get_rich_text_content(block[block_type]['rich_text'])
                
                if level == 0:
                    if current_bullet_group:
                        current_content.append(self._merge_bullet_group(current_bullet_group))
                        current_bullet_group = []
                    current_bullet_group = [(text_content, level)]
                else:
                    current_bullet_group.append((text_content, level))
            
            # Handle regular paragraphs
            elif block_type == 'paragraph':
                if current_bullet_group:
                    current_content.append(self._merge_bullet_group(current_bullet_group))
                    current_bullet_group = []
                
                text_content = self._get_rich_text_content(block[block_type]['rich_text'])
                if text_content:
                    current_content.append(text_content)
        
        # Save final section
        save_current_section()
        
        return headers, content_sections
    
    def _merge_bullet_group(self, bullet_group: List[Tuple[str, int]]) -> str:
        """
        Merge a group of bullets into a single line, with sub-bullets inline.
        
        Args:
            bullet_group: List of (text, level) tuples representing bullet points
            
        Returns:
            Merged bullet points as a single string
        """
        if not bullet_group:
            return ""
        
        result = []
        current_main_bullet = []
        
        for text, level in bullet_group:
            if level == 0:
                if current_main_bullet:
                    result.append(' '.join(current_main_bullet))
                current_main_bullet = [text]
            else:
                current_main_bullet.append(text)
        
        if current_main_bullet:
            result.append(' '.join(current_main_bullet))
        
        return '\n'.join(result)
    
    def process_page(self, page: Dict, parent_properties: Dict = None) -> List[Dict]:
        """
        Process a single page and its nested databases.
        
        Args:
            page: The Notion page object to process
            parent_properties: Optional properties from parent pages
            
        Returns:
            List of processed entries with properties and content
        """
        results = []
        properties = self.extract_properties(page)
        
        # Merge parent properties if they exist
        if parent_properties:
            properties.update({f'parent_{k}': v for k, v in parent_properties.items()})
        
        # Process page blocks
        blocks = self.get_block_children(page['id'])
        headers, content_sections = self.process_blocks(blocks)
        
        # Add headers to properties and create entries
        for header, section_index in headers.items():
            section_properties = properties.copy()
            section_properties['header'] = header
            
            # Create entry for this section
            if 0 <= section_index < len(content_sections):
                results.append({
                    'properties': section_properties,
                    'content': content_sections[section_index]
                })
        
        # Process any child databases found in the blocks
        for block, _ in blocks:
            if block['type'] == 'child_database':
                child_pages = self.get_database_pages(block['id'], properties)
                for child_page in child_pages:
                    results.extend(self.process_page(child_page, properties))
        
        return results
    
    def process_database(self, database_id: str) -> List[Dict]:
        """
        Process entire database and return structured data.
        
        Args:
            database_id: The ID of the Notion database to process
            
        Returns:
            List of processed entries with properties and content
        """
        processed_data = []
        
        # Get all pages from the database
        pages = self.get_database_pages(database_id)
        
        # Process each page and its nested databases
        for page in pages:
            processed_data.extend(self.process_page(page))
            
        return processed_data

# Example usage in Jupyter notebook (separate cells):

In [20]:
import os
import json
from typing import Dict, List, Optional, Tuple
from notion_client import Client
import pandas as pd
from datetime import datetime

class NotionProcessor:
    def __init__(self, auth_token: str):
        """Initialize the Notion client with authentication token."""
        self.notion = Client(auth=auth_token)
    
    def get_database_pages(self, database_id: str, parent_properties: Dict = None) -> List[Dict]:
        """
        Retrieve all pages from a Notion database.
        If parent_properties provided, merge them with each page's properties.
        """
        pages = []
        cursor = None
        
        while True:
            response = self.notion.databases.query(
                database_id=database_id,
                start_cursor=cursor
            )
            
            # If parent properties exist, merge them with each page
            if parent_properties:
                for page in response['results']:
                    self._merge_parent_properties(page, parent_properties)
            
            pages.extend(response['results'])
            
            if not response.get('has_more'):
                break
                
            cursor = response['next_cursor']
            
        return pages
    
    def _merge_parent_properties(self, page: Dict, parent_properties: Dict):
        """
        Merge parent properties into page properties with specific handling for Name, Description, and Tags.
        """
        for key, value in parent_properties.items():
            if key == 'Name':
                # Handle name merging only if child page has a name
                if 'Name' in page['properties']:
                    child_name = self._get_rich_text_content(page['properties']['Name'].get('title', []))
                    if child_name:
                        merged_name = f"{value} - {child_name}"
                        page['properties']['Name'] = {
                            'type': 'title',
                            'title': [{
                                'type': 'text',
                                'text': {'content': merged_name},
                                'plain_text': merged_name
                            }]
                        }
            elif key == 'Description':
                # Skip Description property - keep child's description if it exists
                continue
            elif key == 'Tags':
                # Merge tags, removing duplicates
                parent_tags = set(value.split(', ')) if value else set()
                if 'Tags' in page['properties']:
                    child_tags = set(tag['name'] for tag in page['properties']['Tags'].get('multi_select', []))
                    merged_tags = parent_tags.union(child_tags)
                    page['properties']['Tags'] = {
                        'type': 'multi_select',
                        'multi_select': [{'name': tag} for tag in sorted(merged_tags)]
                    }
            else:
                # For all other properties, inherit from parent if not present in child
                if key not in page['properties']:
                    page['properties'][key] = {
                        'type': 'rich_text',
                        'rich_text': [{
                            'type': 'text',
                            'text': {'content': str(value)},
                            'plain_text': str(value)
                        }]
                    }
    
    
    def extract_properties(self, page: Dict) -> Dict:
        """Extract properties from a page."""
        properties = {}
        
        for prop_name, prop_data in page['properties'].items():
            prop_type = prop_data['type']
            
            if prop_type == 'title':
                properties[prop_name] = self._get_rich_text_content(prop_data['title'])
            elif prop_type == 'rich_text':
                properties[prop_name] = self._get_rich_text_content(prop_data['rich_text'])
            elif prop_type == 'select':
                if prop_data['select']:
                    properties[prop_name] = prop_data['select']['name']
            elif prop_type == 'multi_select':
                # Convert multi-select to comma-separated string
                properties[prop_name] = ', '.join(sorted(item['name'] for item in prop_data['multi_select']))
            elif prop_type == 'date':
                if prop_data['date']:
                    properties[prop_name] = prop_data['date']['start']
            elif prop_type in ['number', 'checkbox']:
                properties[prop_name] = prop_data[prop_type]
                
        return properties
    
    def _normalize_text(self, text: str) -> str:
        """
        Normalize text content by:
        1. Replacing multiple spaces with single space
        2. Removing spaces before newlines
        3. Removing spaces after newlines
        4. Removing empty lines
        """
        # Replace multiple spaces with single space
        text = ' '.join(text.split())
        
        # Remove spaces before colons
        text = text.replace(' :', ':')
        
        # Split into lines and process each line
        lines = text.split('\n')
        cleaned_lines = []
        
        for line in lines:
            # Clean each line individually
            cleaned_line = line.strip()
            if cleaned_line:  # Only keep non-empty lines
                cleaned_lines.append(cleaned_line)
        
        # Join lines back together
        return '\n'.join(cleaned_lines)
    
    def _get_rich_text_content(self, rich_text: List) -> str:
        """Extract text content from rich text array and normalize it."""
        text = ' '.join([text['plain_text'] for text in rich_text if text.get('plain_text')])
        return self._normalize_text(text)
    
    def get_block_children(self, block_id: str, level: int = 0) -> List[Tuple[Dict, int]]:
        """Retrieve all child blocks of a given block with their nesting level."""
        blocks = []
        cursor = None
        
        while True:
            response = self.notion.blocks.children.list(
                block_id=block_id,
                start_cursor=cursor
            )
            
            for block in response['results']:
                blocks.append((block, level))
                
                if block.get('has_children'):
                    if block['type'] != 'child_database':
                        child_blocks = self.get_block_children(block['id'], level + 1)
                        blocks.extend(child_blocks)
            
            if not response.get('has_more'):
                break
                
            cursor = response['next_cursor']
            
        return blocks
    
    def process_blocks(self, blocks: List[Tuple[Dict, int]]) -> Tuple[Dict, List[str]]:
        """
        Process blocks to extract headers and content.
        Sub-headers are treated as text content with line breaks.
        """
        current_header = None
        current_content = []
        headers = {}
        content_sections = []
        current_bullet_group = []
        
        def save_current_section():
            """Helper function to save current section's content."""
            nonlocal current_content, current_bullet_group, content_sections, current_header, headers
            
            if current_bullet_group:
                current_content.append(self._merge_bullet_group(current_bullet_group))
                current_bullet_group = []
            
            if current_header is not None and current_content:
                # Join content and normalize the entire section
                section_content = self._normalize_text('\n'.join(filter(None, current_content)))
                content_sections.append(section_content)
                headers[current_header] = len(content_sections) - 1
        
        for block, level in blocks:
            block_type = block['type']
            
            # Handle headers
            if block_type.startswith('heading_'):
                header_text = self._get_rich_text_content(block[block_type]['rich_text'])
                header_level = int(block_type[-1])
                
                if header_level == 1:
                    # Save current section before starting new one
                    save_current_section()
                    current_content = []
                    current_header = header_text
                else:
                    # Treat sub-headers as text content with line break
                    if current_bullet_group:
                        current_content.append(self._merge_bullet_group(current_bullet_group))
                        current_bullet_group = []
                    current_content.append(f"{header_text}\n")
            
            # Handle child database
            elif block_type == 'child_database':
                if current_bullet_group:
                    current_content.append(self._merge_bullet_group(current_bullet_group))
                    current_bullet_group = []
                current_content.append(f"[Database: {block['id']}]")
            
            # Handle bullet points and numbered lists
            elif block_type in ['bulleted_list_item', 'numbered_list_item']:
                text_content = self._get_rich_text_content(block[block_type]['rich_text'])
                
                if level == 0:
                    if current_bullet_group:
                        current_content.append(self._merge_bullet_group(current_bullet_group))
                        current_bullet_group = []
                    current_bullet_group = [(text_content, level)]
                else:
                    current_bullet_group.append((text_content, level))
            
            # Handle regular paragraphs
            elif block_type == 'paragraph':
                if current_bullet_group:
                    current_content.append(self._merge_bullet_group(current_bullet_group))
                    current_bullet_group = []
                
                text_content = self._get_rich_text_content(block[block_type]['rich_text'])
                if text_content:
                    current_content.append(text_content)
        
        # Save final section
        save_current_section()
        
        return headers, content_sections
    
    def _merge_bullet_group(self, bullet_group: List[Tuple[str, int]]) -> str:
        """Merge a group of bullets into a single line, with sub-bullets inline."""
        if not bullet_group:
            return ""
        
        result = []
        current_main_bullet = []
        
        for text, level in bullet_group:
            if level == 0:
                if current_main_bullet:
                    result.append(self._normalize_text(' '.join(current_main_bullet)))
                current_main_bullet = [text]
            else:
                current_main_bullet.append(text)
        
        if current_main_bullet:
            result.append(self._normalize_text(' '.join(current_main_bullet)))
        
        return '\n'.join(result)
    
    def process_page(self, page: Dict, parent_properties: Dict = None) -> List[Dict]:
        """Process a single page and its nested databases."""
        results = []
        
        # Extract properties before any merging
        properties = self.extract_properties(page)
        
        # Merge with parent properties if they exist
        if parent_properties:
            # Skip special properties handling here as it's done in _merge_parent_properties
            # Only handle properties that weren't merged during database query
            for key, value in parent_properties.items():
                if key not in ['Name', 'Description', 'Tags'] and key not in properties:
                    properties[key] = value
        
        # Process page blocks
        blocks = self.get_block_children(page['id'])
        headers, content_sections = self.process_blocks(blocks)
        
        # Create entries for each section
        for header, section_index in headers.items():
            section_properties = properties.copy()
            section_properties['header'] = header
            
            if 0 <= section_index < len(content_sections):
                results.append({
                    'properties': section_properties,
                    'content': content_sections[section_index]
                })
        
        # Process child databases
        for block, _ in blocks:
            if block['type'] == 'child_database':
                child_pages = self.get_database_pages(block['id'], properties)
                for child_page in child_pages:
                    results.extend(self.process_page(child_page, properties))
        
        return results

    def process_database(self, database_id: str) -> List[Dict]:
        """Process entire database and return structured data."""
        processed_data = []
        pages = self.get_database_pages(database_id)
        
        for page in pages:
            processed_data.extend(self.process_page(page))
            
        return processed_data

In [21]:
# Example usage in Jupyter notebook (separate cells):

# Cell 1: Initialize processor
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
DATABASE_ID = os.getenv("NOTION_DATABASE_ID")

processor = NotionProcessor(NOTION_TOKEN)
processed_data = processor.process_database(DATABASE_ID)

In [23]:
processed_data[8:15]

[{'properties': {'Employer': 'Personal',
   'Description': 'A Chrome Extension for Structured ChatGPT Chat Management and Navigation',
   'Project Size': 'Large',
   'When': '2024-07-08',
   'Position': 'Fun',
   'Tags': 'CSS, ChatGPT, Chrome Extension, HTML, JavaScript, React, Web Development',
   'Name': 'ChatGPT Organization and Navigation Tool',
   'header': 'Tool Purpose:'},
  'content': 'The Chrome extension is designed to enhance the usability and organization of ChatGPT conversations by introducing a structured approach to managing multiple chats. It enables users to create associations between chats, visualize the connections between them through a node-based diagram, and quickly navigate to specific chat conversation, making it ideal for research, brainstorming, and project management. More information on tool itself and its use can be found here [Database: 906e9719-ccb1-4ddd-b800-abda20f123bf]'},
 {'properties': {'Tags': 'Boostrap, CSS, ChatGPT, Chrome, Chrome Extension, HTM

In [108]:
from llama_index.core import Document

documents = [Document(text=record['content'], metadata=record['properties']) for record in processed_data]

In [109]:
documents[0].__dict__


{'id_': '05d66872-f820-4560-9ce3-a2a8b463c08a',
 'embedding': None,
 'metadata': {'Employer': 'McMaster University - Partnered with Stellantis',
  'Description': 'Maintained and improved the  Boundary Diagram Tool  (BDT) for change impact analysis in large-scale Simulink models, specifically for automotive systems at Stellantis   ',
  'Project Size': 'Medium',
  'When': '2020-06-01',
  'Position': 'Research Assistant',
  'Tags': ['MATLAB', 'Simulink'],
  'Name': 'MATLAB/Simulink - Boundary Diagram Tool',
  'header': 'Project Overview:'},
 'excluded_embed_metadata_keys': [],
 'excluded_llm_metadata_keys': [],
 'relationships': {},
 'text': 'The  Boundary Diagram Tool (BDT)  is an advanced tool designed for  change impact analysis  in large-scale Simulink models, particularly those used in embedded systems like automotive control units. The tool enables engineers to trace how changes in specific parts of a system propagate through other models and network interfaces, aiding in software m

In [111]:
from llama_index.core.node_parser import SentenceSplitter

sentence_splitter = SentenceSplitter(
    chunk_size=512, # in tokens
    chunk_overlap=16, # in tokens
    paragraph_separator="\n\n\n"
)

nodes = sentence_splitter.get_nodes_from_documents(documents, show_progress=True)


Parsing nodes: 100%|██████████| 96/96 [00:00<00:00, 2128.80it/s]


In [112]:
len(nodes)


99

In [113]:
#from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding

CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter your Cohere API key: ")

embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")

In [114]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

QDRANT_URL = os.environ['QDRANT_URL']
QDRANT_API_KEY = os.environ['QDRANT_API_KEY']

# initialize qdrant client
client = qdrant_client.QdrantClient(
    url=QDRANT_URL, 
    api_key=QDRANT_API_KEY,
)

vector_store = QdrantVectorStore(
    client=client, 
    collection_name="Notion_vector_store",
    embed_model=embed_model,
)

INFO:httpx:HTTP Request: GET https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/exists "HTTP/1.1 200 OK"
HTTP Request: GET https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/exists "HTTP/1.1 200 OK"
HTTP Request: GET https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/exists "HTTP/1.1 200 OK"


In [115]:
from llama_index.core import StorageContext

# assign qdrant vector store to storage context
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    )

In [116]:
from llama_index.core import  VectorStoreIndex

# create the index
index = VectorStoreIndex(
    nodes,
    show_progress=True,
    store_nodes_override=True,
    #transformation=[sentence_splitter],
    embed_model=embed_model,
    storage_context=storage_context,
)

Generating embeddings:   0%|          | 0/99 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Generating embeddings: 100%|██████████| 99/99 [00:01<00:00, 52.47it/s]


INFO:httpx:HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store "HTTP/1.1 200 OK"
HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store "HTTP/1.1 200 OK"
HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/index?wait=true "HTTP/1.1 200 OK"
HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/index?wait=true "HTTP/1.1 200 OK"
HTTP Request: PUT https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/index?wait=true "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https:/

In [130]:
from llama_index.llms.cohere import Cohere
from llama_index.core import get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

llm = Cohere(model="command-r-plus")

# configure a retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=15,
)

# configure a post processor
similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.3)

# configure a response sythesizer
response_synthsizer = get_response_synthesizer(llm=llm)

# create a query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)

In [131]:
response = query_engine.query("Outline work done that involved the use of embedded systems")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/points/search "HTTP/1.1 200 OK"
HTTP Request: POST https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/points/search "HTTP/1.1 200 OK"
HTTP Request: POST https://0a690c49-5e88-4998-82fe-3be37b2cc61d.us-east4-0.gcp.cloud.qdrant.io:6333/collections/Notion_vector_store/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.cohere.com/v1/chat "HTTP/1.1 200 OK"
HTTP Request: POST https://api.cohere.com/v1/chat "HTTP/1.1 200 OK"
HTTP Request: POST https://api.cohere.com/v1/chat "HTTP/1.1 200 OK"


In [132]:
for node in response.source_nodes:
    print(node.score)


0.5184556
0.5076841
0.5056155
0.504579
0.5045206
0.485343
0.47040236
0.46849632
0.4519909
0.4500804
0.44908825
0.44807273
0.44527188
0.4442506
0.4425006


In [133]:
print(response.response)

I have been involved in several projects that utilized embedded systems:

- Cobra 55 Bootloader RAM Project: Developed a custom bootloader for the Cobra 55 microcontroller, allowing it to establish an Ethernet connection, receive a program, store it in RAM, and execute it. This project involved in-depth knowledge of embedded systems memory management and bootloader development.

- Stroke Rehabilitation Using EMG-Driven Tetris Game: Designed and built a wearable device for stroke patients to rehabilitate fine-motor skills. This involved using surface EMG technology to collect data, process it using embedded software, and transmit it to a Tetris game for rehabilitation.

- Quadcopter Development and Testing: Assembled, tested, and programmed a quadcopter, including working with various embedded systems such as the Pixhawk flight controller, sensors, and the Raspberry Pi for enhanced processing and computer vision capabilities.

- Cubic Gridsmart (GS3) Deployment: Deployed a Radar-Fusion 