In [30]:
from typing import List, Dict, Any, Optional
from langchain.docstore.document import Document
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent
from langchain.prompts import StringPromptTemplate
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from networkx import DiGraph
import networkx as nx
from enum import Enum
import torch
from transformers import AutoTokenizer, AutoModel, pipeline
import json
from langchain.docstore.document import Document
import glob
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from preprocessing import SemanticJSONSplitter, load_and_process_documents
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import numpy as np
from crewai import Agent, Task, Crew, Process

In [31]:
class QueryType(Enum):
    SEMANTIC = "semantic"
    KEYWORD = "keyword"
    GRAPH = "graph"
    HYBRID = "hybrid"

class MakerspaceKG:
    def __init__(self, embeddings: HuggingFaceEmbeddings):
        self.graph = DiGraph()
        self.node_embeddings = {}
        self.embeddings = embeddings  # i use the same original embeddings model
    
    def get_subgraph_for_query(self, query: str) -> DiGraph:
        # get query embedding using our embedding model instance
        query_embedding = self.embeddings.embed_query(query)
        
        # Find relevant nodes
        relevant_nodes = []
        for node in self.graph.nodes:
            if node in self.node_embeddings:
                similarity = self._calculate_similarity(
                    query_embedding, 
                    self.node_embeddings[node]
                )
                if similarity > 0.5:  # Threshold can be adjusted
                    relevant_nodes.append(node)
        
        return nx.subgraph(self.graph, relevant_nodes)
        
    def _calculate_similarity(self, embedding1, embedding2):
        """Calculate cosine similarity between two embeddings"""
        return np.dot(embedding1, embedding2) / (
            np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
        )

    def add_node_with_embedding(self, name: str, description: str, **attrs):
        """Add a node and compute its embedding"""
        self.graph.add_node(name, **attrs)
        self.node_embeddings[name] = self.embeddings.embed_query(description)

In [32]:
''''
class AutoRAGRouter:
    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        
    def determine_query_type(self, query: str) -> QueryType:
        # Analyze query complexity and structure
        query_features = self._extract_query_features(query)
        
        if query_features['requires_graph_traversal']:
            return QueryType.GRAPH
        elif query_features['is_semantic']:
            return QueryType.SEMANTIC
        elif query_features['is_keyword_based']:
            return QueryType.KEYWORD
        else:
            return QueryType.HYBRID
    
    def _extract_query_features(self, query: str) -> Dict[str, bool]:
        # Implement query analysis logic
        # This is a simplified version
        words = query.lower().split()
        
        return {
            'requires_graph_traversal': any(w in words for w in ['related', 'connected', 'similar']),
            'is_semantic': len(words) > 5 and ' '.join(words).find(' with ') != -1,
            'is_keyword_based': len(words) <= 3
        }
'''
class QueryAnalysisAgent(Agent):
    def __init__(self, llm):
        super().__init__(
            role='Query Analyst',
            goal='Analyze and refine search queries to improve retrieval effectiveness',
            backstory="""You are an expert at understanding user queries and breaking them down 
            into structured components that can be effectively searched across different information sources.""",
            llm=llm,
            verbose=True,
            tools=[
                Tool(
                    name='analyze_query',
                    func=self.analyze_query,
                    description='Analyze and structure the search query'
                )
            ]
        )

    def analyze_query(self, query:str) -> Dict[str,Any]:
        '''analyze the query and break it dowm into structured, usable components'''
        analysis_prompt=f"""
        Analyze the following query and break it down into components:
        Query: {query}

        Provide:
        1. Main topics/concepts
        2. Required capabilities. 
        3. Required resources. 
        4. Any constraints or preferences. 
        5. Query type (semantic, keyword-based, or relationship-focused)

        Format as JSON. 
        """
        result = self.execute_task(analysis_prompt)
        try:
            return json.loads(result)
        except:
            return {
                "main_topics": [],
                "capabilities": [],
                "resources": [],
                "constraints": {},
                "query_type": "semantic"
            }

In [33]:
class KnowledgeGraphAgent(Agent):
    def __init__(self, llm, knowledge_graph: MakerspaceKG):
        super().__init__(
            role='Knowledge Graph Explorer',
            goal='Navigate and extract relevant information from the knowledge graph',
            backstory="""You are an expert at traversing knowledge graphs and finding
            connections between different concepts and entities.""",
            llm=llm,
            verbose=True,
            tools=[
                Tool(
                    name="explore_graph",
                    func=self.explore_graph,
                    description="Explore the knowledge graph based on query analysis"
                )
            ],
            memory={'knowledge_graph': knowledge_graph}
        )
        
        
    def explore_graph(self, query_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Explore the knowledge graph based on query analysis"""
        kg = self.memory.get('knowledge_graph')
        
        # Create a comprehensive query from the analysis
        kg_query = ' '.join([
            *query_analysis.get('main_topics', []),
            *query_analysis.get('capabilities', []),
            *query_analysis.get('resources', [])
        ])
        
        subgraph = self.kg.get_subgraph_for_query(kg_query)
        results = []
        for node in subgraph.nodes:
            node_data = subgraph.nodes[node]
            results.append({
                'name': node,
                'type': 'graph_node',
                'data': node_data
            })
        return results

class DocumentRetrievalAgent(Agent):
    def __init__(self, llm, vector_store: Chroma):
        super().__init__(
            role='Document Retriever',
            goal='Retrieve and rank relevant documents based on query analysis',
            backstory="""You are an expert at finding the most relevant documents
            and ranking them based on their relevance to the query.""",
            llm=llm,
            verbose=True,
            tools=[
                Tool(
                    name='retrieve_documents',
                    func=self.retrieve_documents, 
                    description='Retrieve relevant documents based on query analysis'
                )
            ],
            memory = {'vector_store': vector_store}
        )

        
    def retrieve_documents(self, query_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Retrieve relevant documents based on query analysis"""
        # Combine different aspects of the query for comprehensive search
        vector_store = self.memory.get('vector_store')
        
        search_query = ' '.join([
            *query_analysis.get('main_topics', []),
            *query_analysis.get('capabilities', []),
            *query_analysis.get('resources', [])
        ])
        
        results = vector_store.similarity_search(search_query, k=5)
        return [
            {
                'title': doc.metadata.get('title', 'No Title'),
                'content': doc.page_content,
                'type': 'document',
                'metadata': doc.metadata
            }
            for doc in results
        ]

In [34]:
'''class SearchAgent:
    def __init__(self, llm, knowledge_graph: MakerspaceKG, vector_store):
        self.llm = llm
        self.kg = knowledge_graph
        self.vector_store = vector_store
        self.router = AutoRAGRouter()
        
    def search(self, query: str) -> List[Dict[str, Any]]:
        # Determine search strategy
        query_type = self.router.determine_query_type(query)
        
        # Execute appropriate search strategy
        if query_type == QueryType.GRAPH:
            results = self._graph_search(query)
        elif query_type == QueryType.SEMANTIC:
            results = self._semantic_search(query)
        elif query_type == QueryType.KEYWORD:
            results = self._keyword_search(query)
        else:
            results = self._hybrid_search(query)

        if results is None:
            results = []

        return self._format_results(results)
    
    def _graph_search(self, query: str) -> List[Any]:
        try:
            subgraph = self.kg.get_subgraph_for_query(query, self.model)
            if not subgraph:
                return []

            results = []
            for node in subgraph.nodes:
                node_data = subgraph.nodes[node]
                results.append({
                    'name': node,
                    'type': node_data.get('type', 'unknown'),
                    'capabilities': node_data.get('capabilities', []),
                    'resources': node_data.get('resources', [])
                })
            return results
        except Exception as e:
            print(f"Graph search error: {e}")
            return []
    
    def _semantic_search(self, query: str) -> List[Document]:
        try:
            results = self.vector_store.similarity_search(query)
            return results if results else []
        except Exception as e:
            print(f"Semantic search error: {e}")
            return []
    
    def _keyword_search(self, query: str) -> List[Document]:
        # Implement basic keyword search using vector store
        try:
            # Split query into keywords
            keywords = query.lower().split()
            results = self.vector_store.similarity_search(
                ' OR '.join(keywords),
                k=5
            )
            return results if results else []
        except Exception as e:
            print(f"Keyword search error: {e}")
            return []
    
    def _hybrid_search(self, query: str) -> List[Any]:
        # Combine multiple search strategies
        try:
            graph_results = self._graph_search(query)
            semantic_results = self._semantic_search(query)
            return self._merge_results(graph_results, semantic_results)
        except Exception as e:
            print(f"Hybrid search error: {e}")
            return []

    def _format_results(self, results: List[Any]) -> List[Dict[str, Any]]:
        formatted_results = []
        
        for result in results:
            if isinstance(result, Document):
                # Handle document objects (from langchain documents object type)
                formatted_results.append({
                    'title': result.metadata.get('title', 'No Title'),
                    'content': result.page_content,
                    'type': 'document',
                    'metadata': result.metadata
                })
            elif isinstance(result, dict):
                # Handle graph search results
                formatted_results.append({
                    'title': result.get('name', 'No Name'),
                    'capabilities': result.get('capabilities', []),
                    'resources': result.get('resources', []),
                    'type': 'graph_node',
                    'metadata': result
                })
                
        return formatted_results
    
    def _merge_results(self, graph_results: List[Any], semantic_results: List[Document]) -> List[Any]:
        """Merge results from different search strategies"""
        all_results = []
        
        # Add graph results
        all_results.extend(graph_results)
        
        # Add semantic results, avoiding duplicates
        seen_titles = {r.get('title') for r in all_results}
        for doc in semantic_results:
            if doc.metadata.get('title') not in seen_titles:
                all_results.append(doc)
                seen_titles.add(doc.metadata.get('title'))
        
        return all_results

class MakerspaceMatchingAgent:
    def __init__(self, search_agent: SearchAgent):
        self.search_agent = search_agent
        
    def find_optimal_makerspaces(self, requirements: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Analyze requirements
        required_capabilities = requirements.get('capabilities', [])
        required_resources = requirements.get('resources', [])
        
        # Search for potential matches
        matches = []
        for capability in required_capabilities:
            results = self.search_agent.search(capability)
            matches.extend(results)
        
        # here, we would ideally score and rank matches, we can use an open-source reranking model, or develop the algorithm by scratch
        #ranked_matches = self._rank_matches(matches, requirements)
        #return ranked_matches
        return matches
    
    def _rank_matches(self, matches: List[Dict[str, Any]], 
                     requirements: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Implement ranking logic
        pass
'''


'class SearchAgent:\n    def __init__(self, llm, knowledge_graph: MakerspaceKG, vector_store):\n        self.llm = llm\n        self.kg = knowledge_graph\n        self.vector_store = vector_store\n        self.router = AutoRAGRouter()\n        \n    def search(self, query: str) -> List[Dict[str, Any]]:\n        # Determine search strategy\n        query_type = self.router.determine_query_type(query)\n        \n        # Execute appropriate search strategy\n        if query_type == QueryType.GRAPH:\n            results = self._graph_search(query)\n        elif query_type == QueryType.SEMANTIC:\n            results = self._semantic_search(query)\n        elif query_type == QueryType.KEYWORD:\n            results = self._keyword_search(query)\n        else:\n            results = self._hybrid_search(query)\n\n        if results is None:\n            results = []\n\n        return self._format_results(results)\n    \n    def _graph_search(self, query: str) -> List[Any]:\n        try:\n    

In [35]:
class ResponseSynthesisAgent(Agent):
    def __init__(self, llm):
        super().__init__(
            role='Response Synthesizer',
            goal='Synthesize information from multiple sources into coherent responses',
            backstory="""You are an expert at combining and synthesizing information
            from multiple sources to create comprehensive and accurate responses.""",
            llm=llm,
            verbose=True
        )
        
    def synthesize_response(self, 
                          query: str,
                          graph_results: List[Dict[str, Any]],
                          doc_results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Synthesize final response from multiple sources"""
        synthesis_prompt = f"""
        Based on the following information:
        
        Query: {query}
        
        Graph Results: {json.dumps(graph_results, indent=2)}
        
        Document Results: {json.dumps(doc_results, indent=2)}
        
        Create a comprehensive response that:
        1. Ranks the most relevant matches
        2. Explains why each match is relevant
        3. Highlights any special capabilities or resources
        
        Format as JSON with the following structure:
        {{
            "matches": [
                {{
                    "title": "string",
                    "relevance_score": float,
                    "explanation": "string",
                    "capabilities": ["string"],
                    "resources": ["string"]
                }}
            ],
            "summary": "string"
        }}
        """
        
        result = self.execute_task(synthesis_prompt)
        try:
            return json.loads(result)
        except:
            return {
                "matches": [],
                "summary": "Error synthesizing response"
            }

In [36]:
class EnhancedMakerspaceMatchingAgent:
    def __init__(self, llm, knowledge_graph: MakerspaceKG, vector_store: Chroma):
        # Initialize individual agents
        self.query_analyst = QueryAnalysisAgent(llm)
        self.kg_explorer = KnowledgeGraphAgent(llm, knowledge_graph)
        self.doc_retriever = DocumentRetrievalAgent(llm, vector_store)
        self.synthesizer = ResponseSynthesisAgent(llm)
        
        # Create the crew
        self.crew = Crew(
            agents=[
                self.query_analyst,
                self.kg_explorer,
                self.doc_retriever,
                self.synthesizer
            ],
            process=Process.sequential  # Tasks will be executed in sequence
        )
        
    def find_optimal_makerspaces(self, query: str) -> Dict[str, Any]:
        # Create tasks for the crew
        tasks = [
            Task(
                description="Analyze and structure the search query",
                agent=self.query_analyst,
                expected_output='A JSON object containing query analysis',
                context=[{
                    "description": "Analyze user query for search",
                    "expected_output": "Query analysis in JSON format",
                    "query": query
                }]
            ),
            Task(
                description="Explore knowledge graph for relevant entities",
                agent=self.kg_explorer,
                expected_output='A list of relevant entities from the knowledge graph',
                context=[{
                    "description": "Find relevant entities in knowledge graph",
                    "expected_output": "List of matching entities",
                    "query_analysis": "Will be filled from previous task"
                }]
            ),
            Task(
                description="Retrieve relevant documents",
                agent=self.doc_retriever,
                expected_output='A list of relevant documents',
                context=[{
                    "description": "Search for relevant documents",
                    "expected_output": "List of matching documents",
                    "query_analysis": "Will be filled from previous task"
                }]
            ),
            Task(
                description="Synthesize final response",
                agent=self.synthesizer,
                expected_output='A JSON object containing ranked matches and summary',
                context=[{
                    "description": "Create final response from all sources",
                    "expected_output": "Final synthesized results",
                    "query": query,
                    "graph_results": "Will be filled from KG task",
                    "doc_results": "Will be filled from retrieval task"
                }]
            )
        ]
        
        # Execute the crew's tasks
        try:
            results = self.crew.execute(tasks)
            return results[-1]  # Return the synthesizer's results
        except Exception as e:
            print(f"Error executing crew tasks: {e}")
            return {
                "matches": [],
                "summary": "Error occurred during processing"
            }
            
def setup_enhanced_rag():
    # Initialize components
    documents = load_and_process_documents('./OKWs/')
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    kg = MakerspaceKG(embeddings)
    
    # Add some test data to the knowledge graph
    kg.add_node_with_embedding(
        name="Woodworking Shop",
        description="Specializes in sustainable wood processing and furniture making",
        capabilities=["woodworking", "furniture making"],
        resources=["sustainable wood"]
    )
    
    kg.add_node_with_embedding(
        name="Textile Studio",
        description="Focuses on organic fabric processing and sustainable textile work",
        capabilities=["textile processing"],
        resources=["organic fabrics"]
    )
    
    vector_store = Chroma.from_documents(documents=documents, embedding=embeddings)
    llm = HuggingFacePipeline(pipeline=pipeline(
        "text2text-generation",
        model="google/flan-t5-base"
    ))
    
    #search_agent = SearchAgent(llm, kg, vector_store)
    #matching_agent = MakerspaceMatchingAgent(search_agent)
    matching_agent = EnhancedMakerspaceMatchingAgent(llm, kg, vector_store)
    
    return matching_agent

if __name__ == "__main__":
    matching_agent = setup_enhanced_rag()
    query = 'I need a makerspace with woodworking capabilities with sustainable and eco-friendly materials'
    results = matching_agent.find_optimal_makerspaces(query)

#matches = matching_agent.find_optimal_makerspaces(requirements)

    print("Found matches:")
    for match in results.get("matches", []):
        print(f"\nTitle: {match['title']}")
        print(f"Relevance Score: {match['relevance_score']}")
        print(f"Explanation: {match['explanation']}")
        print(f"Capabilities: {', '.join(match['capabilities'])}")
        print(f"Resources: {', '.join(match['resources'])}")
    
    print(f"\nSummary: {results.get('summary', 'No summary available')}")

Device set to use mps:0
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable
ERROR:root:Failed to get supported params: argument of type 'NoneType' is not iterable



[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Error executing crew tasks: 'Crew' object has no attribute 'execute'
Found matches:

Summary: Error occurred during processing
