# Install and Import phidata
Install the phidata library and import necessary modules for working with agents, tools, knowledge, and storage components.

In [None]:
# ! pip install numpy==1.26.4 ipykernel phidata openai ipywidgets duckduckgo-search yfinance crawl4ai lancedb sentence-transformers torch pypdf chromadb duckdb


### Explanation of the Code

The code performs the following steps:

1. **Importing Required Modules**:
    - `os`: Provides functions to interact with the operating system.
    - `load_dotenv` from `dotenv`: Loads environment variables from a `.env` file into the system environment.

2. **Loading Environment Variables**:
    - The `load_dotenv()` function is called to load environment variables from a `.env` file.

3. **Clearing Conflicting Environment Variables**:
    - A list of environment variables (`env_vars_to_clear`) is defined, which includes `OPENAI_API_KEY`, `OPENAI_BASE_URL`, and `OPENAI_API_BASE`.
    - For each variable in the list, the code checks whether it is currently set to a non-empty value. If it is, it prints a warning message and deletes the variable from the environment.

4. **Setting New Environment Variables**:
    - The `OPENAI_API_KEY` is set to the value of the `OPEN_ROUTER_KEY` environment variable.
    - Both `OPENAI_API_BASE` and `OPENAI_BASE_URL` are set to `'https://openrouter.ai/api/v1'`.

5. **Commented-Out Code**:
    - There is an alternative block of code (commented out) that clears the same environment variables and sets `OPENAI_API_KEY` to the value of the `OPEN_AI_KEY` environment variable instead.

### Purpose:
This code ensures that conflicting environment variables are removed and sets up the required environment variables for interacting with the OpenRouter API. It provides flexibility to switch between different API keys and base URLs by modifying the `.env` file or uncommenting the alternative block of code.


In [None]:

import os
from dotenv import load_dotenv
# Load environment variables from a `.env` file into the process environment.
load_dotenv()

# Remove any OpenAI settings that are already set so they cannot conflict
# with the OpenRouter configuration applied below.
env_vars_to_clear = ['OPENAI_API_KEY', 'OPENAI_BASE_URL', 'OPENAI_API_BASE']
for var in env_vars_to_clear:
    if os.getenv(var):
        print(f"⚠️  Removing conflicting {var}")
        del os.environ[var]

# os.environ only accepts strings, so assigning a missing key (None) would
# fail with an unhelpful TypeError. Fail early with an actionable message.
open_router_key = os.getenv("OPEN_ROUTER_KEY")
if not open_router_key:
    raise RuntimeError(
        "OPEN_ROUTER_KEY is not set. Add it to your .env file or environment "
        "before running this cell."
    )

os.environ["OPENAI_API_KEY"] = open_router_key
# os.environ["OPENAI_API_KEY"] = os.getenv("OPEN_AI_KEY")
# Point both base-URL variables at OpenRouter.
os.environ['OPENAI_API_BASE'] = 'https://openrouter.ai/api/v1'
os.environ['OPENAI_BASE_URL'] = 'https://openrouter.ai/api/v1'



# env_vars_to_clear = ['OPENAI_API_KEY', 'OPENAI_BASE_URL', 'OPENAI_API_BASE']
# for var in env_vars_to_clear:
#     if os.getenv(var):
#         print(f"⚠️  Removing conflicting {var}")
#         del os.environ[var]
# os.environ["OPENAI_API_KEY"] = os.getenv("OPEN_AI_KEY")




### Explanation of the Code

The code in the next cell performs the following steps:

1. **Importing Required Modules**:
    - `phi.agent.Agent`: Used to create an agent that can perform reasoning tasks.
    - `phi.tools.Tool`: Provides tools that can be used by the agent.
    - `phi.model.openai.OpenAIChat`: Represents the OpenAI GPT model used for generating responses.
    - `phi.utils.pprint.pprint_run_response`: A utility function to pretty-print the agent's response.

### Purpose:
This code sets up the necessary imports to create and interact with agents that utilize OpenAI's GPT models for reasoning and task execution. It also includes tools and utilities for enhancing the agent's functionality and formatting its output.

In [None]:
from phi.agent import Agent
from phi.tools import Tool
from phi.model.openai import OpenAIChat
from phi.utils.pprint import pprint_run_response

In [None]:
import httpx
import networkx as nx
import pandas as pd
from pathlib import Path
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.csv_tools import CsvTools
from phi.tools.python import PythonTools
from typing import Dict, List, Any
import json

class KnowledgeGraphQueryTools:
    """Custom tools for querying the knowledge graph.

    The graph and the source DataFrame are populated by
    ``load_graph_and_data``. Query methods return a plain message string
    (instead of raising) when no graph has been loaded or when a requested
    node is not in the graph.
    """

    # Shared message for every query made before a graph has been loaded.
    _NOT_LOADED = "Graph not loaded. Please load the graph first."

    def __init__(self):
        # Both attributes stay None until load_graph_and_data() assigns them.
        self.graph = None
        self.df = None

    def load_graph_and_data(self, graph_path: str = "movie_kg.graphml", csv_path: str = "imdb.csv"):
        """Load the knowledge graph and original data.

        Args:
            graph_path: Path of the GraphML file to read.
            csv_path: Path of the CSV file to read.

        Returns:
            A summary string with the node and edge counts, or an error
            message string if reading either file raises.
        """
        try:
            self.graph = nx.read_graphml(graph_path)
            self.df = pd.read_csv(csv_path)
            return f"Loaded graph with {self.graph.number_of_nodes()} nodes and {self.graph.number_of_edges()} edges"
        except Exception as e:
            return f"Error loading graph: {str(e)}"

    def query_node_relationships(self, node_name: str, max_depth: int = 2):
        """Find all relationships for a node up to specified depth.

        Args:
            node_name: Node to start from.
            max_depth: Largest shortest-path distance to report.

        Returns:
            A dict mapping ``"depth_<k>"`` (k = 1..max_depth) to the list of
            nodes whose shortest-path distance from ``node_name`` is exactly
            k, or a message string if the graph is not loaded or the node is
            unknown.
        """
        # Compare against None: an empty networkx graph is falsy but loaded.
        if self.graph is None:
            return self._NOT_LOADED

        if node_name not in self.graph:
            return f"Node '{node_name}' not found in graph"

        relationships = {f"depth_{depth}": [] for depth in range(1, max_depth + 1)}

        # One traversal yields every distance up to max_depth, so no separate
        # shortest-path search per node is needed.
        distances = dict(
            nx.single_source_shortest_path_length(self.graph, node_name, cutoff=max_depth)
        )
        for node, distance in distances.items():
            # Distance 0 is the start node itself and is not reported.
            if 1 <= distance <= max_depth:
                relationships[f"depth_{distance}"].append(node)

        return relationships

    def find_common_connections(self, node1: str, node2: str):
        """Find common connections between two nodes.

        Args:
            node1: First node.
            node2: Second node.

        Returns:
            A dict with the shared neighbors (``common_connections``) and the
            neighbors unique to each node (``node1_unique``,
            ``node2_unique``), or a message string if the graph is not loaded
            or either node is unknown.
        """
        if self.graph is None:
            return self._NOT_LOADED

        # Report unknown nodes the same way query_node_relationships does
        # rather than letting the neighbors() lookup raise.
        for node in (node1, node2):
            if node not in self.graph:
                return f"Node '{node}' not found in graph"

        neighbors1 = set(self.graph.neighbors(node1))
        neighbors2 = set(self.graph.neighbors(node2))

        return {
            "common_connections": list(neighbors1 & neighbors2),
            "node1_unique": list(neighbors1 - neighbors2),
            "node2_unique": list(neighbors2 - neighbors1)
        }

    def analyze_node_centrality(self, node_type: str = None):
        """Analyze centrality measures for nodes.

        Args:
            node_type: When given, keep only nodes whose ``'type'`` node
                attribute equals this value.

        Returns:
            A dict keyed by ``"degree"``, ``"betweenness"``, ``"closeness"``
            and ``"eigenvector"``, each mapping node to its score, or a
            message string if the graph is not loaded.
        """
        if self.graph is None:
            return self._NOT_LOADED

        centrality_measures = {
            "degree": nx.degree_centrality(self.graph),
            "betweenness": nx.betweenness_centrality(self.graph),
            "closeness": nx.closeness_centrality(self.graph),
            "eigenvector": nx.eigenvector_centrality(self.graph, max_iter=1000)
        }

        if not node_type:
            return centrality_measures

        # Filter every measure down to nodes of the requested type.
        return {
            measure_name: {
                node: score for node, score in measure_dict.items()
                if self.graph.nodes[node].get('type') == node_type
            }
            for measure_name, measure_dict in centrality_measures.items()
        }

    def find_shortest_path_with_context(self, source: str, target: str):
        """Find shortest path with additional context about the relationships.

        Args:
            source: Node where the path starts.
            target: Node where the path ends.

        Returns:
            A dict with the node ``path``, its ``path_length`` in edges and
            ``path_details`` (one entry per hop carrying the edge data), or a
            message string if the graph is not loaded, a node is unknown, or
            no path exists.
        """
        if self.graph is None:
            return self._NOT_LOADED

        # Unknown endpoints would otherwise surface as an uncaught exception.
        for node in (source, target):
            if node not in self.graph:
                return f"Node '{node}' not found in graph"

        try:
            path = nx.shortest_path(self.graph, source, target)
        except nx.NetworkXNoPath:
            return f"No path found between {source} and {target}"

        # Pair each node with its successor to describe every hop.
        path_details = [
            {
                "from": start,
                "to": end,
                "relationship": self.graph.get_edge_data(start, end)
            }
            for start, end in zip(path, path[1:])
        ]

        return {
            "path": path,
            "path_length": len(path) - 1,
            "path_details": path_details
        }

# Create enhanced agent with query capabilities
def create_enhanced_kg_agent():
    """Build the "Enhanced IMDB Knowledge Graph Agent".

    Returns:
        An ``Agent`` backed by ``OpenAIChat(id="gpt-4o")`` with ``CsvTools``
        over ``imdb.csv`` and ``PythonTools`` attached, configured with the
        graph-creation and second-order-query instructions below.
    """
    imdb_csv = "imdb.csv"

    # Initialize query tools
    # NOTE(review): this instance is not passed to the agent's `tools`, so the
    # "Available query functions" named in the instructions are not callable
    # as tools from here — confirm whether they should be registered.
    query_tools = KnowledgeGraphQueryTools()

    return Agent(
        name="Enhanced IMDB Knowledge Graph Agent",
        model=OpenAIChat(id="gpt-4o"),
        tools=[
            CsvTools(csvs=[imdb_csv]),
            PythonTools(pip_install=True, save_and_run=True)
        ],
        instructions=[
            "You are an expert at analyzing movie data, creating knowledge graphs, and answering complex queries",
            "You can handle both first-order queries (direct facts) and second-order queries (relationships between relationships)",
            "Always provide comprehensive analysis with visualizations when appropriate",

            # Knowledge Graph Creation Instructions
            "For graph creation:",
            "- First, explore the CSV file structure",
            "- Create nodes for Directors, Genres, Actors, and Movies",
            "- Add edges with weights based on ratings/revenue",
            "- Save the graph as GraphML format for later querying",

            # Second-Order Query Instructions
            "For second-order queries, you can analyze:",
            "- Indirect relationships (who worked with whom through common projects)",
            "- Network patterns (clusters, communities, influential nodes)",
            "- Path analysis (how entities are connected through intermediaries)",
            "- Comparative analysis (similarities between different entities)",
            "- Temporal patterns (if time data is available)",

            # Available query functions
            "Available query functions:",
            "- query_node_relationships(): Find multi-hop relationships",
            "- find_common_connections(): Discover shared connections",
            "- analyze_node_centrality(): Identify most influential nodes",
            "- find_shortest_path_with_context(): Analyze connection paths",

            # Each instruction is its own list item; without the separating
            # comma adjacent string literals are merged into one string.
            "Always visualize results when possible and provide actionable insights",
            "save visuals as png files and return the file paths",
            "Use networkx for graph operations and matplotlib for visualizations",
        ],
        markdown=True,
        show_tool_calls=True,
    )

# Build the agent instance that the query code below sends prompts to.
knowledge_graph_agent = create_enhanced_kg_agent()

# Phase 1: have the agent build, visualize and save the knowledge graph.
print("=== PHASE 1: Creating Knowledge Graph ===")
knowledge_graph_agent.print_response(
    """
Use only 200 records from the IMDB dataset.

Please analyze the IMDB movie dataset and create a knowledge graph:

1. First, examine the CSV file structure and show columns/sample data
2. Create a knowledge graph with:
   - Directors as nodes  
   - Genres as nodes
   - Actors as nodes
   - Movies as nodes
   - Edge weights based on movie ratings or revenue

3. Visualize using networkx and matplotlib
4. IMPORTANT: Save the graph as 'movie_kg.graphml' for later querying
5. Provide initial insights about most connected nodes

Use appropriate graph layout algorithms for clear visualization.
""",
    stream=True,
)

# Banner separating phase 1 output from the second-order query examples.
print(f"\n{'=' * 60}")
print("=== PHASE 2: Second-Order Query Examples ===")
print("=" * 60)



In [None]:
# Prompts for the second-order (relationship-of-relationships) examples.
# Each entry is sent to the agent verbatim, one request per entry.
second_order_queries = [
    """
    QUERY 1 - Indirect Collaborations:
    Find directors who have never worked directly together but have worked with the same actors. 
    Show the "degrees of separation" between directors through their shared cast members.
    Visualize this as a network showing indirect connections.
    """,

    """
    QUERY 2 - Genre Bridge Analysis:
    Identify actors or directors who serve as "bridges" between different genres.
    Find who connects seemingly unrelated genres (e.g., horror and comedy).
    Show the shortest paths between genre clusters.
    """,

    """
    QUERY 3 - Influence Propagation:
    Analyze how high-rated movies influence the rating patterns of connected movies.
    Do actors/directors from highly-rated films tend to be in other highly-rated films?
    Create a visualization showing this influence network.
    """,

    """
    QUERY 4 - Community Detection:
    Find clusters or communities in the movie network.
    Identify groups of actors/directors who frequently work together.
    Analyze if these communities have distinct characteristics (genres, ratings, time periods).
    """,
]

# Run the prompts in order, numbering them from 1 and printing a dashed
# rule after each streamed response.
for i, query in enumerate(second_order_queries, start=1):
    print(f"\n--- Executing Query {i} ---")
    knowledge_graph_agent.print_response(query, stream=True)
    print(f"\n{'-' * 40}")

# Interactive query function
def interactive_query():
    """Allow users to input custom second-order queries.

    Prints usage help, then reads queries in a loop and forwards each
    non-empty one to ``knowledge_graph_agent``. The loop ends when the user
    types 'exit' (case-insensitive), or when input is closed or interrupted.
    """
    print(f"\n{'='*60}")
    print("=== INTERACTIVE SECOND-ORDER QUERY MODE ===")
    print("="*60)
    print("You can now ask complex questions about the movie knowledge graph!")
    print("Examples of second-order queries you can ask:")
    print("- 'How are action movies connected to comedy movies through shared actors?'")
    print("- 'Which director has the most indirect influence in the network?'")
    print("- 'Find the shortest collaboration path between two specific actors'")
    print("- 'What genres tend to cluster together through shared personnel?'")
    print("\nType 'exit' to quit interactive mode")

    while True:
        try:
            user_query = input("\nEnter your second-order query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt means no more queries are
            # coming; leave the loop instead of surfacing a traceback.
            print("\nExiting interactive mode.")
            break
        if user_query.lower() == 'exit':
            break
        # Blank input is ignored and the prompt is shown again.
        if user_query:
            print(f"\n--- Processing Query: {user_query} ---")
            knowledge_graph_agent.print_response(user_query, stream=True)

# Uncomment the line below to enable interactive querying
# interactive_query()

# Closing banner: a blank line, a 60-character rule, two status lines, and a
# final rule, emitted with a single print call.
print(
    "\n" + "=" * 60,
    "Knowledge Graph Agent with Second-Order Queries Ready!",
    "The agent can now handle complex relational queries beyond simple facts.",
    "=" * 60,
    sep="\n",
)

In [None]:
import httpx
import networkx as nx
import pandas as pd
from pathlib import Path
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.csv_tools import CsvTools
from phi.tools.python import PythonTools

class GraphQueryTools:
    """Compact query helpers over the saved movie knowledge graph.

    ``load_graph`` populates ``graph`` and ``df``; the query methods return a
    message string instead of raising when the graph is not loaded or a
    requested node is missing.
    """

    def __init__(self):
        # Both attributes stay None until load_graph() assigns them.
        self.graph = None
        self.df = None

    def load_graph(self, graph_path="movie_kg.graphml", csv_path="imdb.csv"):
        """Read the GraphML graph and the CSV data.

        Args:
            graph_path: Path of the GraphML file to read.
            csv_path: Path of the CSV file to read.

        Returns:
            A summary string with node and edge counts, or a message string
            describing why loading failed.
        """
        try:
            self.graph = nx.read_graphml(graph_path)
            self.df = pd.read_csv(csv_path)
        except FileNotFoundError:
            return "Graph not found. Create it first."
        except Exception as exc:
            # Any other failure is reported with its cause rather than being
            # mislabelled as a missing file.
            return f"Error loading graph: {exc}"
        return f"Loaded: {self.graph.number_of_nodes()} nodes, {self.graph.number_of_edges()} edges"

    def find_indirect_connections(self, node1, node2, max_depth=3):
        """Return the shortest path between two nodes and its length in edges.

        Args:
            node1: Node where the path starts.
            node2: Node where the path ends.
            max_depth: NOTE(review): accepted but not applied to the search;
                kept so existing callers keep working — confirm the intended
                cutoff semantics before enforcing it.

        Returns:
            ``{"path": [...], "length": n}``, or a message string if the graph
            is not loaded, a node is unknown, or no path exists.
        """
        # Compare against None: an empty networkx graph is falsy but loaded.
        if self.graph is None or node1 not in self.graph or node2 not in self.graph:
            return "Invalid nodes or graph not loaded"

        try:
            path = nx.shortest_path(self.graph, node1, node2)
        except nx.NetworkXNoPath:
            return "No connection found"
        return {"path": path, "length": len(path) - 1}

    def common_neighbors(self, node1, node2):
        """Return the neighbors shared by both nodes.

        Returns:
            A list of shared neighbors, or a message string if the graph is
            not loaded or either node is unknown.
        """
        if self.graph is None:
            return "Graph not loaded"
        # Unknown nodes would otherwise make the neighbors() lookup raise.
        if node1 not in self.graph or node2 not in self.graph:
            return "Invalid nodes or graph not loaded"

        n1_neighbors = set(self.graph.neighbors(node1))
        n2_neighbors = set(self.graph.neighbors(node2))

        return list(n1_neighbors & n2_neighbors)

    def get_node_influence(self, node_type=None):
        """Return degree centrality per node, optionally filtered by type.

        Args:
            node_type: When given, keep only nodes whose ``'type'`` node
                attribute equals this value.

        Returns:
            A dict mapping node to degree centrality, or a message string if
            the graph is not loaded.
        """
        if self.graph is None:
            return "Graph not loaded"

        centrality = nx.degree_centrality(self.graph)
        if node_type:
            return {n: c for n, c in centrality.items()
                    if self.graph.nodes[n].get('type') == node_type}
        return centrality

# Streamlined agent: same model and tools as the enhanced agent, with a
# shorter instruction list.
imdb_csv = "imdb.csv"

knowledge_graph_agent = Agent(
    name="IMDB KG Agent",
    model=OpenAIChat(id="gpt-4o"),
    markdown=True,
    show_tool_calls=True,
    tools=[
        CsvTools(csvs=[imdb_csv]),
        PythonTools(pip_install=True, save_and_run=True),
    ],
    instructions=[
        "Analyze movie data and create knowledge graphs",
        "Handle both direct and indirect relationship queries",
        "Create nodes: Directors, Genres, Actors, Movies",
        "Save graph as 'movie_kg.graphml' for querying",
        "For second-order queries, analyze relationships between relationships",
    ],
)

# Phase 1: ask the agent to build, save and summarize the knowledge graph.
print("=== Creating Knowledge Graph ===")
knowledge_graph_agent.print_response(
    """
Use 200 records from IMDB dataset.

1. Load and examine CSV structure
2. Create knowledge graph with Directors, Genres, Actors, Movies as nodes
3. Add edges with weights (ratings/revenue)
4. Save as 'movie_kg.graphml'
5. Visualize and show basic stats
""",
    stream=True,
)



In [None]:
# Phase 2: send each second-order question to the agent in turn.
print("\n=== Second-Order Queries ===")

queries = [
    "Find directors who never worked together but share common actors. Show indirect connections.",
    "Identify actors who bridge different genres. Visualize genre connectivity.",
    "Analyze influence patterns: do high-rated films connect to other high-rated films?",
    "Detect communities of frequently collaborating personnel.",
]

# Queries are numbered from 1 in the printed headers.
for i, query in enumerate(queries, start=1):
    print(f"\n--- Query {i} ---")
    knowledge_graph_agent.print_response(query, stream=True)

# Closing status lines, emitted with a single print call.
print(
    "\n=== Ready for Interactive Queries ===",
    "Agent can now handle complex relational questions about the movie network.",
    sep="\n",
)
