# Basic Coral RAG 



In [12]:
from fileinput import filename
import os
import getpass
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from IPython.display import Image, display
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from pydantic import Field, BaseModel
from langchain_core.prompts import ChatPromptTemplate
from typing import Optional, TypedDict

In [13]:
# Load environment variables from .env file
load_dotenv()

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")


_set_env("OPENAI_API_KEY")

_set_env("LANGSMITH_API_KEY")
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langchain-academy"

## Step 1: Create 

- Create a very simple workflow that takes a common name and searches a csv to find these fields. 

In [43]:
class AgentState(TypedDict):
    filename: str
    is_valid: bool
    common_name: Optional[str]
    species_id_1: Optional[str]

In [28]:
class SpeciesCodeBook():
    def __init__(self, filename: str):
        self.codebook = pd.read_csv(filename)
        print("self.codebook columns:", self.codebook.columns.tolist())
        
    def search_by_common_name(self, common_name: str):
        """Search for species by common name"""
        # Example: assuming there's a column called 'Common Name' or similar
        print("Searching for common name:", self.codebook['Common Name'].str)
        matches = self.codebook[self.codebook['Common Name'].str.contains(common_name, case=False, na=False)]
        return matches
        
    def get_all_columns(self):
        """Get all column names in the codebook"""
        return self.codebook.columns.tolist()

In [29]:
codebook = SpeciesCodeBook("/Users/rylanlorance/Dear_Ocean/Dear_Ocean_Digital_Coral_Ark_Agent/data/codebook/Master - DCA Metadata Codebook - Master.csv")

self.codebook columns: ['Common Name', 'Grouping', 'Other Search Terms', 'Family', 'Kingdom', 'Phylum', 'Class', 'Sub-Class', 'Order', 'Group Abbre', 'Common Abbre', 'LENGTH', 'LENGTH.1', 'Date of Addition', 'Rank']


In [41]:
def retrieve_species_name_from_common_name(state: AgentState) -> AgentState:
    codebook = SpeciesCodeBook("/Users/rylanlorance/Dear_Ocean/Dear_Ocean_Digital_Coral_Ark_Agent/data/codebook/Master - DCA Metadata Codebook - Master.csv")
    common_name = state.get("common_name", "")
    matches = codebook.search_by_common_name(common_name)
    
    best_match = matches.iloc[0] if not matches.empty else None
    if best_match is not None:
      print("best_match columns:", best_match.index.tolist())
      state["species_id_1"] = best_match.get("Group Abbre", None)

    return state
    
  

In [None]:
init_state = {
  "filename": "Antler Coral Pocillopora eydouxi entangled Hanauma Bay 20210421_25_Roberts_Anke - HAN.JPG",
  "is_valid": True,
  "common_name": "Potter's Angelfish"
}
res = retrieve_species_name_from_common_name(init_state)

self.codebook columns: ['Common Name', 'Grouping', 'Other Search Terms', 'Family', 'Kingdom', 'Phylum', 'Class', 'Sub-Class', 'Order', 'Group Abbre', 'Common Abbre', 'LENGTH', 'LENGTH.1', 'Date of Addition', 'Rank']
Searching for common name: <pandas.core.strings.accessor.StringMethods object at 0x12d0267a0>
best_match columns: ['Common Name', 'Grouping', 'Other Search Terms', 'Family', 'Kingdom', 'Phylum', 'Class', 'Sub-Class', 'Order', 'Group Abbre', 'Common Abbre', 'LENGTH', 'LENGTH.1', 'Date of Addition', 'Rank']
{'filename': 'Antler Coral Pocillopora eydouxi entangled Hanauma Bay 20210421_25_Roberts_Anke - HAN.JPG', 'is_valid': True, 'Common Name': "Potter's Angelfish", 'species_id_1': 'ACORWOR'}
