# Leiden

In [None]:
import os
import re
import nltk
import time
import json
import torch
import chromadb
import numpy as np
import networkx as nx
import nltk.downloader
import leidenalg as la
import igraph as ig
from datetime import datetime
from model_loader_folder.config import *
from pykeen.pipeline import pipeline
from transformers import AutoTokenizer
from nltk.tokenize import sent_tokenize
from pykeen.triples import TriplesFactory
from chromadb.utils import embedding_functions
from pykeen.optimizers import AdamW as PyKeenAdamW
from typing import List, Dict, Any, Tuple, Optional, Set
from sentence_transformers import CrossEncoder, SentenceTransformer

# Expose only GPUs 1 and 2 to CUDA for this process.
# NOTE(review): this runs after the imports above (torch and the star import of
# model_loader_folder.config); if any of those already initialise CUDA or load a
# model, the setting comes too late for them — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1,2"
class CustomEmbeddingFunction(embedding_functions.EmbeddingFunction):
    """Adapter that lets a Chroma collection embed text with a caller-supplied model.

    The wrapped model only needs an ``encode(texts)`` method whose result has
    ``.tolist()``.

    NOTE(review): newer chromadb releases appear to validate that ``__call__``
    takes a parameter named ``input``; confirm against the installed version
    before upgrading.
    """

    def __init__(self, embedding_model):
        # Kept as-is; only ``encode`` is ever called on it.
        self.embedding_model = embedding_model

    def __call__(self, texts):
        """Encode *texts* with the wrapped model and return the result of ``.tolist()``."""
        vectors = self.embedding_model.encode(texts)
        return vectors.tolist()
    
# Download the NLTK "punkt" / "punkt_tab" resources at import time
# (presumably the data sent_tokenize relies on — TODO confirm).
nltk.download("punkt")
nltk.download("punkt_tab")

# Self-assignment: `generation_loader` is not defined in this file, so it
# presumably arrives through `from model_loader_folder.config import *`
# — TODO confirm. The statement itself has no effect beyond rebinding the name.
generation_loader = generation_loader

class QASystem:
    def __init__(self, 
                 graphml_path: str, 
                 md_path: str,
                 vector_db_path: str = "./chroma_db_split", 
                 similarity_threshold: float = 0.5,
                 chunk_token_threshold: int = 250,
                 embedding_model_path: str = "./model/embedding/bge-m3"):
        """Load models, the knowledge graph and the vector store.

        Construction loads a tokenizer, a cross-encoder reranker and a
        sentence-embedding model, reads the GraphML graph, partitions it into
        Leiden communities, opens a persistent Chroma client with three
        collections, and finally populates the entity/relation collections if
        they are empty.

        Args:
            graphml_path: Path of the GraphML knowledge graph to read.
            md_path: Directory whose ``.md`` files hold the page text
                (other methods list this directory with ``os.listdir``).
            vector_db_path: Directory used by the persistent Chroma client.
            similarity_threshold: Upper bound compared against the distances
                returned by the entity query (``dist <= threshold``).
            chunk_token_threshold: Token budget per chunk when splitting text.
            embedding_model_path: Path handed to ``SentenceTransformer``.
        """
        self.graphml_path = graphml_path
        self.md_path = md_path
        self.similarity_threshold = similarity_threshold
        self.chunk_token_threshold = chunk_token_threshold
        # The generation backend is injected by the caller after construction.
        self.llm_loader = None
        
        # Tokenizer used for chunk sizing and as the fallback length estimator.
        self.tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
        # Cross-encoder that scores (question, chunk) pairs for reranking.
        self.reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

        self.embedding_model = SentenceTransformer(embedding_model_path)
        self.custom_embedding_function = CustomEmbeddingFunction(self.embedding_model)

        self.graph = nx.read_graphml(graphml_path)

        # Leiden runs on an igraph copy; a multigraph is first converted to a plain nx.Graph.
        G = nx.Graph(self.graph) if self.graph.is_multigraph() else self.graph
        ig_graph = ig.Graph.from_networkx(G)
        partition = la.find_partition(ig_graph, la.ModularityVertexPartition)

        # Translate igraph vertex indices back to node names.
        # assumes from_networkx keeps vertices in G.nodes() order — TODO confirm
        communities = []
        node_names = list(G.nodes())
        for cluster in partition :
            communities.append({node_names[i] for i in cluster})

        # Two-way lookup between nodes and community ids; the id is also
        # written onto each graph node as the "community" attribute.
        self.node_to_community = {}
        self.community_to_nodes = {}
        for i, community_nodes in enumerate(communities) :
            self.community_to_nodes[i] = set(community_nodes)
            for node in community_nodes :
                self.node_to_community[node] = i
                if node in self.graph :
                    self.graph.nodes[node]["community"] = i

        # Debug dump of at most the first 30 communities.
        print("#############Leiden Community Info")
        for i, (common_id, nodes) in enumerate(self.community_to_nodes.items()) :
            if i >= 30 :
                break
            print(f"Community {common_id} : {nodes}")

        # Lower-cased node name -> original node name, for case-insensitive lookup.
        self.node_name_map = {node.lower(): node for node in self.graph.nodes()}
        self.client = chromadb.PersistentClient(path=vector_db_path)
        
        self.entity_collection = self.client.get_or_create_collection(name="entities_split", embedding_function=self.custom_embedding_function)
        self.relation_collection = self.client.get_or_create_collection(name="relations_split", embedding_function=self.custom_embedding_function)
        self.chunk_collection = self.client.get_or_create_collection(name="chunks", embedding_function=self.custom_embedding_function)
        # Prompt filled in with str.format: doubled braces are literal braces and
        # {text_to_analyze} is the only field.
        # NOTE(review): the ```json fence opened inside the template is never closed.
        self.entity_relation_extraction_prompt_template = """
            Extract entities and their relations from the following sentence.

            **Entities** should be **unique nouns or concepts**, extracted as **noun phrases** whenever possible. Identify **concrete objects or concepts** rather than complex activities or phenomena as entities.

            **Relations** should clearly describe the connection between two entities, preferring **reusable predicate verbs** for a knowledge graph. Use **concise verbs** or clear, hyphenated forms like **'part_of' or 'includes'**.

            Output the result **only in the following JSON format**, with no other explanations or text:

            ```json
            {{
                "entities": [
                    {{"name": "Entity1", "type": "Type (e.g., Organ, System, Substance, Function, Disease)"}},
                    {{"name": "Entity2", "type": "Type"}}
                ],
                "relations": [
                    {{"head": "Entity1", "relation": "Relation_Type (e.g., part_of, causes)", "tail": "Entity2"}},
                    {{"head": "Entity3", "relation": "generates", "tail": "Entity4"}}
                ]
            }}

            sentence : "{text_to_analyze}"
            JSON result :
        """

        # Populate the entity/relation collections when they are empty.
        self._initialize_vector_db()
        # self._initialize_chunk_db()

    def _preprocess_text(self, text: str) -> str:
        return text.upper().replace(' ', '_')

    def _create_chunks_from_text(self, text: str, page_num: str) -> List[Dict[str, Any]] :
        chunks = []
        paragraphs = re.split("\n\n+", text)
        for para in paragraphs :
            para = para.strip()
            if not para :
                continue

            para_tokens = self.tokenizer.tokenize(para)

            if len(para_tokens) <= self.chunk_token_threshold :
                chunks.append({"document": para, "metadata": {"source_page": page_num}})
            else :
                sentences = sent_tokenize(para)
                current_chunk_sentences = []
                current_chunk_tokens = 0

                for sentence in sentences :
                    sentence_tokens = self.tokenizer.tokenize(sentence)

                    if current_chunk_tokens + len(sentence_tokens) > self.chunk_token_threshold and current_chunk_sentences :
                        chunk_text = " ".join(current_chunk_sentences)
                        chunks.append({"document": chunk_text, "metadata": {"source_page": page_num}})
                        current_chunk_sentences = [sentence]
                        current_chunk_tokens = len(sentence_tokens)
                    else :
                        current_chunk_sentences.append(sentence)
                        current_chunk_tokens += len(sentence_tokens)

                if current_chunk_sentences :
                    chunk_text = " ".join(current_chunk_sentences)
                    chunks.append({"document": chunk_text, "metadata": {"source_page": page_num}})

        return chunks

    def _initialize_chunk_db(self) :
        if self.chunk_collection.count() > 0 :
            print("DB가 이미 초기화되어있음")
            return
        
        print("청크 DB 초기화 시작")
        all_chunks = []
        all_md_files = [f for f in os.listdir(self.md_path) if f.endswith(".md")]

        for md_file in all_md_files :
            with open(os.path.join(self.md_path, md_file), 'r', encoding="utf-8") as f :
                content = f.read()

            page_matches = re.finditer(r"####\s+Page\s+(\d+)\b(.*?)(?=####\s+Page|\Z)", content, re.S)
            for match in page_matches :
                page_num = match.group(1).strip()
                page_content = match.group(2).strip()
                if page_content :
                    chunks = self._create_chunks_from_text(page_content, page_num)
                    all_chunks.extend(chunks)

        if all_chunks :
            documents = [chunk["document"] for chunk in all_chunks]
            metadatas = [chunk["metadata"] for chunk in all_chunks]

            ids = [f"chunk_{i}_{datetime.now().timestamp()}" for i in range(len(documents))]
            self.chunk_collection.add(ids=ids, documents=documents, metadatas=metadatas)
        print(f"청크DB 초기화 완료. {self.chunk_collection.count()}개의 청크 추가")

    def _initialize_vector_db(self):
        if self.entity_collection.count() == 0:
            nodes_to_add = []
            unique_nodes = set()
            for node, data in self.graph.nodes(data=True):
                processed_node = self._preprocess_text(node)
                if processed_node not in unique_nodes :
                    metadata = {k: str(v) for k, v in data.items()}
                    metadata['original_name'] = node
                    nodes_to_add.append({'id': processed_node, 'document': node, 'metadata': metadata})
                    unique_nodes.add(processed_node)
            
            if nodes_to_add:
                ids = [item['id'] for item in nodes_to_add]
                documents = [item['document'] for item in nodes_to_add]
                metadatas = [item['metadata'] for item in nodes_to_add]
                self.entity_collection.add(ids=ids, documents=documents, metadatas=metadatas)

        if self.relation_collection.count() == 0:
            edges_to_add = []
            unique_processed_relations = set()
            for u, v, data in self.graph.edges(data=True):
                relation_type = data.get('type')
                if relation_type:
                    processed_relation = self._preprocess_text(relation_type)
                    if processed_relation not in unique_processed_relations :
                        metadata = {'original_name': relation_type}
                        edges_to_add.append({'id': processed_relation, 'document': processed_relation, 'metadata': metadata})
                        unique_processed_relations.add(processed_relation)

            if edges_to_add:
                ids = [item['id'] for item in edges_to_add]
                documents = [item['document'] for item in edges_to_add]
                metadatas = [item['metadata'] for item in edges_to_add]
                self.relation_collection.add(ids=ids, documents=documents, metadatas=metadatas)
    
    def _extract_entities_relations(self, question) :
        prompt = self.entity_relation_extraction_prompt_template.format(text_to_analyze=question)
        raw_llm_output = self._call_llm_generate(prompt)

        try :
            json_start = raw_llm_output.find("{")
            json_end = raw_llm_output.rfind("}") + 1
            if json_start != -1 and json_end != -1 and json_end > json_start :
                json_str = raw_llm_output[json_start:json_end]
                extracted_data = json.loads(json_str)
                return extracted_data.get("entities", []), extracted_data.get("relations", [])
            else :
                print(f"LLM 답변에서 유효한 JSON 형태를 찾을 수 없음 : {raw_llm_output}")
                return [], []
            
        except json.JSONDecodeError as e :
            print(f"개체 추출 과정에서 JSON 디코딩 오류 발생: {e}")
            print(f"오류 발생 원문: {raw_llm_output}")
            return [], []
    
    def _find_pages_from_entities(self, entities: List[Dict[str, Any]]) -> Set[str]:
        query_texts = [e['name'] for e in entities if 'name' in e]
        if not query_texts:
            return set()

        # 1. 벡터 DB에서 유사 엔티티 검색
        entity_results = self.entity_collection.query(
            query_texts=query_texts,
            n_results=5,
            include=["metadatas", "distances"]
        )
        # print(f"쿼리 : {query_texts}")
        # print(f"검색 결과 : {entity_results}")

        similar_entity_names = set()
        if entity_results.get('distances'):
            for i, dists in enumerate(entity_results['distances']):
                for j, dist in enumerate(dists):
                    if dist <= self.similarity_threshold:
                        meta = entity_results['metadatas'][i][j]
                        similar_entity_names.add(meta['original_name'])
        
        # 2. 그래프에서 엔티티를 찾아 페이지 번호 추출
        page_numbers = set()
        # 'source_page' : 페이지 정보
        for entity_name in similar_entity_names:
            lower_entity_name = entity_name.lower()
            if lower_entity_name in self.node_name_map :
                original_case_node_name = self.node_name_map[lower_entity_name]
                node_data = self.graph.nodes[original_case_node_name]
                pages_str = node_data.get("source_page")

                if pages_str :
                    for page in pages_str.split(',') :
                        if page.strip() :
                            page_numbers.add(page.strip())
            else :
                # print(f"{entity_name}을 그래프에서 찾을 수 없음")
                continue
        return page_numbers

    def _retrieve_and_rerank_context(self, question: str, page_numbers: Set[str], top_k_rerank: int = 5) -> List[Dict[str, Any]]:
        if not page_numbers:
            return []

        # 3. 모든 MD 파일 내용을 읽어 하나의 문자열로 합침
        all_md_content = ""
        for md_file in os.listdir(self.md_path):
            if md_file.endswith(".md"):
                with open(os.path.join(self.md_path, md_file), 'r', encoding='utf-8') as f:
                    all_md_content += f.read() + "\n\n"

        # 페이지 번호에 해당하는 내용 추출 및 청킹
        candidate_chunks = []
        for page_num in page_numbers:
            # 정규식을 사용하여 '#### Page X' 형식의 섹션 찾기
            pattern = re.compile(rf"####\s+Page\s+{re.escape(page_num)}\b(.*?)(?=####\s+Page|\Z)", re.S)
            match = pattern.search(all_md_content)
            
            if match:
                page_content = match.group(1).strip()
                # 4. 새로운 규칙에 따라 텍스트 청킹
                chunks = self._create_chunks_from_text(page_content, page_num)
                candidate_chunks.extend(chunks)
        
        if not candidate_chunks:
            return []
            
        # 5. Cross-encoder를 사용하여 재정렬
        rerank_pairs = [(question, chunk['document']) for chunk in candidate_chunks]
        if not rerank_pairs:
            return []

        scores = self.reranker.predict(rerank_pairs)

        reranked_results = []
        for score, chunk in zip(scores, candidate_chunks):
            chunk["rerank_score"] = score
            reranked_results.append(chunk)

        reranked_results.sort(key=lambda x: x["rerank_score"], reverse=True)

        return reranked_results[:top_k_rerank]
    
    def _build_llm_prompt(self, question: str, context: str) -> str:
        prompt = f"""
        You are a helpful assistant who answers questions based on the provided context.
        You MUST cite the source page number for every piece of information you use.

        **Instructions:**
        1. Answer the user's question clearly and concisely using ONLY the provided context and knowledge graph information.
        2. For every statement, you MUST provide the source page number in parentheses, like this: (Page XX).
        3. If a single piece of information is supported by multiple pages, cite all of them: (Page X, Y, Z).
        4. If no context is available, state that you are answering based on the graph structure alone.

        **Example of a GOOD answer:**
        The ductus arteriosus degenerates into the ligamentum arteriosum after birth(page 360). This is a normal physiological change that happens post-delivery(page 361).

        **Example of a BAD answer:** -> (This is a bad answer because it lacks the mandatory citation)
        The ductus arteriosus becomes the ligamentum arteriosum.

        ---
        **Context:**
        {context}
        ---
        **Question:**
        {question}
        ---
        **Answer:**
        """
        return prompt.strip()
    
    def _call_llm_generate(self, prompt: str) -> str:
        if self.llm_loader:
            if hasattr(self.llm_loader, "tokenizer") and hasattr(self.llm_loader, "model"):
                tokenizer = self.llm_loader.tokenizer
                model = self.llm_loader.model

                input_ids = tokenizer.encode(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
                attention_mask = (input_ids != tokenizer.pad_token_id).long().to(model.device)

                output = model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=500,
                    temperature=0.0,
                    do_sample=False,
                    top_p=0.85,
                    repetition_penalty=1.2,
                    early_stopping=True,
                    num_beams=3,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id
                )
                generated_ids = output[0][input_ids.shape[-1]:]
                raw_answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
                return raw_answer
            else:
                raw_answer = self.llm_loader.generate(prompt)
                return raw_answer
        else:
            print("generation_loader가 로드되지 않음")
            return "LLM 로더가 설정되지 않았습니다."

    def _leiden_expand_entities(self, initial_entity_names: List[str]) -> Set[str] :
        if not hasattr(self, "node_to_community") :
            return set()
        
        expanded_community_entities = set()
        target_community_ids = set()

        for name in initial_entity_names :
            lower_name = name.lower()
            if lower_name in self.node_name_map :
                original_case_name = self.node_name_map[lower_name]
                if original_case_name in self.node_to_community :
                    community_id = self.node_to_community[original_case_name]
                    target_community_ids.add(community_id)

        for comm_id in target_community_ids :
            expanded_community_entities.update(self.community_to_nodes.get(comm_id, set()))

        return expanded_community_entities

    def _expand_entities(self, initial_entities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        if not initial_entities:
            return []

        initial_entity_names = [e['name'] for e in initial_entities]
        prompt = f"""
        Given the following list of medical or anatomical entities, provide a list of related or synonymous entities.
        This will be used to improve search recall in a knowledge base.
        Focus on providing closely related concepts, components, or alternative names.
        Provide the output as a simple comma-separated list, without numbers or bullets.

        Initial Entities: {', '.join(initial_entity_names)}

        Expanded Entities:
        """

        raw_llm_output = self._call_llm_generate(prompt.strip())
        llm_expanded_entity_names = {name.strip() for name in raw_llm_output.split(',')}
        louvain_expanded_entity_names = self._leiden_expand_entities(initial_entity_names)
        all_entity_names = set(initial_entity_names)
        all_entity_names.update(llm_expanded_entity_names)
        all_entity_names.update(louvain_expanded_entity_names)

        final_entities = [{'name': name} for name in all_entity_names]

        return final_entities

    def generate_response(self, question: str) -> Tuple[str, str]:
        # 질문에서 엔티티 추출
        initial_entities, _ = self._extract_entities_relations(question)
        if not initial_entities:
            return "질문에서 유효한 엔티티를 추출할 수 없습니다.", ""

        # 엔티티 확장
        expanded_entities = self._expand_entities(initial_entities)

        page_numbers = self._find_pages_from_entities(expanded_entities)
        if not page_numbers :
            return "페이지를 찾을 수 없음", ""
        print(f"찾은 페이지 : {page_numbers}")

        reranked_chunks = self._retrieve_and_rerank_context(question, page_numbers, top_k_rerank=5)
        if not reranked_chunks:
            return "관련 페이지는 찾았으나, 질문과 직접적으로 연관된 문맥이 없음", ""

        final_context_parts = []
        current_len = 0

        if hasattr(self.llm_loader, 'tokenizer') and self.llm_loader.tokenizer is not None:
            print("LLM 로더의 특정 토크나이저를 사용하여 길이를 계산합니다.")
            llm_tokenizer = self.llm_loader.tokenizer
            max_len = getattr(llm_tokenizer, 'model_max_length', 4096) - 500

            base_prompt = self._build_llm_prompt(question, "")
            base_prompt_len = len(llm_tokenizer.tokenize(base_prompt))
            current_len += base_prompt_len

            for chunk in reranked_chunks:
                page_num = chunk['metadata'].get('source_page', 'N/A')
                context_snippet = f"... {chunk['document']} ... (출처: Page {page_num})"
                chunk_token_len = len(llm_tokenizer.tokenize(context_snippet))

                if current_len + chunk_token_len <= max_len:
                    final_context_parts.append(context_snippet)
                    current_len += chunk_token_len
                else:
                    break
        else:
            print("범용 토크나이저를 사용하여 길이를 근사치로 계산합니다. (Ollama 등)")
            proxy_tokenizer = self.tokenizer  
            max_len = 2048 - 500

            for chunk in reranked_chunks:
                page_num = chunk['metadata'].get('source_page', 'N/A')
                context_snippet = f"... {chunk['document']} ... (출처: Page {page_num})"
                chunk_token_len = len(proxy_tokenizer.tokenize(context_snippet))

                if current_len + chunk_token_len <= max_len:
                    final_context_parts.append(context_snippet)
                    current_len += chunk_token_len
                else:
                    break

        if not final_context_parts:
            return "관련 정보를 찾았으나, 모델의 입력 길이 제한으로 인해 컨텍스트를 구성할 수 없습니다.", ""

        context = "\n\n".join(final_context_parts)

        prompt = self._build_llm_prompt(question, context)

        answer = self._call_llm_generate(prompt)
        return answer, context

def save_results_to_file(question: str, answer: str, context: str, output_dir: str, file_index: int):
    """Write one question/answer/evidence record to a new text file.

    The file is created in *output_dir* (made on demand) and named
    ``result_<file_index>_<HHMMSS_microseconds>.txt``. It holds three labelled
    sections, each followed by a blank line.
    """
    os.makedirs(output_dir, exist_ok=True)

    stamp = datetime.now().strftime("%H%M%S_%f")
    target = os.path.join(output_dir, f"result_{file_index}_{stamp}.txt")

    sections = (("[질문]", question), ("[답변]", answer), ("[근거]", context))
    payload = "".join(f"{label}\n{body}\n\n" for label, body in sections)
    with open(target, 'w', encoding='utf-8') as out:
        out.write(payload)

# Script entry point: build the QA system, run a fixed list of evaluation
# questions through it, save each result to a file, and report the mean latency.
if __name__ == '__main__':
    qa_system = QASystem(
        graphml_path="./data/knowledge_graph/knowledge_graph_1.graphml",
        md_path="./data/split_file/anatomy/"
    )
    
    # Inject the generation backend (QASystem starts with llm_loader = None).
    qa_system.llm_loader = generation_loader

    # Evaluation questions grouped by source markdown file; the trailing
    # comment on each line is the page the question was written from.
    questions = [
        ############## 1_Embryology.md
        "What are the two essential components of a higher organism cell as defined in the text?", # page 7
        "Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.", # page 7
        "What is the primary role of the yolk-sac in the embryo's early development?", # page 20
        "How does the embryo separate from the yolk-sac, and what does the enclosed part of the yolk-sac form?", # page 19
        "What significant developments occur in a human embryo during the Second Week?", # page 33
        "What are the key characteristics of the human embryo by the end of the Third Week?", # page 33
        
        ############## 2_Osteology.md
        "What are the three groups into which the cells of a primitive segment differentiate, and what do they form?", # page 38
        "How is each vertebral body formed from primitive segments during development?", # page 38
        "What are the sphenoidal air sinuses, and where are they located within the sphenoid bone?", # page 88
        "Describe the sphenoidal rostrum and its articulation.",# 88
        "What is the tibia, and where is it located in the human leg?", # 158
        "Describe the superior articular surface of the tibia's upper extremity.", # 158

        ############## 3_Syndesmology.md
        "What are joints or articulations, and how are immovable joints characterized?", # 174
        "How does the articular lamella differ from ordinary bone tissue?", # 174
        "Where is the synovial membrane located in relation to the glenoid cavity and humerus, and how does it interact with the Biceps brachii tendon?", # 207
        "List some of the bursae located near the shoulder-joint and specify which ones communicate with the synovial cavity.", # 207
        "What is the function of the plantar calcaneonavicular ligament, and what condition results if it yields?", # 236
        "How are the navicular bone and the three cuneiform bones connected, and what type of movement do they permit?", # 236

        ############## 4_Myology.md
        "How does the nervous system serve as an indicator for the origin and migration paths of developing muscles, despite not influencing muscle differentiation?", # 250
        "Describe the structural components of striped or voluntary muscle, from bundles to individual fibers.", # 250
        "What is the triangular ligament and where is it located?", # 290
        "What structures perforate the superficial layer (inferior fascia) of the urogenital diaphragm?", # 290
        "Where does the Extensor digitorum longus muscle originate, and what structures are located between it and the Tibialis anterior?", # 322
        "What is the Peronæus tertius, and where is it inserted?", # 322

        ############## 5_Angiology.md
        "What are the main characteristics of the middle coat (tunica media) of arteries, and how does its composition vary with vessel size?", # 334
        "Describe the composition and variations of the external coat (tunica adventitia) in arteries.", # 334
        "How do the Vitelline Veins develop into parts of the portal and hepatic veins?", # 345
        "What happens to the Umbilical Veins during embryonic development and after birth?", # 345
        "What are the three phases of a cardiac cycle and what happens during each?", # 358
        "What are the main peculiarities observed in the fetal heart's vascular system?" # 359
    ]   
    # Results go to ./result/knowledge_graph/<month>월<day>일/leiden.
    today = datetime.now()
    folder_name = f"{today.month}월{today.day}일"
    output_dir = os.path.join("./result", "knowledge_graph", folder_name, 'leiden')
    total_time = 0
    for i, q in enumerate(questions):
        print(f"질문: {q}")
        # Only the generate_response call is timed; printing and saving are excluded.
        start_time = time.time()
        response, context = qa_system.generate_response(q)
        end_time = time.time()
        elapse_time = end_time - start_time
        total_time += elapse_time
        print("=" * 30)
        print(f"답변: {response}\n\n\n")
        print("=" * 30)
        # File indices are 1-based.
        save_results_to_file(q, response, context, output_dir, i + 1)
    avg_time = total_time / len(questions)
    print("평균 답변 시간 : %.2f" % avg_time)

  from .autonotebook import tqdm as notebook_tqdm




Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.17it/s]
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


#############Leiden Community Info
Community 0 : {'inferior nasal conchæ', 'occipital', 'frontal process', 'dark band', 'maxillæ', 'posteriorly', 'membranes of Krause', 'median plane', 'conchal crest', 'incisive foramen', 'lower part of the bone', 'lacrimal bone', 'planum occipitale', 'lacrimal sulcus', 'uncinate process', 'great wings', 'cleft', 'calcium phosphate', 'arches of the vertebræ', 'nasopalatine nerve', 'descending palatine vessels', 'superior sagittal sinus', 'lateral margin', 'zygomatic', 'pterygoid processes', 'membrane', 'calcium carbonate', 'Perpendicular Plate', 'fibers', 'nasal crest', 'nasopalatine groove', 'Ethmoid', 'edges of the sagittal sulcus', 'Tensor veli palatini', 'Haversian canal', 'septal cartilage', 'lacrimal', 'sphenoid', 'connective tissue', 'frontal bone', 'Maxilla', 'nasal cavities', 'parietals', 'pterygoid process', 'quadrilateral', 'Inferior Nasal Concha', 'nasal septum', 'palatine bones', 'alveolus', 'lacrimals', 'hard palate', 'ethmoidal process o

Token indices sequence length is longer than the specified maximum sequence length for this model (851 > 512). Running this sequence through the model will result in indexing errors


찾은 페이지 : {'53', '95', '128', '6', '13', '26', '48', '150', '8', '129', '9', '7', '340'}
범용 토크나이저를 사용하여 길이를 근사치로 계산합니다. (Ollama 등)
답변: A cell in higher organisms is defined as having two essentials: cytoplasm and a nucleus (Page 6). The cytoplasm is a soft, jelly-like material, and the nucleus is a small spherical body embedded within it (Page 6).



질문: Describe the four main phases of indirect cell division (karyokinesis) as outlined in the text.
찾은 페이지 : {'6', '13', '8', '44', '9', '7', '340'}
범용 토크나이저를 사용하여 길이를 근사치로 계산합니다. (Ollama 등)
답변: Indirect cell division, or karyokinesis, consists of four phases. These phases begin with changes in the nucleus leading to its subdivision, followed by cleavage of the cell protoplasm (Page 7). The four phases are briefly grouped as follows (Page 7): prophase (I to III), metaphase (IV), anaphase (V and VI), and telophase (VII and VIII) (Page 7). During telophase, the cell protoplasm constricts around the achromatic spindle, dividing the original ce

: 