# Outline
    ---- Fundamental functionalities ----
    1. Check OpenAI and Pinecone connections
    2. Check embedding
    3. Check vectorbase call
    4. Check vector upsert
    5. Check vector retrieve
    6. Check split by batches
    7. Check token calculation
    ### Integration ###
    8. From text (title,content,guidance:metadata) -> split -> embedding -> vector upsert

    ---- Agents ----
    #  Check Language splitter
    4. Check conversational agent
    5. Check retrieval agent
    6. Check prompts - language splitter
    7. Check chains
    8. Check sequential chains
    
    ---- Pipeline ----
    9. Pinecone: Check namespace, plot, character, outline | Chapters
    10. Check input output and metadata as stated in the Notion doc.
    11. Check output of each chapter
    12. Add scorer




In [1]:
import os
import numpy as np
import openai
import pinecone
from vectorStore import VectorStore
from langchain.llms import OpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.chains.conversation.memory import ConversationBufferMemory
from transformers import GPT2Tokenizer


  from tqdm.autonotebook import tqdm


In [2]:
# [X] 1. Check API functionalities
# Build the shared VectorStore used by the following cells, then print both
# client handles so a failed connection is visible right away.
# NOTE(review): the two .txt arguments are presumably API-key files and the
# third is the Pinecone index name — confirm against vectorStore.py.
vector_db = VectorStore(
    "openai_api.txt",
    "pinecone_api.txt",
    "book-chapters",
)
print(vector_db.openai_client, vector_db.pinecone_index)

-----Connected to index: book-chapters-----
<openai.OpenAI object at 0x000001C6D4A43F10> <pinecone.index.Index object at 0x000001C6E65FBEE0>


In [3]:
# [X] 2. Check Embeddings
# [X] 3. Check vectorbase call
# [X] 4. Check vector upsert
# [X] 5. Check vector retrieve
# [] 6. Check split_by_batches

# Smoke test for the VectorStore helpers, run in order:
# embed -> create namespace -> upsert -> retrieve -> split -> token count.
# Relies on `vector_db` having been created by the connection-check cell.

# Shared fixtures, defined up front so the upsert and the retrieve are
# guaranteed to use the same title and text.
title_temp = "title_temp"
sample_text = "Hello, how are you?"

#1. get embeds
embeds = vector_db.get_embedding(sample_text)
print(embeds)

#2. create_namespace
vector_db.create_namespace("book_temp")

#3. store embeds
# The same record is written twice, once for each override_mode value.
vector_db.upsert_embedding(title_temp, sample_text, override_mode=False)
vector_db.upsert_embedding(title_temp, sample_text, override_mode=True)

#4. retrieve embeds
retrieval_temp = vector_db.retrieve_embedding(title_temp)
print(retrieval_temp)

#5. split by batches
# The Chinese sample runs first, then its English counterpart, to test that
# splitting works for English as well. After the loop `test_title`,
# `test_text` and `chunks` hold the English values.
split_samples = (
    (
        "测试章节",
        (
            "在遥远的东方，有一座古老的城市，那里住着一位年轻的冒险家。"
            "他从小就梦想着探索世界的每一个角落，发现隐藏在历史中的秘密。"
            "有一天，他决定离开家乡，开始他期待已久的冒险之旅。"
            "在旅途中，他遇到了许多奇异的景象和各种各样的人物。"
            "每一个新的发现都让他更加坚定自己的信念，那就是追寻真理和智慧。"
            "随着时间的推移，他的名声也逐渐传播开来，成为了家乡的骄傲。"
        ),
    ),
    (
        "Test Chapter",
        (
            "In a distant land in the East, there was an ancient city where a young adventurer lived. "
            "From a young age, he dreamed of exploring every corner of the world and discovering the secrets hidden in history. "
            "One day, he decided to leave his hometown and embark on the adventure he had long awaited. "
            "During his journey, he encountered many strange sights and various kinds of people. "
            "Each new discovery strengthened his belief in the pursuit of truth and wisdom. "
            "As time went on, his fame gradually spread, becoming the pride of his hometown."
        ),
    ),
)

for test_title, test_text in split_samples:
    chunks = vector_db.split_text(test_title, test_text, max_length=100)
    for chunk_title, chunk in chunks:
        print(f"Chunk Title: {chunk_title}\nChunk Text: {chunk}\n")

#6. tokenizer
# Print both counts explicitly: a notebook only echoes the value of a cell's
# last expression, so a bare call on the first line would be silently dropped.
print("calculate_tokens:", vector_db.calculate_tokens(sample_text))
print("calculate_tokens_chinese:", vector_db.calculate_tokens_chinese("测试章节"))

  warn_deprecated(


[-0.008593395949274591, -0.0006966988750628289, 0.0034827176523171415, -0.033106012844180875, -0.012054366521828817, 0.01912543289442665, -0.009326600106704667, -0.009270677913565515, -0.01744776337496194, -0.010494755172618442, 0.03151533257509037, 0.010805434126871788, -0.01688853958092535, -0.008549901324087492, 0.007717279882669895, -0.016180190559399375, 0.026494751584648313, -0.0075743670421727334, 0.02967611026018424, -0.012986404315911396, -0.02139960704913902, 0.0033615525900748, 0.01820582080565104, -0.0022943684869731872, 0.002022523819924922, -0.010532036013829517, 0.016615142399205616, -0.015919219082986616, 0.01647844287702321, -0.02646989644874421, 0.00545242593826029, -0.0006982522628491766, -0.007437667985651598, -0.004271843770055733, 0.012091647363039892, -0.02162329582169563, 0.0009941744696896604, -0.009587570651794887, 0.023300965341160332, -0.015322714447738948, 0.022890868637258196, -0.0015751452270738528, 0.013259802428953669, -0.016043491037216973, -0.01401786

4

In [4]:
# [] check Integration
# 8. From text (title, content, context: metadata) -> split -> embedding -> vector upsert
# This cell only builds the two sample payloads; no split/embed/upsert call
# is made here.
# NOTE(review): the English payload uses the key "context" while the Chinese
# one uses "guide" — confirm which key the pipeline expects.
text = dict(
    title="Test Chapter",
    content=(
        "In a distant land in the East, there was an ancient city where a young adventurer lived. "
        "From a young age, he dreamed of exploring every corner of the world and discovering the secrets hidden in history. "
        "One day, he decided to leave his hometown and embark on the adventure he had long awaited. "
        "During his journey, he encountered many strange sights and various kinds of people. "
        "Each new discovery strengthened his belief in the pursuit of truth and wisdom. "
        "As time went on, his fame gradually spread, becoming the pride of his hometown."
    ),
    context="yes",
)

# Chinese sample payload. It is left as a bare expression on purpose so the
# notebook echoes it as the cell's result; it is not bound to any name.
{
    "title": "冒险之旅",
    "content": (
        "在遥远的东方，有一座古老的城市，那里住着一位年轻的冒险家。"
        "他从小就梦想着探索世界的每一个角落，发现隐藏在历史中的秘密。"
        "有一天，他决定离开家乡，开始他期待已久的冒险之旅。"
        "在旅途中，他遇到了许多奇异的景象和各种各样的人物。"
        "每一个新的发现都让他更加坚定自己的信念，那就是追寻真理和智慧。"
        "随着时间的推移，他的名声也逐渐传播开来，成为了家乡的骄傲。"
    ),
    "guide": "yes",
}


{'title': '冒险之旅',
 'content': '在遥远的东方，有一座古老的城市，那里住着一位年轻的冒险家。他从小就梦想着探索世界的每一个角落，发现隐藏在历史中的秘密。有一天，他决定离开家乡，开始他期待已久的冒险之旅。在旅途中，他遇到了许多奇异的景象和各种各样的人物。每一个新的发现都让他更加坚定自己的信念，那就是追寻真理和智慧。随着时间的推移，他的名声也逐渐传播开来，成为了家乡的骄傲。',
 'guide': 'yes'}

In [1]:
import json
import os
import numpy as np
import openai
import pinecone
from vectorStore import VectorStore
from langchain.llms import OpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.chains.conversation.memory import ConversationBufferMemory
from transformers import GPT2Tokenizer
from agentCollection import AgentCollection

  from tqdm.autonotebook import tqdm


In [2]:
# [] check Language Splitter
# [] check Language conversational agent

# Example usage: build a store (no index name passed here) and hand it to the
# agent collection together with the prompt file.
vector_store = VectorStore(
    "openai_api.txt",
    "pinecone_api.txt",
)
novel_agent = AgentCollection(
    vector_store,
    "test_prompt.json",
)

# Set the novel title (passed as the namespace name).
vector_store.create_namespace("冒险之旅")

# Generate a novel segment from the sample passage and print the result.
query = (
    "在遥远的东方，有一座古老的城市，那里住着一位年轻的冒险家。"
    "他从小就梦想着探索世界的每一个角落，发现隐藏在历史中的秘密。"
    "有一天，他决定离开家乡，开始他期待已久的冒险之旅。"
    "在旅途中，他遇到了许多奇异的景象和各种各样的人物。"
    "每一个新的发现都让他更加坚定自己的信念，那就是追寻真理和智慧。"
    "随着时间的推移，他的名声也逐渐传播开来，成为了家乡的骄傲。"
)
novel_segment = novel_agent.generate_novel_segment(query)
print("Generated Novel Segment:", novel_segment)


-----Connected to index: book-chapters-----
Namespace created: 冒险之旅


[1m> Entering new AgentExecutor chain...[0m
An error occurred during novel segment generation: Missing some input keys: {'chat_history'}
Generated Novel Segment: None


  warn_deprecated(
  warn_deprecated(
  warn_deprecated(
