In [1]:
import torch
#import gradio as gr

from textwrap import fill
from IPython.display import Markdown, display

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )

from langchain import PromptTemplate
from langchain.chains import ConversationChain
from langchain import HuggingFacePipeline

from langchain.vectorstores import Chroma
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

import warnings
warnings.filterwarnings('ignore')
print(torch.cuda.is_available())

True


In [2]:
# Checkpoint used throughout the notebook; the commented line is an
# alternative that was tried.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
#MODEL_NAME = "NousResearch/Yarn-Mistral-7b-128k"

# 4-bit NF4 quantization with double quantization; compute runs in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): trust_remote_code=True executes code shipped with the
# checkpoint repository -- confirm it is still required for MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    #use_flash_attention = True,
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin C:\Users\phili\miniconda3\envs\ChildrensBook\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121_nocublaslt.dll
CUDA SETUP: CUDA runtime path found: C:\Users\phili\miniconda3\envs\ChildrensBook\bin\cudart64_12.dll
CUDA SETUP: Highest compute capability among GPUs detected: 6.1
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary C:\Users\phili\miniconda3\envs\ChildrensBook\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121_nocublaslt.dll...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# The result below is bound to the name `pipeline` (the next cell reads it),
# which shadows the factory imported at the top of the notebook. Importing the
# factory under an alias here keeps this cell re-runnable: without it, a second
# execution would call the already-built pipeline object instead of the factory.
from transformers import pipeline as hf_pipeline_factory

# Start from the checkpoint's own generation defaults, then override sampling.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 2048
generation_config.top_p = 0.95
generation_config.do_sample = True
# Declare the pad token on the generation config itself. Setting it only on
# model.config (last line of this cell) still leaves generate() printing
# "Setting `pad_token_id` to `eos_token_id`" on every call.
generation_config.pad_token_id = tokenizer.eos_token_id
#generation_config.repetition_penalty = 1.15

pipeline = hf_pipeline_factory(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # NOTE(review): presumably the LangChain wrapper strips the prompt itself
    # and therefore needs the full text back -- confirm before changing.
    return_full_text=True,
    generation_config=generation_config,
)
pipeline.model.config.pad_token_id = pipeline.model.config.eos_token_id

In [4]:
# Wrap the text-generation pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=pipeline)

In [5]:
# Prompt skeleton for the conversation chain. It carries the storyteller
# system instructions plus two placeholders: {history} (prior conversation)
# and {input} (the current user message). The whole text is wrapped in a
# single <s>[INST] ... [/INST] pair.
# NOTE(review): the user messages sent in later cells already contain their
# own [INST]/[/INST] markers, so the rendered prompt nests them -- confirm
# this is intended.
longchat_template = """<s>[INST]
As a master storyteller, the AI's goal is to craft captivating, magical, and informative narratives for young audiences. The AI adopts a friendly tone, with rich scene descriptions and diverse, relatable characters. Key AI storytelling elements include:

- Emphasizing positive themes: friendship, learning, curiosity, kindness.
- Incorporating age-appropriate humor.
- Using character adventures to present educational content engagingly.
- Acknowledging information limits, underscoring continuous learning.
- Introducing suitable challenges and emotions to develop resilience and empathy.
- Offering varied story endings with hope or moral lessons, avoiding negative content like insults or frightening scenes.

The AI's approach combines entertainment and education, aiming to inspire, amuse, and educate young readers.

Current conversation:
{history}
USER: {input}
AI: [/INST]
"""

In [6]:
# Bind the raw template text to its two placeholders.
longchat_prompt_template = PromptTemplate(
    template=longchat_template,
    input_variables=["input", "history"],
)

In [7]:
# Disabled alternative: the same conversation chain backed by
# ConversationBufferMemory. Kept for reference only; the chain actually used
# is built in a later cell.
# from langchain.chains.conversation.memory import ConversationBufferMemory
#
# conversation_buf = ConversationChain(
#     llm=llm,
#     memory=ConversationBufferMemory(ai_prefix="AI", human_prefix="USER"),
#     prompt=longchat_prompt_template,
# )

In [8]:
# Disabled alternative: the same conversation chain backed by
# ConversationSummaryMemory (which is itself given the LLM). Kept for
# reference only.
# from langchain.chains.conversation.memory import ConversationSummaryMemory
#
# conversation_buf = ConversationChain(
#     llm=llm,
#     memory=ConversationSummaryMemory(llm=llm, ai_prefix="AI", human_prefix="USER"),
#     prompt=longchat_prompt_template
# )

In [9]:
from langchain.memory import ConversationSummaryBufferMemory

# Memory for the chain: configured with the same LLM, a 1300-token limit and
# the USER/AI speaker prefixes that the prompt template uses.
summary_buffer_memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=1300,
    human_prefix="USER",
    ai_prefix="AI",
)

# The conversation chain every later cell talks to.
conversation_buf = ConversationChain(
    llm=llm,
    memory=summary_buffer_memory,
    prompt=longchat_prompt_template,
)

In [10]:
# Disabled alternative: the same conversation chain backed by
# ConversationKGMemory. Kept for reference only.
# from langchain.memory import ConversationKGMemory
#
# conversation_buf = ConversationChain(
#     llm=llm,
#     memory=ConversationKGMemory(llm=llm),
#     prompt=longchat_prompt_template
# )

In [11]:
# Opening user message: describes the 10-chapter book and asks for exactly one
# chapter per turn. Sent once, as the first call to the conversation chain.
# The text is a self-contained [INST] ... [/INST] block.
instruction_prompt = """[INST] Create a children's book set in a magical world with dragons and Korean culture. The story, for young readers, will span 10 chapters, each focused on a unique adventure, blending wonder and Korean cultural elements. Each chapter should:

    Be a narrative of at least 500 words!
    Each chapter should have a open ending!
    Start each chapter with the specific number and title!

The AI must generate only one chapter at a time, pausing after each to await USER direction for the next chapter. 
Create only one detailed chapter. Never produce next chapters before the USER writes something![/INST]"""

In [12]:
# Condensed restatement of the task, used as a prefix for the per-chapter
# requests in the chapter loop. The string opens an [INST] block and does NOT
# close it: the caller appends the concrete instruction followed by [/INST].
special_prompt="""[INST] Title: Magical Adventures in Korean Culture

Objective: Create a 10-chapter children's book set in a magical world infused with Korean culture. Each chapter should be a 500-word narrative with an open ending and a unique title.

AI Role: As a master storyteller, the AI crafts engaging and educational stories for young readers. The tone is friendly, focusing on positive themes like friendship and curiosity, and incorporating humor and educational content. Each story introduces challenges and concludes with hopeful or moral lessons.

Process: The AI must generate only one chapter at a time, pausing after each to await USER direction for the next chapter. Create only one detailed chapter. Never produce next chapters before the USER writes something! 
"""

In [None]:
# Kick off the story: send the opening instructions and show the model's reply.
first_chapter_result = conversation_buf(instruction_prompt)
print(first_chapter_result["response"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# Ask for the branching options that follow the first chapter.
options_request = "[INST] Give me 4 decision possibilities of how the story could continue! Write no more than two sentences. [/INST]"
options_result = conversation_buf(options_request)
print(options_result["response"])

In [None]:
import random

num_chapters = 10
num_dec = 4  # number of decisions
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

# Keep the option count inside what can be labelled: at most one per letter,
# and at least one so random.choice() never receives an empty list.
num_dec = max(1, min(num_dec, len(letters)))

# Separate cells request the first and the last chapter, so this loop only
# produces the chapters in between.
for _ in range(num_chapters - 2):
    # Pick one of the offered options at random and continue the story with it.
    decision = random.choice(letters[:num_dec])
    print(f"Decision: {decision}")
    print(conversation_buf(special_prompt + f" Continue the story with decision {decision}. Create the next chapter now! Keep in mind to write at least 500 words! [/INST]")["response"])
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    # Ask for exactly as many options as the next random pick can choose from
    # (the count used to be hard-coded in the text, ignoring num_dec).
    print(conversation_buf(f"[INST] Give me {num_dec} decision possibilities of how the story could continue. Write no more than two sentences. [/INST]")["response"])

In [None]:
# Final chapter: the branch is fixed by hand instead of being drawn at random.
decision = "c"
final_request = f"[INST] Continue the story with decision {decision}. Create the next and last chapter now! Keep in mind to write at least 500 words! Find a happy or sad ending based on the previous decisions. [/INST]"
final_result = conversation_buf(final_request)
print(final_result["response"])

In [None]:
# Inspect what the chain's memory object currently exposes as its buffer.
memory_buffer = conversation_buf.memory.buffer
print(memory_buffer)