<a href="https://colab.research.google.com/github/takiguchi-yu/google-colab/blob/main/RAG_over_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%pip install --upgrade --quiet  langchain-openai tiktoken chromadb langchain GitPython

# Set env var OPENAI_API_KEY or load from a .env file
# import dotenv

# dotenv.load_dotenv()

In [5]:
from git import Repo
from langchain.text_splitter import (
    Language,
    RecursiveCharacterTextSplitter,
)
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
import os

# Fetch the example repository into `repo_path`:
# pull from the first remote when a checkout already exists, clone otherwise.
repo_path = "./repo"
if os.path.exists(repo_path):
    repo = Repo(repo_path)
    first_remote = repo.remotes[0]
    first_remote.pull()
else:
    Repo.clone_from(
        "https://github.com/takiguchi-yu/cognito-google-federation-example",
        to_path=repo_path,
    )



In [6]:
# Walk the cloned repository and load every file whose suffix is listed
# below, parsing each one with LanguageParser.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py", ".js", ".ts", ".tsx", ".css", ".php", ".md", ".json", ".html"],
    # NOTE(review): this pattern names the .git directory itself; confirm it
    # also keeps files *inside* .git out of the result.
    exclude=["**/.git"],
    parser=LanguageParser()
)
docs = loader.load()
# A bare `len(docs)` in the middle of a cell is discarded (only the last
# expression of a cell is displayed), so print the count explicitly.
print(f"Loaded {len(docs)} documents")
docs[0]

Document(page_content='## Overview\n\n```mermaid\nsequenceDiagram\n    participant Frontend\n    participant Backend\n    participant Cognito\n    participant Google\n    Frontend->>Backend: /auth/login<br>Request unique login URL (Cognito Hosted UI)\n    Backend->>Cognito: /oauth2/authorize<br>Request unique login URL (Cognito Hosted UI)\n    Cognito->>Backend: Return unique login URL (Cognito Hosted UI)\n    Backend->>Frontend: Return unique login URL (Cognito Hosted UI)\n    Frontend->>Cognito: Redirect to login URL (Cognito Hosted UI)\n    Cognito->>Cognito: Check internally if user already authenticated with Google account recently\n    Cognito->>Google: If authorization expired or never happened,<br/> redirect to Google Hosted UI for authorization.<br/>User enters Google credentials and approves Cognito to read his Google email.\n    Google->>Cognito: Internal redirect. <br/>Provisions the Google user to Cognito User Pool and map fields like "email" to Cognito user fields\n    Co

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 2048 characters,
# with 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=2048)
texts = text_splitter.split_documents(docs)
# A bare `len(texts)` in the middle of a cell is discarded (only the last
# expression of a cell is displayed), so print the count explicitly.
print(f"Split into {len(texts)} chunks")
texts[0]

Document(page_content='## Overview\n\n```mermaid\nsequenceDiagram\n    participant Frontend\n    participant Backend\n    participant Cognito\n    participant Google\n    Frontend->>Backend: /auth/login<br>Request unique login URL (Cognito Hosted UI)\n    Backend->>Cognito: /oauth2/authorize<br>Request unique login URL (Cognito Hosted UI)\n    Cognito->>Backend: Return unique login URL (Cognito Hosted UI)\n    Backend->>Frontend: Return unique login URL (Cognito Hosted UI)\n    Frontend->>Cognito: Redirect to login URL (Cognito Hosted UI)\n    Cognito->>Cognito: Check internally if user already authenticated with Google account recently\n    Cognito->>Google: If authorization expired or never happened,<br/> redirect to Google Hosted UI for authorization.<br/>User enters Google credentials and approves Cognito to read his Google email.\n    Google->>Cognito: Internal redirect. <br/>Provisions the Google user to Cognito User Pool and map fields like "email" to Cognito user fields\n    Co

In [8]:
!wget https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q4_K_M.gguf



In [None]:
# Reinstall llama-cpp-python from scratch (no pip cache, forced reinstall),
# passing -DLLAMA_CUBLAS=on to CMake and forcing a CMake build.
# NOTE(review): newer llama-cpp-python releases may expect a different CMake
# flag name for CUDA support — confirm against the installed version's README.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir

In [None]:
!pip install gpt4all

In [11]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import GPT4AllEmbeddings, LlamaCppEmbeddings, GPT4AllEmbeddings

# Embed the chunks with GPT4All embeddings and index them in a Chroma store.
embedding_fn = GPT4AllEmbeddings()
db = Chroma.from_documents(documents=texts, embedding=embedding_fn)
# db.get(where={"source": "repo/aws-cdk/lib"})  # metadata filtering

# Retriever over the store: MMR search returning 8 results per query.
# ("similarity" is the other search type worth testing.)
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 8})


In [12]:
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Settings for the local CodeLlama model served through llama.cpp.
llm_settings = {
    # Path to the model file
    "model_path": "codellama-13b-instruct.Q4_K_M.gguf",
    # Number of tokens in the context window
    "n_ctx": 5000,
    # Number of layers to run on the GPU
    "n_gpu_layers": 30,
    # Batch size used during inference
    "n_batch": 512,
    # Use half-precision floats for key/value storage.
    # MUST be True, otherwise you will run into problems after a couple of calls.
    "f16_kv": True,
    # Callback manager that handles the callbacks
    "callback_manager": callback_manager,
    # Verbose logging
    "verbose": True,
}
llm = LlamaCpp(**llm_settings)

llama_model_loader: loaded meta data with 20 key-value pairs and 363 tensors from codellama-13b-instruct.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = codellama_codellama-13b-instruct-hf
llama_model_loader: - kv   2:                       llama.context_length u32              = 16384
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 13824
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.atten

In [13]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

# Conversation memory that uses the LLM to summarize the history; it is
# stored under the "chat_history" key and returned as messages.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# Conversational retrieval chain combining the LLM, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [19]:
# Ask the chain a question about the indexed repository and show the answer.
question = "What is aws-cdk?"
# Calling the chain directly (`qa(question)`) goes through the deprecated
# Chain.__call__; `invoke` is the supported entry point. The chat history
# is supplied by the chain's memory, so only the question is passed.
result = qa.invoke({"question": question})
result['answer']

Llama.generate: prefix-match hit


 What is aws-cdk?




llama_print_timings:        load time =    1305.75 ms
llama_print_timings:      sample time =       5.22 ms /    10 runs   (    0.52 ms per token,  1915.71 tokens per second)
llama_print_timings: prompt eval time =    1852.24 ms /   314 tokens (    5.90 ms per token,   169.52 tokens per second)
llama_print_timings:        eval time =    3021.61 ms /     9 runs   (  335.73 ms per token,     2.98 tokens per second)
llama_print_timings:       total time =    4935.02 ms /   323 tokens
Llama.generate: prefix-match hit


  AWS-CDK is an open-source software development framework used to define the infrastructure resources and artifacts using a programming language such as JavaScript or TypeScript and synthesized into AWS CloudFormation stacks or AWS Cloud Development Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Envir


llama_print_timings:        load time =    1305.75 ms
llama_print_timings:      sample time =     145.60 ms /   256 runs   (    0.57 ms per token,  1758.21 tokens per second)
llama_print_timings: prompt eval time =   35858.37 ms /  3783 tokens (    9.48 ms per token,   105.50 tokens per second)
llama_print_timings:        eval time =  123523.76 ms /   255 runs   (  484.41 ms per token,     2.06 tokens per second)
llama_print_timings:       total time =  161393.33 ms /  4038 tokens
Llama.generate: prefix-match hit



The human asks what the AI thinks of aws-cdk. The AI thinks aws-cdk is an open-source software development framework used to define the infrastructure resources and artifacts using a programming language such as JavaScript or TypeScript and synthesized into AWS CloudFormation stacks or AWS Cloud Development Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Enviro


llama_print_timings:        load time =    1305.75 ms
llama_print_timings:      sample time =     146.70 ms /   256 runs   (    0.57 ms per token,  1745.06 tokens per second)
llama_print_timings: prompt eval time =    4505.88 ms /   680 tokens (    6.63 ms per token,   150.91 tokens per second)
llama_print_timings:        eval time =   96006.07 ms /   255 runs   (  376.49 ms per token,     2.66 tokens per second)
llama_print_timings:       total time =  102384.78 ms /   935 tokens


'  AWS-CDK is an open-source software development framework used to define the infrastructure resources and artifacts using a programming language such as JavaScript or TypeScript and synthesized into AWS CloudFormation stacks or AWS Cloud Development Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Environment Envi