In [2]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import DataFrameLoader
import pandas as pd
import torch
import transformers
from transformers import AutoTokenizer
from time import time
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from io import StringIO

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Embeddings come from an open-source sentence-transformers model that runs locally.
embedding_model_name = "all-MiniLM-L6-v2"
embedding_function = SentenceTransformerEmbeddings(model_name=embedding_model_name)

In [4]:
# Read every column as a plain string. Left to the default type inference, the
# statute numbers look like floats, so the column types are declared explicitly.
dtype_dict = dict.fromkeys(['Statute Number', 'Type', 'Category', 'Text'], 'object')

# Load the CSV file holding all citations in KRS Title VIII.
df_citations = pd.read_csv(
    'krs/gov.ky.krs.title.08.citations.csv',
    dtype=dtype_dict,
)

# Preview the first few rows to confirm the columns were parsed as intended.
print(df_citations.head())

  Statute Number        Type               Category  \
0         61.010  Annotation           Construction   
1         61.010  Annotation            Application   
2         61.010  Annotation  Against Public Policy   
3         61.010  Annotation  Against Public Policy   
4         61.010  Annotation  Against Public Policy   

                                                Text  
0  By virtue of this section, attorneys have no l...  
1  This section should be read into every act cre...  
2  Assignment of official fees in futuro to a tru...  
3  Assignment of official salary in futuro to cre...  
4  Contract and notes executed to candidate who a...  


In [7]:
# Statute numbers whose citations are kept; both subsets below share this list.
selected_statutes = [
    '61.870', '61.871', '61.8715', '61.874', '61.8745', '61.8746',
    '61.876', '61.878', '61.880', '61.882', '61.884',
]
in_selected_statutes = df_citations['Statute Number'].isin(selected_statutes)

# Split the selected citations by type so each type can go into its own store.
df_annotations = df_citations[df_citations['Type'].eq('Annotation') & in_selected_statutes]
df_opinions = df_citations[df_citations['Type'].eq('Attorney General Opinion') & in_selected_statutes]
print(df_annotations.head())

     Statute Number        Type           Category  \
2383         61.870  Annotation  Constitutionality   
2384         61.870  Annotation  Constitutionality   
2385         61.870  Annotation     Public Records   
2386         61.870  Annotation     Public Records   
2387         61.870  Annotation     Public Records   

                                                   Text  
2383  Pre-amendment version of Ky. Rev. Stat. Ann. §...  
2384  Former Ky. Rev. Stat. Ann. § 61.870(1(h) was n...  
2385  Due process does not require an appellate cour...  
2386  The custody and control of the records generat...  
2387  The materials compiled for purposes of reviewi...  


In [8]:
# Turn each filtered frame into a list of documents. The "Text" column is passed as
# the page content column for both loaders.

# Annotations
loader_annotations = DataFrameLoader(df_annotations, page_content_column="Text")
annotations = loader_annotations.load()

# Attorney General opinions
loader_opinions = DataFrameLoader(df_opinions, page_content_column="Text")
opinions = loader_opinions.load()

In [11]:
# Build (or refresh) one persisted Chroma store per citation type.
#
# Each document is given a deterministic id based on its position in the list. When no
# ids are passed, Chroma.from_documents generates random ones, so re-running this cell
# against the same persist_directory would append another copy of every document.
#
# NOTE(review): the ids are positional, so they are only stable while the input order
# is unchanged, and documents left over from an earlier, longer run are not removed.
# A store already populated by a run without explicit ids still contains those
# randomly-keyed documents; delete the directory once to start from a clean store.
ids_annotations = [f"annotation-{i}" for i in range(len(annotations))]
ids_opinions = [f"opinion-{i}" for i in range(len(opinions))]

db_annotations = Chroma.from_documents(
    annotations,
    embedding_function,
    ids=ids_annotations,
    persist_directory='db/kora-annotations',
)
db_opinions = Chroma.from_documents(
    opinions,
    embedding_function,
    ids=ids_opinions,
    persist_directory='db/kora-opinions',
)

In [18]:
# Print every annotation row as a list of {column: value} dicts, with the text untruncated.
annotation_records = df_annotations.to_dict(orient='records')
print(annotation_records)


[{'Statute Number': '61.870', 'Type': 'Annotation', 'Category': 'Constitutionality', 'Text': 'Pre-amendment version of Ky. Rev. Stat. Ann. § 61.870(1)(h) was constitutional as it was sufficiently definite that a common man of ordinary intelligence could read and subscribe meaning to it. Pike Cnty. Fiscal Court v. Util. Mgmt. Grp., LLC, 2015 Ky. App. LEXIS 88 (Ky. Ct. App. June 12, 2015, sub. op., 2015 Ky. App. Unpub. LEXIS 880 (Ky. Ct. App. June 12, 2015).'}, {'Statute Number': '61.870', 'Type': 'Annotation', 'Category': 'Constitutionality', 'Text': "Former Ky. Rev. Stat. Ann. § 61.870(1(h) was not unconstitutional for being vague because the statute was not void-as-unintelligible, as (1) the people the statute affected could understand the statute, and (2) courts could deduce the legislature's will, as undefined terms found to render the statute unintelligible were commonly defined. Util. Mgmt. Grp., LLC v. Pike Cty. Fiscal Court, 531 S.W.3d 3, 2017 Ky. LEXIS 441 (Ky. 2017)."}, {'Stat