In [5]:
from langchain.llms import OpenAI
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
from pathlib import Path

# Relative path to the PDF that the rest of the notebook indexes and queries.
article_path = "data/On-algorithmic-fairness-in-medical-practice-2.pdf"

# Fail fast with a clear message if the input is missing, instead of letting
# the error surface from inside the PDF loader.
if not Path(article_path).is_file():
    raise FileNotFoundError(f"Article PDF not found: {article_path}")

# Load the PDF through LangChain's UnstructuredPDFLoader; `data` holds
# whatever `load()` returns and is passed to the text splitter below.
loader = UnstructuredPDFLoader(article_path)
data = loader.load()

detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.


In [7]:
# Break the loaded document(s) into chunks of at most 1000 characters with no
# overlap between neighbouring chunks; `texts` is what gets embedded later.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents=data)

In [8]:
from pathlib import Path

from langchain.embeddings import OpenAIEmbeddings

# Directory where the Chroma index is persisted between kernel sessions.
persist_directory = 'db'
embedding = OpenAIEmbeddings()

# Decide explicitly whether a persisted index is available. The previous
# try/bare-except approach hid every error (including KeyboardInterrupt and
# API/auth failures), and opening Chroma on a missing directory does not
# appear to raise at all, so the index was never built on a first run.
# NOTE(review): a non-empty directory is treated as a valid index; delete the
# directory to force a rebuild if it was created by an earlier, empty run.
persist_path = Path(persist_directory)
has_persisted_index = persist_path.is_dir() and any(persist_path.iterdir())

if has_persisted_index:
    # Reuse the stored embeddings; no documents are re-embedded here.
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
else:
    # First run: embed all chunks and write the index to disk.
    vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
    vectordb.persist()

Using embedded DuckDB with persistence: data will be stored in: db


In [9]:
from langchain.chains import VectorDBQA, RetrievalQA

# Build the question-answering chain on top of the vector store.
# RetrievalQA replaces the deprecated VectorDBQA: the store is exposed through
# its retriever interface instead of being passed directly. The "stuff" chain
# type puts the retrieved chunks into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)



In [12]:
# Ask the `qa` chain a targeted question; the returned answer string is the
# cell's displayed output.
fairness_question = "Can you explain the problem with fairness?"
qa.run(fairness_question)

' The problem with fairness is that it is not a uniform concept and there are a bundle of different criteria about what should count as fair. Additionally, there is disagreement within the literature on how to ensure fairness in algorithmic decisionmaking and what should be the reference classes for the comparison when determining fairness.'

In [13]:
# Ask the `qa` chain for a short summary; the returned string is the cell's
# displayed output.
# NOTE(review): `qa` is built over a vector store, so the answer presumably
# draws only on the retrieved chunks rather than the full paper — confirm
# whether a dedicated summarization chain is needed for a whole-paper summary.
summary_request = "Please provide a two-paragraph summary of the entire paper."
qa.run(summary_request)

' The paper looks at the issue of algorithmic bias in medical practice. In the first section, the authors outline the different mechanisms of algorithmic bias and distinguish between formal, substantive, and normative notions of algorithmic bias. In the second section, they examine the different standards of fairness and argue that restoring fairness requires more than merely mitigating the differences in the algorithm’s predictive accuracy. They state that a wider array of normative criteria needs to be taken into account. The paper has three objectives: providing an account of algorithmic discrimination, examining the underlying mechanisms of algorithmic bias, and identifying the appropriate normative standards for fair algorithmic decision-making in medical practice. The authors suggest that developers should be aware of the larger social context when choosing the target category for an algorithm, and that machine-learning algorithms need to be able to capture potentially discrimina