# LangChain - Context Compression

In [1]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import WikipediaLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

In [2]:
# Read the OpenAI API key from a local file kept outside the notebook.
# The `with` block closes the file handle as soon as the key is read, and
# strip() removes surrounding whitespace (typically the trailing newline an
# editor appends) so it does not end up inside the key sent to the API.
with open('../../api_key.txt') as key_file:
    api_key = key_file.read().strip()

# Embedding function built with that key; the Chroma store uses it to embed queries.
embedding_function = OpenAIEmbeddings(openai_api_key=api_key)

In [3]:
# Open the Chroma vector store located in ./mkultra/, using the OpenAI
# embedding function defined earlier in the notebook.
db_connection = Chroma(
    embedding_function=embedding_function,
    persist_directory='./mkultra/',
)

## Context Compression

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

### Creating Chat Model

In [5]:
# Chat model that drives the compressor; temperature is pinned to 0
# (lowest sampling randomness) and the key read earlier is reused.
model = ChatOpenAI(
    temperature=0,
    openai_api_key=api_key,
)

### Building the LLM Chain Extractor from the Chat Model

In [6]:
# Build the document compressor from the chat model; it is passed to the
# ContextualCompressionRetriever as `base_compressor`.
compressor = LLMChainExtractor.from_llm(model)

### Contextual Compression Retriever

In [7]:
# Pair the Chroma store's default retriever with the LLM-based compressor,
# so a single retriever object both fetches documents and compresses them.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=db_connection.as_retriever(),
    base_compressor=compressor,
)

### Similarity Search

In [8]:
# Baseline: query the vector store directly, without any compression.
docs = db_connection.similarity_search(
    "When was this declassified"
)
# Display the full text of the top hit for comparison with the compressed result.
docs[0].page_content

'== Background ==\nIn 1974, a New York Times article was published that accused the CIA of illegal operations committed against US citizens. Authored by Seymour M. Hersh, it documented an intelligence operation against the anti-war movement, as well as "break-ins, wiretapping and the surreptitious inspection of mail" conducted since the 1950s. According to former CIA Official Cord Meyer, these disclosures "Convinced large sections of the American public that the CIA had become a domestic Gestapo and stimulated an overwhelming demand for the wide-ranging congressional investigations that were to follow."Hersh had been tipped off to the possibility of an "in house operation" by an unidentified member of the CIA in spring of 1974. He embarked on an investigation, speaking to sources that included CIA Chief of Counterintelligence James Angleton. Although he was not aware of its existence, Hersh uncovered much information that had been documented in the "Family Jewels", a report ordered by 

In [9]:
# Same question, this time routed through the compression retriever.
compressed_docs = compression_retriever.get_relevant_documents(
    "When was this declassified"
)
# Display the text of the first returned document after compression.
compressed_docs[0].page_content



'In 1974'

In [10]:
# Show the 'summary' metadata entry carried by the first compressed document.
compressed_docs[0].metadata['summary']

'The United States President\'s Commission on CIA Activities within the United States was ordained by President Gerald Ford in 1975 to investigate the activities of the Central Intelligence Agency and other intelligence agencies within the United States. The Presidential Commission was led by Vice President Nelson Rockefeller, from whom it gained the nickname the Rockefeller Commission.\nThe commission was created in response to a December 1974 report in The New York Times that the CIA had conducted illegal domestic activities, including experiments on US citizens, during the 1960s. The commission issued a single report in 1975, touching upon certain CIA abuses including mail opening and surveillance of domestic dissident groups. It also publicized Project MKUltra, a CIA mind control research program.\nSeveral weeks later, committees were established in the House and Senate for a similar purpose. White House Personnel, including future Vice President Dick Cheney, edited the results, ex