In [None]:
# # Langchain package
# %pip install -qU langchain
# 
# # Local vector store via Chroma
# %pip install -qU langchain_chroma
# 
# # Local inference and embeddings via Ollama
# %pip install -qU langchain_ollama
# 
# # Web Loader
# %pip install -qU beautifulsoup4

In [1]:
MODEL="llama3.2"

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain.document_loaders import PyPDFLoader
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
anamoly_links=[
    "https://medium.com/simform-engineering/anomaly-detection-with-unsupervised-machine-learning-3bcf4c431aff",
    "https://www.stratascratch.com/blog/machine-learning-algorithms-explained-anomaly-detection/"
]

anamology_loader =WebBaseLoader(anamoly_links)

In [6]:
anamoly_langchain_docs = anamology_loader.load_and_split()
anamoly_langchain_docs

[Document(metadata={'source': 'https://medium.com/simform-engineering/anomaly-detection-with-unsupervised-machine-learning-3bcf4c431aff', 'title': 'Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | Medium', 'description': 'Anomaly Detection with Unsupervised Machine Learning Detecting Outliers and Unusual Data Patterns with Unsupervised Learning In an era of big data, anomaly detection has become a crucial capability …', 'language': 'en'}, page_content='Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--1ListenShareDetecting Outliers and Unusual Data Patterns with Unsupervised LearningPress enter or click to view i

In [7]:
from langchain_core.documents import Document
from collections import defaultdict

#Group docs by source
grouped_docs = defaultdict(list)
for doc in anamoly_langchain_docs:
    source = doc.metadata.get('source', '')
    grouped_docs[source].append(doc)


In [8]:
for k,v in grouped_docs.items():
    print(f"[{k=}] {v=}")

[k='https://medium.com/simform-engineering/anomaly-detection-with-unsupervised-machine-learning-3bcf4c431aff'] v=[Document(metadata={'source': 'https://medium.com/simform-engineering/anomaly-detection-with-unsupervised-machine-learning-3bcf4c431aff', 'title': 'Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | Medium', 'description': 'Anomaly Detection with Unsupervised Machine Learning Detecting Outliers and Unusual Data Patterns with Unsupervised Learning In an era of big data, anomaly detection has become a crucial capability …', 'language': 'en'}, page_content='Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--

In [9]:
# Combine documents with the same source
combined_docs = []
for source, docs in grouped_docs.items():
    combined_content = "\n".join(doc.page_content for doc in docs)
    combined_metadata={}
    # combined_metadata = docs[0].metadata.copy()  # Use metadata from the first document
    combined_metadata['num_chunks'] = len(docs)  # Add number of original chunks
    combined_docs.append(Document(page_content=combined_content, metadata=combined_metadata))

# Replace langchain_docs with the combined documents
langchain_docs = combined_docs

In [10]:
len(langchain_docs)

2

In [11]:
anamoly_doc_obj=langchain_docs[0]
anamoly_doc_obj

Document(metadata={'num_chunks': 4}, page_content='Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--1ListenShareDetecting Outliers and Unusual Data Patterns with Unsupervised LearningPress enter or click to view image in full sizeIn an era of big data, anomaly detection has become a crucial capability for unlocking hidden insights and ensuring data integrity. This blog dives into the world of unsupervised machine learning techniques to detect outliers efficiently without labeled data.We introduce key anomaly detection concepts, demonstrate anomaly detection methodologies and use cases, compare supervised and unsupervised models, and provide a step-by-step imp

In [12]:
anamoly_doc_obj.page_content

'Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--1ListenShareDetecting Outliers and Unusual Data Patterns with Unsupervised LearningPress enter or click to view image in full sizeIn an era of big data, anomaly detection has become a crucial capability for unlocking hidden insights and ensuring data integrity. This blog dives into the world of unsupervised machine learning techniques to detect outliers efficiently without labeled data.We introduce key anomaly detection concepts, demonstrate anomaly detection methodologies and use cases, compare supervised and unsupervised models, and provide a step-by-step implementation guide using DBSCAN in Python.What is a

In [13]:
from IPython.display import  Markdown
Markdown(anamoly_doc_obj.page_content)

Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--1ListenShareDetecting Outliers and Unusual Data Patterns with Unsupervised LearningPress enter or click to view image in full sizeIn an era of big data, anomaly detection has become a crucial capability for unlocking hidden insights and ensuring data integrity. This blog dives into the world of unsupervised machine learning techniques to detect outliers efficiently without labeled data.We introduce key anomaly detection concepts, demonstrate anomaly detection methodologies and use cases, compare supervised and unsupervised models, and provide a step-by-step implementation guide using DBSCAN in Python.What is an anomaly?An anomaly is basically something that’s unusual, doesn’t fit the usual pattern, or stands out because it’s different in a specific category or situation. To explain it simply, let’s look at some clear examples:Think about a collection of smartphones, mostly from Samsung, and then there’s an iPhone. The iPhone is an anomaly because it’s a different brand.Imagine you have a bunch of pens, but one of them is a fancy fountain pen instead of a regular ballpoint pen. That fountain pen is an anomaly because it’s not like the others.What is anomaly detection?Anomaly detection is a technique used to identify data points that are significantly different or “outliers” when compared to the majority of the data in a dataset.Anomaly detection is about finding data points that are different from what is considered normal or expected, and it relies on historical data or established knowledge to determine what falls within the usual range. It plays a crucial role in ensuring the quality and security of data in various domains.Press enter or click to view image in full sizeExample of anomaly detection in server logs:Normal behavior:Website traffic follows a regular pattern.Requests per minute show a predictable trend, with slight increases during peak hours.Anomaly:Suddenly, there is an unusual, significant surge in traffic.This spike in requests per minute is an anomaly in the server logs.Anomaly detection use casesHere are some diverse applications of anomaly detection using machine learning:Event detection in sensor networksManufacturing quality controlHealthcare monitoringSocial media monitoringFraud detectionNetwork intrusion detectionHealthcare monitoringInsurance claim analysisCybersecurity threat detectionIdentity theftTraffic monitoringNetwork intrusion detectionData breachesIntrusion detectionVideo surveillanceThe three settings for anomaly detection, as described by Dr. Thomas Dietterich and his team at Oregon State University in 2018:Supervised Anomaly Detection: In this setting, the anomaly detection model is trained on a labeled dataset, which means that each data point is explicitly marked as either normal or anomalous. The model learns the characteristics of normal data and uses this knowledge to detect anomalies in new, unseen data. Supervised anomaly detection is effective when you have a reliable labeled dataset for training, and it is suitable for scenarios where anomalies are relatively easy to define and identify.ML Algorithm for structured data:- Bayesian networks- k-nearest neighbors (KNN)- Decision treesClean Anomaly Detection: Clean anomaly detection refers to situations where the data is mostly clean and free from noise or errors, making it easier to detect anomalies. In this setting, the focus is on identifying significant deviations from the established normal patterns. Clean anomaly detection is commonly used in applications where the data is well-structured and follows predictable patterns, such as fraud detection in
from the established normal patterns. Clean anomaly detection is commonly used in applications where the data is well-structured and follows predictable patterns, such as fraud detection in financial transactions or quality control in manufacturing.Unsupervised Anomaly Detection: Unsupervised anomaly detection occurs when there are no labeled anomalies in the training data, and the model needs to identify anomalies without prior knowledge of what constitutes an anomaly. The model’s task is to find data points that deviate significantly from the majority of the data, making it suitable for cases where anomalies are rare or poorly understood. ML algorithm for unstructured data:- K-means- One-class support vector machineHere are some common approaches to anomaly detection:Press enter or click to view image in full sizeStatistical methods:Z-Score/Standard Score: This method measures how many standard deviations a data point is away from the mean. Points that fall far from the mean are considered anomalies.Percentiles: Identifying anomalies based on percentiles or quantiles, where values below or above a certain threshold are considered outliers.Machine learning algorithms:Isolation Forest: An ensemble learning method that builds a tree structure to isolate anomalies efficiently.One-Class SVM: A support vector machine (SVM) model trained to classify data points as normal or outliers.K-Nearest Neighbors (KNN): Assigns an anomaly score based on the distance to the K-nearest neighbors, with distant points being potential anomalies.Autoencoders: Neural networks designed to learn a compressed representation of data, where reconstruction error can be used to identify anomalies.Clustering methods:DBSCAN (Density-Based Spatial Clustering of Applications with Noise): Clusters data points based on their density, with points that do not belong to any cluster considered outliers.K-Means Clustering: Data points that do not belong to well-defined clusters may be considered anomalies.Time-series analysis:Moving Averages: Identifying anomalies based on deviations from the moving average or exponential moving average.Seasonal Decomposition: Decomposing a time series into its trend, seasonal, and residual components, with anomalies often detected in the residual component.Proximity-based approaches:Mahalanobis Distance: Measures the distance of data points from the center of the data distribution, considering correlations between features.Local Outlier Factor (LOF): Computes the local density deviation of a data point compared to its neighbors, identifying regions of different densities.Let’s dive a bit deeper into how DBSCAN works with a simple analogyDBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a clever way to find unusual or outlier data points in a group of data. Imagine you have a bunch of points on a map, and you want to find the weird ones that don’t really fit into any group.Here’s how DBSCAN works:Step 1: Select a starting pointBegin by randomly selecting a data point from your dataset.Step 2: Define a radius (Epsilon) and minimum number of oints (Min_Samples)Specify two important values:Epsilon (a radius around the selected point).Min_Samples (the minimum number of data points that should be within this radius to form a cluster)Step 3: Check neighboring pointsExamine all data points within the defined radius (Epsilon) around the selected point.Step 4: Form a clusterIf there are at least as many data points within the Epsilon radius as specified by Min_Samples, consider the selected point and these nearby points as a cluster.Step 5: Expand the clusterNow, for each point within this newly formed cluster, repeat the process. Check for nearby points within the Epsilon radius.If additional points are found, add them to the cluster. This process continues iteratively, expanding the cluster until no more points can be added.Step 6: Identify outliers (noise)Any data points that are not included in any cluster after the
cluster. This process continues iteratively, expanding the cluster until no more points can be added.Step 6: Identify outliers (noise)Any data points that are not included in any cluster after the expansion process are labeled as outliers or noise. These points do not belong to any cluster.Imagine you have a field with a bunch of people scattered around, and you want to organize a game of tag. Some people are standing close together, and others are standing alone. DBSCAN helps you identify two things:Groups of Players: It starts by picking a person, any person, and puts an imaginary hula hoop around them (this is like setting a maximum distance). Now, it checks how many other people are inside that hula hoop. If there are enough (more than a certain number you decide in advance), it forms a group. This group is like a team of players playing tag.Lonely Players: After forming that group, it picks a person within that group, puts a hula hoop around them, and checks if there are more people inside. If yes, it adds them to the group. This process continues until there are no more people to add to that group.Now, here’s the cool part: Anyone who doesn’t end up in a group is the outlier or the “lonely player.” These are the people who don’t belong to any team, or in data terms, they are the outliers.To apply DBSCAN for outlier detection in Python using Scikit-Learn, we begin by importing the necessary libraries and modules, as follows:Step 1: Import necessary librariesThe code starts by importing the required Python libraries, including NumPy for numerical operations, Matplotlib for data visualization, and the DBSCAN class from scikit-learn for implementing the DBSCAN algorithm.import numpy as npimport matplotlib.pyplot as pltfrom sklearn.cluster import DBSCANfrom sklearn.datasets import make_blobsStep 2: Create a synthetic dataset# Create a synthetic dataset with normal and anomalous data pointsn_samples = 300X, y = make_blobs(n_samples=n_samples, centers=2, random_state=42, cluster_std=1.0)anomalies = np.array([[5, 5], [6, 6], [7, 7]])In this step, a synthetic dataset is generated to illustrate the concept. The dataset is created using the make_blobs function, producing two clusters of data points with some isolated anomalies.n_samples determines the total number of data points, and the centers parameter specifies the number of clusters (2, in this case).The anomalies variable is an array of manually created anomalous data points.Step 3: Combine normal and anomalous data# Combine the normal data and anomaliesX = np.vstack([X, anomalies])The normal data and anomalies are combined into a single dataset represented by the X array using np.vstack.Step 4: Visualize the dataset# Visualize the datasetplt.scatter(X[:, 0], X[:, 1], c='b', marker='o', s=25)plt.title("Synthetic Dataset")plt.show()The code plots the dataset to provide a visual representation. It uses Matplotlib to create a scatter plot, where normal data points are marked in blue circles.The resulting plot visually shows two clusters and some isolated red crosses representing the anomalies.Step 5: Apply DBSCAN for anomaly detection# Apply DBSCAN for anomaly detection with increased epsilondbscan = DBSCAN(eps=1, min_samples=41)  # Increase epslabels = dbscan.fit_predict(X)# Anomalies are considered as points with label -1anomalies = X[labels == -1]DBSCAN is applied for anomaly detection using the DBSCAN class from scikit-learn. The parameters eps (epsilon) and min_samples control the algorithm's behavior.The eps parameter sets the radius within which points are considered neighbors.The min_samples parameter defines the minimum number of points required to form a cluster.The code then fits the DBSCAN model to the dataset using fit_predict to obtain cluster labels for each data point.Step 6: Identify anomalies# Anomalies are considered as points with label -1anomalies = X[labels == -1]Anomalies are identified by finding data points labeled as -1. These points do not belong to
6: Identify anomalies# Anomalies are considered as points with label -1anomalies = X[labels == -1]Anomalies are identified by finding data points labeled as -1. These points do not belong to any cluster and are considered outliers or anomalies.Step 7: Visualize the anomalies# Visualize the anomaliesplt.scatter(X[:, 0], X[:, 1], c='b', marker='o', s=25)plt.scatter(anomalies[:, 0], anomalies[:, 1], c='r', marker='x', s=50, label='Anomalies')plt.title("Anomaly Detection with DBSCAN (Anomalies Outside Clusters)")plt.legend()plt.show()The code plots the anomalies found by DBSCAN in red crosses on top of the original data points.This visualization helps to highlight the anomalies detected by the algorithm.Step 8: Print the identified anomalies# Print the identified anomaliesprint("Identified Anomalies:")print(anomalies)The code concludes by printing the coordinates of the identified anomalies, allowing you to see the specific data points classified as anomalies by the DBSCAN algorithm.By following these steps, you can effectively identify an anomaly with DBSCAN and visualize its results.ConclusionDBSCAN is a valuable tool for anomaly detection, offering a data-driven approach to uncovering outliers in complex datasets. By following the step-by-step guide and code provided in this blog post, you can integrate DBSCAN into your own data analysis projects, enhance your anomaly detection capabilities, and make more informed decisions based on the unique insights that outliers can provide.Follow Simform Engineering to keep yourself updated with the latest trends in the technology horizon. Follow us: Twitter | LinkedInReferencesAnomaly detection with practical exampleImagine you are working as a sysadmin in a fintech company. There can be an issue in the front-end that stops your…towardsdatascience.comUnsupervised learning for anomaly detection - CENTUM DigitalLa identificación de anomalías puede suponer una gran ventaja para tu empresa. Aquí te contamos cómo el aprendizaje no…centum.comHow to do Anomaly Detection using Machine Learning in Python?Anomaly Detection using Machine Learning in Python Example | ProjectProwww.projectpro.ioAlgorithm selection for Anomaly DetectionAnomalies can be defined as observations which deviate sufficiently from most observations in the data set to consider…medium.comUnsupervised LearningAnomaly DetectionMachine LearningDbscan Algorithm----1Published in Simform Engineering1.4K followers·Last published 1 day agoOur Engineering blog gives an inside look at our technologies from the perspective of our engineers.Written by Hiraltalsaniya20 followers·28 followingResponses (1)See all responsesHelpStatusAboutCareersPressBlogPrivacyRulesTermsText to speech

In [14]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model=MODEL)

In [15]:
from langchain_chroma import Chroma

vectordb = Chroma.from_documents(langchain_docs, embedding=embeddings)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x21bf14e82c0>

In [16]:
retriever = vectordb.as_retriever()
retriever   

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000021BF14E82C0>, search_kwargs={})

In [17]:
from langchain_ollama import ChatOllama

local_llm=ChatOllama(model=MODEL,temperature=0)
type(local_llm)

langchain_ollama.chat_models.ChatOllama

In [18]:
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

In [19]:
question_ans_chain = create_stuff_documents_chain(local_llm,prompt)

question_ans_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| ChatOllama(model='llama3.2', temperature=0.0)
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [20]:
# query ="Do you know anything about anomaly?"
query ="How can you use anomaly deduction for monitoring an application latency?"

rag_chain = create_retrieval_chain(retriever,question_ans_chain)


In [21]:
results = rag_chain.invoke({"input" : query})
results

{'input': 'How can you use anomaly deduction for monitoring an application latency?',
 'context': [Document(id='d22d3c7a-9162-4df7-8473-d71aa26cbb30', metadata={'num_chunks': 1}, page_content="Vercel Security Checkpoint                  We're verifying your browser  Website owner? Click here to fix       Vercel Security Checkpoint | iad1::1759625628-DDjT8AGoDNVohG6hOhUWnrzIaph0TOCE                     Enable JavaScript to continue    Vercel Security Checkpoint | iad1::1759625628-DDjT8AGoDNVohG6hOhUWnrzIaph0TOCE"),
  Document(id='088a2567-9a01-4a19-aa26-d43c82723aae', metadata={'num_chunks': 4}, page_content='Anomaly Detection with Unsupervised Machine Learning | by Hiraltalsaniya | Simform Engineering | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inSimform Engineering·Our Engineering blog gives an inside look at our technologies from the perspective of our engineers.Anomaly Detection with Unsupervised Machine LearningHiraltalsaniya9 min read·Dec 22, 2023--1L

In [22]:
from IPython.display import Markdown

final_answer = results["answer"]
Markdown(final_answer)

Anomaly detection can be used to monitor application latency by identifying unusual patterns or spikes in response times. Here's a step-by-step approach:

1. **Collect latency data**: Gather latency metrics from your application, such as average response time, 95th percentile response time, and number of requests with high latency.
2. **Preprocessing**: Clean and preprocess the data to remove any noise or outliers. This may involve handling missing values, transforming data into a suitable format for analysis, and normalizing the data.
3. **Choose an algorithm**: Select an anomaly detection algorithm that suits your needs, such as One-Class SVM, Local Outlier Factor (LOF), or Isolation Forest.
4. **Train the model**: Train the chosen algorithm on historical latency data to learn the normal patterns and behavior of your application.
5. **Monitor live data**: Feed live latency data into the trained model to detect anomalies in real-time.
6. **Threshold setting**: Set a threshold for what constitutes an anomaly, based on the distribution of latency data and the chosen algorithm's sensitivity.
7. **Alerting and notification**: Configure alerts and notifications to be triggered when anomalies are detected, so that you can investigate and take corrective action.

Some popular techniques for anomaly detection in application latency include:

* **Time-series analysis**: Analyze latency data over time to identify patterns and trends.
* **Machine learning**: Use machine learning algorithms to detect anomalies based on historical data.
* **Statistical methods**: Apply statistical methods, such as Z-score or standard deviation, to identify outliers.

Example of how you can use anomaly detection in Python using Scikit-Learn:

```python
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Load latency data
latency_data = pd.read_csv('latency_data.csv')

# Preprocess data
scaler = StandardScaler()
latency_data[['avg_response_time', '95th_percentile_response_time']] = scaler.fit_transform(latency_data[['avg_response_time', '95th_percentile_response_time']])

# Train Isolation Forest model
model = IsolationForest(contamination=0.01)
model.fit(latency_data[['avg_response_time', '95th_percentile_response_time']])

# Monitor live data
new_latency_data = pd.DataFrame({'avg_response_time': [10, 20, 30], '95th_percentile_response_time': [15, 25, 35]})
new_latency_data[['avg_response_time', '95th_percentile_response_time']] = scaler.transform(new_latency_data[['avg_response_time', '95th_percentile_response_time']])

# Detect anomalies
anomalies = model.predict(new_latency_data[['avg_response_time', '95th_percentile_response_time']])
```

In this example, we use the Isolation Forest algorithm to detect anomalies in latency data. The `contamination` parameter is set to 0.01, which means that 1% of the data points are expected to be anomalies. The model is trained on historical data and then used to predict new latency data. Anomalies are detected based on the predicted values.

In [23]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ('system', system_prompt),
    ('human', '{input}')
])

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain_from_docs = (
        {
            'input': lambda x: x['input'],
            'context': lambda x: format_docs(x['context']),
        }
        | prompt
        | local_llm
        | StrOutputParser()
)

In [24]:
# passing the input query to the retriever
retrieve_docs = (lambda x: x['input']) | retriever

In [25]:
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)
chain

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000021BF14E82C0>, search_kwargs={})
})
| RunnableAssign(mapper={
    answer: {
              input: RunnableLambda(...),
              context: RunnableLambda(...)
            }
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partia

In [None]:
query = "Tell me if anomaly deduction falls under machine learning?"
chain.invoke({'input': query})