In [1]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import NLTKTextSplitter
import nltk

nltk.download("punkt")

from langchain_community.document_loaders import SeleniumURLLoader
from langchain.prompts import PromptTemplate

[nltk_data] Downloading package punkt to /home/carlos/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# we'll use information from the following articles

urls = [
    "https://beebom.com/what-is-nft-explained/",
    "https://beebom.com/how-delete-spotify-account/",
    "https://beebom.com/how-download-gif-twitter/",
    "https://beebom.com/how-use-chatgpt-linux-terminal/",
    "https://beebom.com/how-delete-spotify-account/",
    "https://beebom.com/how-save-instagram-story-with-music/",
    "https://beebom.com/how-install-pip-windows/",
    "https://beebom.com/how-check-disk-usage-linux/",
]

In [14]:
# use the selenium scraper to load the documents

loader = SeleniumURLLoader(urls=urls)

docs_not_splitted = loader.load()

# we split the documents into smaller chunks

text_splitter = NLTKTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs_not_splitted)

Created a chunk of size 1068, which is longer than the specified 1000


In [16]:
print(docs[0])
print("*****************")
print(docs[1])
print("*****************")
print(docs[2])
print("*****************")
print(docs[3])
print("*****************")
print(docs[4])

page_content='Home > Internet > NFTs Explained: What is an NFT and What is Its Use

NFTs Explained: What is an NFT and What is Its Use

Arjun Sha

Published: December 6, 2021

Comments 2

Share

Copied

After Bitcoin and Blockchain, NFT is another word to have entered our lexicon.

The buzzword is everywhere and people are wondering what is NFT and what is its use?

Well, there is not really a one-line explainer.

And that’s why we have brought a comprehensive explainer on NFT, what is its use in digital art, and more.

So without wasting any time, let’s go ahead and learn about NFTs (Non-fungible Token) in complete detail.

What is NFT: A Definitive Explainer (2021)

Here, we have mentioned all the questions that people have in their minds regarding NFT.

You can click on the table to find all the sections that we have covered in this article and click on the link to move to the corresponding section.

Table of Contents

NFTs Explained: What is NFT in Crypto?' metadata={'source': 'htt

In [21]:
# Before executing the following code, make sure to have
# your OpenAl key saved in the “OPENAI_API_KEY” environment variable.
embed = OpenAIEmbeddings(
    model="text-embedding-3-small", deployment="text-embedding-3-small"
)

In [22]:
embed

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x77fc7e636fc0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x77fc7f0b2720>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-3-small', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [24]:
# Import necessary modules
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

# Create a FAISS vector store
db = FAISS.from_documents(docs, embed)

# Save the vector store locally
db.save_local("faiss_index")

print("Local FAISS index created and saved.")

# To load the index later, you can use:
# db = FAISS.load_local("faiss_index", embeddings)

# Note: Make sure your system has CUDA installed and configured properly to use faiss-gpu
# If you encounter issues, you may need to fall back to faiss-cpu

Local FAISS index created and saved.


In [28]:
#let's see the top relevant documents to a specific quer
query = "how to check disk usage in linux?"

docs = db.similarity_search(query)
print(f"length of docs: {len(docs)}"
print(docs[0].metadata)
print(docs[0].page_content)
print("*****************")
print(docs[1].metadata)
print(docs[1].page_content)
print("*****************")
print(docs[2].metadata)
print(docs[2].page_content)
print("*****************")
print(docs[3].metadata)
print(docs[3].page_content)

length of docs: 4
{'source': 'https://beebom.com/how-check-disk-usage-linux/', 'title': 'How to Check Disk Usage in Linux (4 Methods) | Beebom', 'description': 'In this guide, we show you the best ways to check disk usage on Linux using GUI tools as well as CLI commands like du and df.', 'language': 'en-US'}
Home > Tech > How to Check Disk Usage in Linux (4 Methods)

How to Check Disk Usage in Linux (4 Methods)

Beebom Staff

Updated: December 19, 2023

Comments 0

Share

Copied

There may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space.

You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space.

In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.

Table of Contents

Check Disk Space Using the df Command

In Lin

In [29]:
# let's write a prompt for a customer support chatbot that

# answer questions using information extracted from our db

template = """You are an exceptional customer support chatbot that gently answer questions.
You know the following context information.

{chunks_formatted}

Answer to the following question from a customer. Use only information from the previous
context information. Do not invent stuff.

Question: {query}

Answer:"""

prompt = PromptTemplate(
    input_variables=["chunks_formatted", "query"],
    template=template,
)

In [32]:
# the full pipeline

# user question

query = "How to check disk usage in linux?"

# retrieve relevant chunks
docs = db.similarity_search(query)

retrieved_chunks = [doc.page_content for doc in docs]

In [42]:
# format the prompt

chunks_formatted = "\n\n".join(retrieved_chunks)

prompt_formatted = prompt.format(chunks_formatted=chunks_formatted, query=query)
# generate answer

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

answer = llm.invoke(prompt_formatted)

print(answer.content.format())

To check disk usage in Linux, you have several methods available, both through the terminal and using GUI applications. Here are some options:

1. **Using the df Command**: This is a common command to check current disk usage and available space. You can use it with the following syntax:
   - To display disk usage in a human-readable format, use: `df -h`
   - To view the disk usage of a particular file system type, use: `df -t <file_system_type>`

2. **Using the du Command**: If you want to check the disk usage for individual files and directories, you can use the `du` command, which stands for "disk usage."

3. **Using GUI Tools**: If you prefer a graphical interface, you can use tools like the Disk Usage Analyzer or the Gnome Disks Tool. To install the Disk Usage Analyzer, you can run:
   - `sudo snap install gdu-disk-usage-analyzer`
   - For the Gnome Disks Tool, use: `sudo apt-get -y install gnome-disk-utility`

   After installing the Disk Usage Analyzer, you can access it via the

In [43]:
prompt_formatted

'You are an exceptional customer support chatbot that gently answer questions.\nYou know the following context information.\n\nHome > Tech > How to Check Disk Usage in Linux (4 Methods)\n\nHow to Check Disk Usage in Linux (4 Methods)\n\nBeebom Staff\n\nUpdated: December 19, 2023\n\nComments 0\n\nShare\n\nCopied\n\nThere may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space.\n\nYou head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space.\n\nIn this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.\n\nTable of Contents\n\nCheck Disk Space Using the df Command\n\nIn Linux, there are many commands to check disk usage, the most common being the df command.\n\nThe df stands for “Disk Filesystem” in the command, which is a handy 

In [41]:
print(answer.content.format())

To check disk usage in Linux, you have several methods available, both through the terminal and using GUI applications. Here are some options:

1. **Using the df Command**: This is a common command to check current disk usage and available space. You can use it with the following syntax:
   - To display disk usage in a human-readable format, use: `df -h`
   - To view the disk usage of a particular file system type, use: `df -t <file_system_type>`

2. **Using the du Command**: If you want to check the disk usage for individual files and directories, you can use the `du` command, which stands for "disk usage."

3. **Using GUI Tools**: If you prefer a graphical interface, you can use tools like the Disk Usage Analyzer or the Gnome Disks Tool. To install the Disk Usage Analyzer, you can run:
   ```
   sudo snap install gdu-disk-usage-analyzer
   ```
   For the Gnome Disks Tool, use:
   ```
   sudo apt-get -y install gnome-disk-utility
   ```
   After installation, you can access these tool

In [49]:
from langchain.schema import HumanMessage
template = """You are an exceptional customer support chatbot that gently answers questions.
You know the following context information.

{chunks_formatted}

Answer the following question from a customer. Use only information from the previous
context information. Do not invent stuff.

Question: {query}

Answer:"""

prompt = PromptTemplate(
    input_variables=["chunks_formatted", "query"],
    template=template,
)

# Format the prompt with the context and query
prompt_formatted = prompt.format(chunks_formatted=chunks_formatted, query=query)

# Initialize the chat model
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Create the message object
message = HumanMessage(content=prompt_formatted)
print(message.content.format())

You are an exceptional customer support chatbot that gently answers questions.
You know the following context information.

Home > Tech > How to Check Disk Usage in Linux (4 Methods)

How to Check Disk Usage in Linux (4 Methods)

Beebom Staff

Updated: December 19, 2023

Comments 0

Share

Copied

There may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space.

You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space.

In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.

Table of Contents

Check Disk Space Using the df Command

In Linux, there are many commands to check disk usage, the most common being the df command.

The df stands for “Disk Filesystem” in the command, which is a handy way to check the current disk

In [50]:
# Generate the answer
answer = llm([message])

# Print the answer
print(answer.content)

To check disk usage in Linux, you have several methods available, both through the terminal and using GUI applications. Here are some options:

1. **Using the df Command**: 
   - The `df` command stands for "Disk Filesystem" and is a common way to check current disk usage and available space. You can use it with the `-h` flag to display sizes in a human-readable format (e.g., megabytes, gigabytes) by running:
     ```
     df -h
     ```

2. **Using the du Command**: 
   - If you want to check the disk usage for individual files and directories, you can use the `du` command, which stands for "disk usage." This command provides detailed information about the space used by specific files and directories.

3. **Using GUI Tools**: 
   - If you prefer a graphical interface, you can use tools like the GDU Disk Usage Analyzer or the Gnome Disks Tool. You can install them using the following commands:
     - For GDU Disk Usage Analyzer:
       ```
       sudo snap install gdu-disk-usage-analyz

### Now let's try with a different embedding model, an open source one

In [7]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import NLTKTextSplitter
from langchain_community.document_loaders import SeleniumURLLoader
from langchain.prompts import PromptTemplate

In [2]:

urls = [
    "https://beebom.com/what-is-nft-explained/",
    "https://beebom.com/how-delete-spotify-account/",
    "https://beebom.com/how-download-gif-twitter/",
    "https://beebom.com/how-use-chatgpt-linux-terminal/",
    "https://beebom.com/how-delete-spotify-account/",
    "https://beebom.com/how-save-instagram-story-with-music/",
    "https://beebom.com/how-install-pip-windows/",
    "https://beebom.com/how-check-disk-usage-linux/",
]

In [8]:
# use the selenium scraper to load the documents
import nltk
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
loader = SeleniumURLLoader(urls=urls)

docs_not_splitted = loader.load()

# we split the documents into smaller chunks

text_splitter = NLTKTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs_not_splitted)

[nltk_data] Downloading package punkt_tab to /home/carlos/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/carlos/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
Created a chunk of size 1068, which is longer than the specified 1000


In [22]:
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
import sentence_transformers

model_name = "sentence-transformers/all-mpnet-base-v2"

model_kwargs = {'device': 'cpu'}

hf = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
documents = ["King", "Queen", "Man", "Woman"]
doc_embeddings = hf.embed_documents(documents)

In [36]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Find the most similar document
arr = np.array(doc_embeddings[0]) - np.array(doc_embeddings[2]) + np.array(doc_embeddings[3])
arr_normalized = arr / np.sqrt(np.sum(arr**2))
similarities = -cosine_similarity([arr_normalized], doc_embeddings)
most_similar_index = np.argmax(similarities)
most_similar_document = documents[most_similar_index]
print(most_similar_document)

Man


In [35]:
similarities

array([[ 0.63691544,  0.50925241, -0.25280194,  0.58713881]])

In [2]:
import cohere
from langchain_cohere import CohereEmbeddings
import os
from dotenv import load_dotenv
load_dotenv(dotenv_path='../.env')

# Initialize the CohereEmbeddings object
embeddings_object = CohereEmbeddings(
      model="embed-multilingual-v2.0",
      cohere_api_key=os.getenv("COHERE_API_KEY")
)

# Define a list of texts
texts =[
"Hello from Cohere!",
"Hallo von Cohere!",
"Bonjour de Cohere!",
"Hola desde Cohere!",
"Ola do Cohere!",
"Ciao da Cohere!",
]

# Generate embeddings for the texts
document_embeddings = embeddings_object.embed_documents(texts)
# Print the embeddings
for text, embedding in zip(texts, document_embeddings):
      print(f"Text: {text}")
      print(f"Embedding: {embedding[:5]}") # print first 5 dimensions of-

Text: Hello from Cohere!
Embedding: [0.234375, 0.50146484, -0.048797607, 0.13989258, -0.17980957]
Text: Hallo von Cohere!
Embedding: [0.10235596, 0.28344727, -0.050231934, 0.2368164, -0.071899414]
Text: Bonjour de Cohere!
Embedding: [0.15161133, 0.28100586, -0.057281494, 0.11730957, -0.043670654]
Text: Hola desde Cohere!
Embedding: [0.26733398, 0.4104004, -0.10748291, 0.25976562, -0.12878418]
Text: Ola do Cohere!
Embedding: [0.19580078, 0.22631836, -0.0647583, 0.10192871, -0.17895508]
Text: Ciao da Cohere!
Embedding: [0.11566162, 0.43310547, -0.025894165, 0.14526367, 0.070739746]
