In [5]:
import os
from os.path import join, dirname
from dotenv import load_dotenv
from setuptools import setup

from pathlib import Path

cwd = Path().resolve()
# or this
cwd = Path.cwd()

# dotenv_path = join(dirname(__file__), '.env')
# print(dotenv_path)

load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ACTIVELOOP_TOKEN = os.environ.get("ACTIVELOOP_TOKEN")
GOOGLE_API_KEY=os.environ.get("GOOGLE_API_KEY")
GOOGLE_CSE_ID=os.environ.get("GOOGLE_CSE_ID")
HUGGINGFACEHUB_API_TOKEN=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
MY_ACTIVELOOP_ORG_ID=os.environ.get("MY_ACTIVELOOP_ORG_ID")
# print(ACTIVELOOP_TOKEN)

In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI
from langchain.document_loaders import SeleniumURLLoader
from langchain import PromptTemplate

In [2]:
# we'll use information from the following articles
urls = ['https://beebom.com/what-is-nft-explained/',
        'https://beebom.com/how-delete-spotify-account/',
        'https://beebom.com/how-download-gif-twitter/',
        'https://beebom.com/how-use-chatgpt-linux-terminal/',
        'https://beebom.com/how-delete-spotify-account/',
        'https://beebom.com/how-save-instagram-story-with-music/',
        'https://beebom.com/how-install-pip-windows/',
        'https://beebom.com/how-check-disk-usage-linux/']

In [3]:
# use the selenium scraper to load the documents
loader = SeleniumURLLoader(urls=urls)
docs_not_splitted = loader.load()

# we split the documents into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs_not_splitted)

Error getting version of chromedriver 115. Retrying with chromedriver 114 (attempt 1/5)
Created a chunk of size 1226, which is longer than the specified 1000


In [6]:
# Before executing the following code, make sure to have
# your OpenAI key saved in the “OPENAI_API_KEY” environment variable.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# create Deep Lake dataset
# TODO: use your organization id here. (by default, org id is your username)
my_activeloop_org_id = MY_ACTIVELOOP_ORG_ID
my_activeloop_dataset_name = "langchain_course_customer_support"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# add documents to our Deep Lake dataset
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!


|

Dataset(path='hub://sharathshetty/langchain_course_customer_support', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (127, 1536)  float32   None   
    id        text      (127, 1)      str     None   
 metadata     json      (127, 1)      str     None   
   text       text      (127, 1)      str     None   


 

['12b9a1ce-2a59-11ee-9ba3-00155dc34031',
 '12b9a296-2a59-11ee-9ba3-00155dc34031',
 '12b9a2d2-2a59-11ee-9ba3-00155dc34031',
 '12b9a304-2a59-11ee-9ba3-00155dc34031',
 '12b9a336-2a59-11ee-9ba3-00155dc34031',
 '12b9a368-2a59-11ee-9ba3-00155dc34031',
 '12b9a390-2a59-11ee-9ba3-00155dc34031',
 '12b9a3c2-2a59-11ee-9ba3-00155dc34031',
 '12b9a3f4-2a59-11ee-9ba3-00155dc34031',
 '12b9a426-2a59-11ee-9ba3-00155dc34031',
 '12b9a44e-2a59-11ee-9ba3-00155dc34031',
 '12b9a480-2a59-11ee-9ba3-00155dc34031',
 '12b9a4a8-2a59-11ee-9ba3-00155dc34031',
 '12b9a4da-2a59-11ee-9ba3-00155dc34031',
 '12b9a50c-2a59-11ee-9ba3-00155dc34031',
 '12b9a534-2a59-11ee-9ba3-00155dc34031',
 '12b9a566-2a59-11ee-9ba3-00155dc34031',
 '12b9a58e-2a59-11ee-9ba3-00155dc34031',
 '12b9a5c0-2a59-11ee-9ba3-00155dc34031',
 '12b9a5f2-2a59-11ee-9ba3-00155dc34031',
 '12b9a61a-2a59-11ee-9ba3-00155dc34031',
 '12b9a64c-2a59-11ee-9ba3-00155dc34031',
 '12b9a674-2a59-11ee-9ba3-00155dc34031',
 '12b9a6a6-2a59-11ee-9ba3-00155dc34031',
 '12b9a6d8-2a59-

In [7]:
# let's see the top relevant documents to a specific query
query = "how to check disk usage in linux?"
docs = db.similarity_search(query)
print(docs[0].page_content)

Home  Tech  How to Check Disk Usage in Linux (4 Methods)

How to Check Disk Usage in Linux (4 Methods)

Beebom Staff

Last Updated: June 19, 2023 5:14 pm

There may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space. You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space. In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.

Monitor Disk Usage in Linux (2023)

Table of Contents

Check Disk Space Using the df Command
		
Display Disk Usage in Human Readable FormatDisplay Disk Occupancy of a Particular Type

Check Disk Usage using the du Command
		
Display Disk Usage in Human Readable FormatDisplay Disk Usage for a Particular DirectoryCompare Disk Usage of Two Directories


In [8]:
# let's write a prompt for a customer support chatbot that
# answer questions using information extracted from our db
template = """You are an exceptional customer support chatbot that gently answer questions.

You know the following context information.

{chunks_formatted}

Answer to the following question from a customer. Use only information from the previous context information. Do not invent stuff.

Question: {query}

Answer:"""

prompt = PromptTemplate(
    input_variables=["chunks_formatted", "query"],
    template=template,
)

In [9]:
# the full pipeline

# user question
query = "How to check disk usage in linux?"

# retrieve relevant chunks
docs = db.similarity_search(query)
retrieved_chunks = [doc.page_content for doc in docs]

# format the prompt
chunks_formatted = "\n\n".join(retrieved_chunks)
prompt_formatted = prompt.format(chunks_formatted=chunks_formatted, query=query)

# generate answer
llm = OpenAI(model="text-davinci-003", temperature=0)
answer = llm(prompt_formatted)
print(answer)

 You can check disk usage in Linux using the df command or by using a GUI tool such as the GDU Disk Usage Analyzer or the Gnome Disks Tool. The df command is used to check the current disk usage and the available disk space in Linux. The syntax for the df command is: df <options> <file_system>. The options to use with the df command are: a, h, t, and x. To install the GDU Disk Usage Analyzer, use the command: sudo snap install gdu-disk-usage-analyzer. To install the Gnome Disks Tool, use the command: sudo apt-get -y install gnome-disk-utility.
