In [1]:
import os 
from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(find_dotenv(), override=True)

True

In [2]:
# https://api.python.langchain.com/en/latest/community_api_reference.html#module-langchain_community.document_loaders
def load_document(file):
    """Load a local document with the LangChain loader matching its extension.

    The extension is compared case-insensitively, so ``report.PDF`` is handled
    exactly like ``report.pdf``.

    Args:
        file: Path of the document. Supported extensions are ``.pdf``,
            ``.docx`` and ``.txt``.

    Returns:
        Whatever the selected loader's ``load()`` returns, or ``None`` (after
        printing a message) when the extension is not supported.
    """
    import os

    # Extension -> loader class name in langchain_community.document_loaders.
    loader_names = {
        '.pdf': 'PyPDFLoader',
        '.docx': 'Docx2txtLoader',
        '.txt': 'TextLoader',
    }

    _, extension = os.path.splitext(file)
    loader_name = loader_names.get(extension.lower())
    if loader_name is None:
        print('Document format is not supported!')
        return None

    # Imported lazily so that rejecting an unsupported file never requires
    # the loader package (or the per-format parsing dependencies).
    from langchain_community import document_loaders

    print(f'Loading {file}')
    loader = getattr(document_loaders, loader_name)(file)
    return loader.load()

In [4]:
def load_webdocument(web_paths=None, requests_per_second=1):
    """Download web pages with ``WebBaseLoader`` and return the loaded documents.

    Args:
        web_paths: Iterable of URLs to load. When omitted, the tutorialspoint
            "Signals and Systems" tutorial pages listed below are used, which
            is what the function loaded before it took parameters.
        requests_per_second: Value forwarded to ``WebBaseLoader`` under the
            same name. Defaults to 1, the value previously hard-coded.

    Returns:
        The result of ``WebBaseLoader.load()``.
    """
    from langchain_community.document_loaders import WebBaseLoader

    if web_paths is None:
        web_paths = [
            "https://www.tutorialspoint.com/signals_and_systems/signals_and_systems_overview.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_basic_types.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_classification.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_basic_operations.htm",
            "https://www.tutorialspoint.com/signals_and_systems/systems_classification.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_analysis.htm",
            "https://www.tutorialspoint.com/signals_and_systems/fourier_series.htm",
            "https://www.tutorialspoint.com/signals_and_systems/fourier_series_properties.htm",
            "https://www.tutorialspoint.com/signals_and_systems/fourier_series_types.htm",
            "https://www.tutorialspoint.com/signals_and_systems/fourier_transforms.htm",
            "https://www.tutorialspoint.com/signals_and_systems/fourier_transforms_properties.htm",
            "https://www.tutorialspoint.com/signals_and_systems/distortion_less_transmission.htm",
            "https://www.tutorialspoint.com/signals_and_systems/hilbert_transform.htm",
            "https://www.tutorialspoint.com/signals_and_systems/convolution_and_correlation.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_sampling_theorem.htm",
            "https://www.tutorialspoint.com/signals_and_systems/signals_sampling_techniques.htm",
            "https://www.tutorialspoint.com/signals_and_systems/laplace_transforms.htm",
            "https://www.tutorialspoint.com/signals_and_systems/laplace_transforms_properties.htm",
            "https://www.tutorialspoint.com/signals_and_systems/region_of_convergence.htm",
            "https://www.tutorialspoint.com/signals_and_systems/z_transforms.htm",
            "https://www.tutorialspoint.com/signals_and_systems/z_transforms_properties.htm"
        ]

    loader = WebBaseLoader(
        # Materialise the iterable so generators and tuples are accepted too.
        web_paths=list(web_paths),
        requests_per_second=requests_per_second
    )
    return loader.load()

In [5]:
# https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.text_splitter
def chunk_data(data, chunk_size=512, chunk_overlap=50):
    """Split documents into overlapping chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        data: Documents to split; passed to ``split_documents``.
        chunk_size: Value forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Value forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The chunks returned by ``split_documents``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Separators are plain strings here (no regex mode is enabled), so the
        # sentence boundary must be the literal ". ". The previous "\. " was an
        # invalid escape that denoted backslash-dot-space and never matched
        # ordinary text.
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    return text_splitter.split_documents(data)

In [5]:
def print_embedding_cost(texts, model='text-embedding-ada-002', usd_per_1k_tokens=0.0004):
    """Print the token count of ``texts`` and the cost of embedding them.

    Args:
        texts: Iterable of objects exposing a ``page_content`` string.
        model: Model name handed to ``tiktoken.encoding_for_model`` to pick
            the tokenizer.
        usd_per_1k_tokens: Price in USD per 1000 tokens used for the estimate.
            The default keeps the previously hard-coded rate; pass the current
            price of your model, since provider pricing changes over time.

    Returns:
        None. The two result lines are printed.
    """
    from tiktoken import encoding_for_model

    enc = encoding_for_model(model)
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens / 1000 * usd_per_1k_tokens:.6f}')

In [6]:
# https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.Pinecone.html#langchain_pinecone.vectorstores.Pinecone
def pinecone_insert_or_fetch_embeddings(index_name, data):
    """Return a Pinecone-backed vector store, creating the index when missing.

    If an index called ``index_name`` already exists, its embeddings are
    loaded. Otherwise a 1536-dimensional cosine index is created and ``data``
    is embedded into it.

    Args:
        index_name: Name of the Pinecone index.
        data: Documents to embed; only used when the index is created.

    Returns:
        The ``langchain_pinecone`` vector store bound to ``index_name``.
    """
    from pinecone import Pinecone, PodSpec
    from langchain_openai import OpenAIEmbeddings
    from langchain_pinecone import Pinecone as langpine

    embeddings = OpenAIEmbeddings()
    # The API key is read from the environment (populated from .env earlier).
    pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    # list_indexes() yields index descriptions, not names, so a bare
    # `index_name in pc.list_indexes()` is never true; compare against .names().
    if index_name in pc.list_indexes().names():
        print(f'Index {index_name} already exists. Loading embeddings ...', end='')
        vector_store = langpine.from_existing_index(index_name, embeddings)
    else:
        print(f'Creating index {index_name} and embeddings ...', end='')
        pc.create_index(
            name=index_name,
            dimension=1536,
            metric="cosine",
            spec=PodSpec(
                environment="gcp-starter"
            )
        )
        vector_store = langpine.from_documents(data, embeddings, index_name=index_name)
    print('Ok')
    return vector_store

In [7]:
def delete_pinecone_index(index_name='all'):
    """Delete one Pinecone index, or every index when ``index_name`` is ``'all'``.

    Args:
        index_name: Name of the index to delete. The default ``'all'`` deletes
            every index returned by the client.

    Returns:
        None. Progress is printed.
    """
    from pinecone import Pinecone

    pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    # All operations go through the client instance, matching how
    # pinecone_insert_or_fetch_embeddings talks to Pinecone. The module-level
    # list_indexes/delete_index helpers used previously bypassed `pc` entirely.
    if index_name == 'all':
        print('Deleting all indexes ... ')
        for name in pc.list_indexes().names():
            pc.delete_index(name)
    else:
        print(f'Deleting index {index_name} ... ', end='')
        pc.delete_index(index_name)
    # Both branches now confirm completion; the single-index branch used to
    # leave its progress line unfinished.
    print('Ok')

In [9]:
# https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.chroma.Chroma.html#langchain_community.vectorstores.chroma.Chroma
def chroma_insert_or_fetch_embeddings(index_name, data, persist_directory="./tutorials_db"):
    """Return a persistent Chroma vector store for the collection ``index_name``.

    When the collection already holds entries it is reused as-is. Otherwise
    ``data`` is embedded and inserted. Previously ``index_name`` was ignored
    and every call re-embedded ``data`` into the default collection, so
    re-running the cell stored the same documents again.

    Args:
        index_name: Chroma collection name.
        data: Documents to embed; only used when the collection is empty.
        persist_directory: Directory of the on-disk Chroma database. Defaults
            to the location that used to be hard-coded.

    Returns:
        The ``langchain_chroma.Chroma`` vector store for the collection.
    """
    from langchain_chroma import Chroma
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()

    # Open the named collection first and probe it for a single id, which is
    # enough to tell an already-populated collection from an empty one.
    vector_store = Chroma(
        collection_name=index_name,
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )
    if vector_store.get(limit=1)['ids']:
        print(f'Index {index_name} already exists. Loading embeddings ...', end='')
    else:
        print(f'Creating index {index_name} and embeddings ...', end='')
        vector_store = Chroma.from_documents(
            data,
            embeddings,
            collection_name=index_name,
            persist_directory=persist_directory,
        )
    print('Ok')
    return vector_store

In [8]:
from pathlib import Path
# '__file__' is passed as a quoted string, so realpath() resolves that literal
# name against the current working directory; dirname() of the result is
# therefore the working directory itself.
dir_path = os.path.dirname(os.path.realpath('__file__'))
# Path of a "cache" folder under that directory; this cell only prints it.
persist_directory = Path(f'{dir_path}/cache')
print(persist_directory)

C:\Users\ASUS\Desktop\langchain\cache


In [11]:
def delete_chroma_collection(vectordb):
    """Delete the collection behind a Chroma vector store.

    Args:
        vectordb: Vector store exposing ``delete_collection()``.

    Returns:
        None.
    """
    vectordb.delete_collection()
    # Only some Chroma wrappers expose persist(); calling it unconditionally
    # raises AttributeError on stores without it, after the collection has
    # already been deleted. Flush only when the method is available.
    persist = getattr(vectordb, 'persist', None)
    if callable(persist):
        persist()

In [10]:
def ask_and_get_answer(vector_store, q, k=3):
    """Answer a single question with a ``RetrievalQA`` "stuff" chain.

    Args:
        vector_store: Vector store providing ``as_retriever``.
        q: The question text.
        k: Number of similar chunks to retrieve. Defaults to 3, the value
            previously hard-coded.

    Returns:
        The answer text (the chain's ``result`` output).
    """
    from langchain.chains import RetrievalQA
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(openai_api_key=os.environ.get('OPENAI_API_KEY'), model='gpt-3.5-turbo', temperature=0)
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': k})

    chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

    # Chain.run() is deprecated; invoke() takes the input under RetrievalQA's
    # "query" key and returns a dict whose "result" entry is the answer.
    response = chain.invoke({'query': q})
    return response['result']

In [42]:
def ask_with_memory_old(vector_store, question, chat_history=None):
    """Ask a question through a verbose ``ConversationalRetrievalChain``.

    Earlier variant of ``ask_with_memory``, kept with its original chain
    options (``verbose=True``, explicit ``condense_question_llm`` and
    ``chain_type``).

    Args:
        vector_store: Vector store providing ``as_retriever``.
        question: The user's question.
        chat_history: List of ``(question, answer)`` tuples from earlier
            turns. It is extended in place with the new turn. When omitted a
            fresh list is created for this call.

    Returns:
        Tuple ``(result, chat_history)`` where ``result`` is the chain output
        and ``chat_history`` includes the new turn.
    """
    from langchain.chains import ConversationalRetrievalChain
    from langchain_openai import ChatOpenAI
    from langchain_core.prompts import ChatPromptTemplate
    from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate

    # A mutable default ([]) is shared between calls and would leak one
    # conversation's history into the next.
    if chat_history is None:
        chat_history = []

    llm = ChatOpenAI(openai_api_key=os.environ.get('OPENAI_API_KEY'), model='gpt-3.5-turbo', temperature=0)

    # The combine-documents prompt must be a prompt *template* exposing the
    # {context} variable the "stuff" chain fills with the retrieved chunks.
    # The earlier code passed a list of already formatted messages instead.
    system_template = """
                        You are a Artificial Intelligence Tutor of Signal ans System subject.
                        You task is to help student to understand Signal ans System subject concept 
                        and help them to solve problem step by step. You should not solve the problem directly,
                        rather you help them to learn and solve the problem gradually and also make sure that 
                        your response are in latex.
                        -------------------------------------------
                        {context}
                    """
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
    )

    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})

    crc = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
        condense_question_llm=llm,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": qa_prompt}
    )
    # invoke() replaces the deprecated direct call crc({...}).
    result = crc.invoke({'question': question, 'chat_history': chat_history})
    chat_history.append((question, result['answer']))

    return result, chat_history

In [9]:
# https://github.com/langchain-ai/langchain/issues/5462
def ask_with_memory(vector_store, question, chat_history=None):
    """Ask a question with conversational memory and a tutor system prompt.

    Builds a ``ConversationalRetrievalChain`` whose combine-documents prompt
    is the tutor system message (with the retrieved ``{context}``) followed
    by the user's ``{question}``.

    Args:
        vector_store: Vector store providing ``as_retriever``.
        question: The user's question.
        chat_history: List of ``(question, answer)`` tuples from earlier
            turns. It is extended in place with the new turn. When omitted a
            fresh list is created for this call.

    Returns:
        Tuple ``(result, chat_history)`` where ``result`` is the chain output
        (including source documents) and ``chat_history`` includes the new
        turn.
    """
    from langchain.chains import ConversationalRetrievalChain
    from langchain_openai import ChatOpenAI
    from langchain_core.prompts import ChatPromptTemplate
    from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate

    # A mutable default ([]) is shared between calls and would leak one
    # conversation's history into the next.
    if chat_history is None:
        chat_history = []

    llm = ChatOpenAI(openai_api_key=os.environ.get('OPENAI_API_KEY'), model='gpt-3.5-turbo', temperature=0)
    # Define the system message template
    system_template = """
            You are a Artificial Intelligence Tutor of Signal ans System subject.
            You task is to help student to understand Signal ans System subject concept 
            and help them to solve problem step by step. You should not solve the problem directly,
            rather you help them to learn and solve the problem gradually and also make sure that 
            your response are in latex. If you cannot find the answer from the pieces of context, 
            just say that you don't know, don't try to make up an answer.
            -------------------------------------------
            {context}
        """

    # Create the chat prompt templates
    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}")
    ]
    qa_prompt = ChatPromptTemplate.from_messages(messages)

    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})
    crc = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        combine_docs_chain_kwargs={"prompt": qa_prompt}
    )

    # invoke() replaces the deprecated direct call crc({...}).
    result = crc.invoke({'question': question, 'chat_history': chat_history})
    chat_history.append((question, result['answer']))

    return result, chat_history

## Running Code

In [3]:
# Load the sample PDF through load_document (defined above).
data = load_document('page_4-8.pdf')
# print(data[1].page_content)
# print(data[1].metadata)

# Each element of `data` is reported as one page.
print(f'You have {len(data)} pages in your data.')
# print(f'There are {len(data[20].page_content)} characters in the page.')

Loading page_4-8.pdf
You have 5 pages in your data.


In [4]:
# Show the raw text extracted for the second element (index 1) of `data`.
print(data[1].page_content)

 5 R e { }|| c o s ( ) , I m { } || s i n ( )ac c c bc c c== ∠== ∠  
Note especially that the quadrant ambiguity of th e inverse tangent must be resolved in making 
these computations. For example, 
 1(1 ) tan ( 1 / 1) / 4j π−∠− = − = −  
while 
 1(1 ) t a n ( 1 / (1 ) ) 3 / 4 j π−∠−+ = − =  
It is important to be able to mentally comput e the sine, cosine, and tangent of angles that are 
integer multiples of /4π , since many problems will be set up this way to avoid the distraction of 
calculators. 
 
You should also be familiar with Euler’s formula, 
 cos( ) sin( )jejθθθ =+  
and the complex exponential representa tion for trigonometric functions: 
 cos( ) , sin( )22jj jjee ee
jθθθ θ
θθ−−+−==  
 
Notions of complex numbers extend to notions of co mplex-valued functions (of a real variable) in 
the obvious way. For example, we can thi nk of a complex-valued function of time, ()xt, in the 
rectangular form 
 {}{} () R e () I m ()xtx t j x t=+  
In a simpler notation this can be written

In [7]:
# Fetch the tutorial web pages (network access required).
webdata = load_webdocument()
# print(webdata[1].page_content)

In [55]:
# Dump the text of the second loaded page to a file for manual inspection.
with open("webdata.txt", 'w', encoding='utf-8') as f:
    # page_content is a single string: write() emits it in one call, whereas
    # writelines() would iterate over it character by character.
    f.write(webdata[1].page_content)

In [8]:
# chunks = chunk_data(data, chunk_size=256, chunk_overlap=50)
chunks= chunk_data(webdata)
print(len(chunks))
print(chunks[10].page_content)

561
Previous
   



Next
   





What is Signal?
Signal is a time varying physical phenomenon which is intended to convey information.
OR
Signal is a function of time.
OR
Signal is a function of one or more independent variables, which contain some information.
Example: voice signal, video signal, signals on telephone wires etc.
Note:  Noise is also a signal, but the information conveyed by noise is unwanted hence it is considered as undesirable.


In [13]:
# Estimate the embedding cost from the unsplit web pages (not from `chunks`).
print_embedding_cost(webdata)

Total Tokens: 47406
Embedding Cost in USD: 0.018962


In [10]:
# Build the Chroma vector store from the chunked web pages.
index_name = "signal_system_index"
vector_store = chroma_insert_or_fetch_embeddings(index_name, chunks)
# vector_store.persist()

Ok


In [15]:
# Rebinds `vector_store`, replacing the store created in the previous cell.
# NOTE(review): this embeds the unsplit `webdata` pages rather than `chunks` - confirm that is intended.
index_name = "signal_system_webdata"
vector_store = chroma_insert_or_fetch_embeddings(index_name, webdata)

Creating index signal_system_webdata and embeddings ...

Exception occurred invoking consumer for subscription 8b20df42965048d88e1f9d3e2c06260dto topic persistent://default/default/c72d63a8-1cdb-4d30-ba60-15aaa71eb627 init_index(): incompatible function arguments. The following argument types are supported:
    1. (self: hnswlib.Index, max_elements: int, M: int = 16, ef_construction: int = 200, random_seed: int = 100, allow_replace_deleted: bool = False) -> None

Invoked with: <hnswlib.Index(space='cosine', dim=1536)>; kwargs: max_elements=1000, ef_construction=100, M=16, is_persistent_index=True, persistence_location='webdb\\e11a882e-a3d6-4190-861c-aa2bd7bfad50'
Exception occurred invoking consumer for subscription 8b20df42965048d88e1f9d3e2c06260dto topic persistent://default/default/c72d63a8-1cdb-4d30-ba60-15aaa71eb627 init_index(): incompatible function arguments. The following argument types are supported:
    1. (self: hnswlib.Index, max_elements: int, M: int = 16, ef_construction: int = 200, random_seed: int = 100, allow_replace_delete

Ok


In [17]:
# Inspect the retriever configuration: similarity search returning the top 3 matches.
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})
print(retriever)

tags=['Chroma', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001A096AD6110> search_kwargs={'k': 3}


In [18]:
import time

# Interactive question/answer loop over `vector_store`.
# Requires the cell defining ask_and_get_answer to have been run first.
i = 1
print('Write Quit or Exit to quit.')
while True:
    # Surrounding whitespace is dropped so ' quit ' still exits.
    q = input(f'Question #{i}: ').strip()

    if q.lower() in ['quit', 'exit']:
        print('Quitting ... bye bye!')
        time.sleep(2)
        break

    # A blank line would otherwise be sent to the LLM as an empty query;
    # prompt again without using up a question number.
    if not q:
        continue

    i = i + 1

    answer = ask_and_get_answer(vector_store, q)
    print(f'\nAnswer: {answer}')
    print(f'\n {"-" * 50} \n')

Write Quit or Exit to quit.


Question #1:  give me one example of time invarient system?


NameError: name 'ask_and_get_answer' is not defined

In [21]:
## asking with memory
chat_history = []
question = '$y(t) = x(2t)$. Is it a linear system?'
# question = 'what is system and signal'
result, chat_history = ask_with_memory(vector_store, question, chat_history)
print(result['answer'])
# print(chat_history)

Number of requested results 3 is greater than number of elements in index 0, updating n_results = 0


To determine if the system described by $y(t) = x(2t)$ is linear, we need to check if it satisfies the two properties of linearity: homogeneity and superposition.

1. **Homogeneity**:
For a system to be linear, it must satisfy the property of homogeneity. This property states that if the input is scaled by a constant factor, then the output is also scaled by the same factor.

Let's check if the system described by $y(t) = x(2t)$ satisfies homogeneity:
- Let $x_1(t)$ be an input signal and $a$ be a constant.
- If we apply $x_2(t) = ax_1(t)$ as the input to the system, the output will be $y_2(t) = x(2t)$ where $x(t) = ax_1(t)$.

Now, let's substitute $x_2(t)$ into the system equation:
$y_2(t) = x(2t) = a x_1(2t)$

Since $y_2(t) = ax_1(2t)$, the system satisfies the property of homogeneity.

2. **Superposition**:
For a system to be linear, it must satisfy the property of superposition. This property states that the response to the sum of two inputs is the sum of the responses to each inpu

In [23]:
## asking with memory
chat_history = []
# $y(t) =x(2t)$. Is it a time-variant system?
question = '$y(t) = x(2t)$. Is it a time variant system?'
# question = 'what is system and signal'
result, chat_history = ask_with_memory(vector_store, question, chat_history)
print(result['answer'])
# print(chat_history)

Number of requested results 3 is greater than number of elements in index 0, updating n_results = 0


To determine if the system is time-variant, we need to check if a time shift in the input signal results in a different time shift in the output signal.

Given the system: \( y(t) = x(2t) \)

Let's consider an input signal \( x_1(t) \) and its corresponding output \( y_1(t) \) when the input is \( x_1(t) \).

Now, let's consider the same input signal \( x_1(t) \) but delayed by \( t_0 \) seconds, denoted as \( x_2(t) = x_1(t - t_0) \). The corresponding output will be \( y_2(t) = x(2(t - t_0)) \).

If the system is time-invariant, we should have \( y_2(t) = y_1(t - t_0) \) for all \( t_0 \).

Let's substitute the expressions for \( y_1(t) \) and \( y_2(t) \) into the equation and simplify to see if the equality holds.


In [22]:
# Display the most recent answer through IPython's Latex display object.
from IPython.display import display, Latex
display(Latex(result['answer']))

<IPython.core.display.Latex object>

In [24]:
# Same display step as the earlier cell; shows whatever `result` currently holds.
from IPython.display import display, Latex
display(Latex(result['answer']))

<IPython.core.display.Latex object>