### RAG Pipelines Data Ingestion to Vector DB Pipeline

In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def process_all_pdfs(pdf_directory : str):

    loader = DirectoryLoader(
        pdf_directory,
        glob = "**/*.pdf",
        loader_cls = PyMuPDFLoader,
        show_progress = False
    )

    documents = loader.load()

    for doc in documents:
        doc.metadata['source_file'] = Path(doc.metadata['source']).name
        doc.metadata['file_type'] = 'pdf'
    print(f"\n✅ Loaded {len(documents)} documents from {pdf_directory}")
    return documents

all_pdf_documents = process_all_pdfs("../data")


✅ Loaded 129 documents from ../data


In [3]:
all_pdf_documents[5]

Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_files/unit2.pdf', 'file_path': '../data/pdf_files/unit2.pdf', 'total_pages': 23, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2022-12-17T08:32:38+00:00', 'trapped': '', 'modDate': "D:20221217083238+00'00'", 'creationDate': "D:20221217083238+00'00'", 'page': 5, 'source_file': 'unit2.pdf', 'file_type': 'pdf'}, page_content='6 \n \n \ncomputer for processing, which increases the response time. \n➢ Lower Communication Cost  \n• In distributed database systems, if data is located locally where it is mostly used, then \nthe communication costs for data manipulation can be minimized.  \n• This is not feasible in centralized systems. \n \nWhy Distributed Databases \n➢ Organizational and economic reasons \n➢ Interconnection of existing databases \n➢ Incremental growth 

In [4]:
# ### process all pdfs inside directory 

# def process_all_pdfs(pdf_directory):
#     ## process all pdf in the directory 
#     all_documents = []
#     pdf_dir = Path(pdf_directory)

#     ##find all pdf files recursively
#     pdf_files = list(pdf_dir.glob("**/*.pdf"))

#     print(f"found {len(pdf_files)} PDF files to process")

#     for pdf_file in pdf_files:
#         print(f"\nProcessing: {pdf_file.name}")
#         try:
#             loader = PyPDFLoader(str(pdf_file))
#             documents = loader.load()

#             ## add source info to metadata
#             for doc in documents:
#                 doc.metadata['source_file'] = pdf_file.name
#                 doc.metadata['file_type'] = 'pdf'
            
#             all_documents.extend(documents)
#             print(f" Loaded {len(documents)} pages")
        
#         except Exception as e:
#             print(f" Error: {e}")
            
#     print(f"\nTotal documents loaded: {len(all_documents)}")
#     return all_documents

# # process all documents in data directory
# all_pdf_documents = process_all_pdfs("../data")


In [5]:
for doc in all_pdf_documents[:5]:  # first 5 docs
    print(doc.metadata)


{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_files/unit2.pdf', 'file_path': '../data/pdf_files/unit2.pdf', 'total_pages': 23, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2022-12-17T08:32:38+00:00', 'trapped': '', 'modDate': "D:20221217083238+00'00'", 'creationDate': "D:20221217083238+00'00'", 'page': 0, 'source_file': 'unit2.pdf', 'file_type': 'pdf'}
{'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_files/unit2.pdf', 'file_path': '../data/pdf_files/unit2.pdf', 'total_pages': 23, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2022-12-17T08:32:38+00:00', 'trapped': '', 'modDate': "D:20221217083238+00'00'", 'creationDate': "D:20221217083238+00'00'", 'page': 1, 'sourc

In [6]:
### text splitting into chunks

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    #splitting documents into smaller chunks for better RAG performance
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = chunk_size,
        chunk_overlap = chunk_overlap,
        length_function = len,
        separators = ["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")

    #show example of chunk
    if split_docs:
        print(f"\nExample chunk:")
        print(f"Content: {split_docs[0].page_content[:200]}...")
        print(f"Metadata: {split_docs[0].metadata}")

    return split_docs

In [7]:
chunks = split_documents(all_pdf_documents)


Split 129 documents into 319 chunks

Example chunk:
Content: 1 
 
 
 
 
 
 
SCHOOL OF COMPUTING 
 
DEPARTMENT OF COMPUTER SCIENCE AND 
ENGINEERING 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
     
 
    UNIT – I  - DISTRIBUTED DATABASE AND INFORMATION SY...
Metadata: {'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_files/unit2.pdf', 'file_path': '../data/pdf_files/unit2.pdf', 'total_pages': 23, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2022-12-17T08:32:38+00:00', 'trapped': '', 'modDate': "D:20221217083238+00'00'", 'creationDate': "D:20221217083238+00'00'", 'page': 0, 'source_file': 'unit2.pdf', 'file_type': 'pdf'}


### Embedding and VectorStoreDB

In [8]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Tuple, Any
from sklearn.metrics.pairwise import cosine_similarity



In [9]:
class EmbeddingManager:
    # handles document embedding generation using SentenceTransformer

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        # initialise embedding manager
        #args: model_name = HuggingFace model for sentence embedding

        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        # load SentenceTransformer model
        try:
            print(f"Loading embedded model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimensions: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading the model {self.model_name} : {e}")
            raise

    def generate_embeddings(self, texts : List[str]) -> np.ndarray:
        # generate embedding for list of texts

        #args:
            # texts: list of text strings to embeddings

        # returns:
            # numpy array of embeddings with shape (len(texts), embedding_dim)

        if not self.model:
            raise ValueError("Model not Loaded")

        print(f"generate embeddings for {len(texts)} texts...")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"generated embeddings with shape: {embeddings.shape}")

        return embeddings
    
## initialise the embeddding manager

embedding_manager = EmbeddingManager()
embedding_manager


Loading embedded model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimensions: 384


<__main__.EmbeddingManager at 0x11d68dbe0>

### VectorStore

In [10]:
class VectorStore:
    # manage document embeddings in chromaDB vector store

    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        # initialize the vector store

        # Args:
            # collection_name : name of chromaDB collection
            # persist_directory : directory to persist the vector store
        
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()
    
    def _initialize_store(self):
        # initialize chromaDB client and collection
        try:
            #create persistent chromaDB client
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path = self.persist_directory)

            #get or create collection
            self.collection = self.client.get_or_create_collection(
                name = self.collection_name,
                metadata={"description" : "PDF documents for RAG"}
            )
            print(f"Vector store initialized. Collection : {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")

        except Exception as e:
            print(f"Error initializing vector store: {e}")
            raise

    def add_documents(self, documents : List[Any], embeddings: np.ndarray):
        # add documents and their embeddings to the vector store

        # Args:
            #documents: list of LangChain documents
            #embeddings: corresponding embeddings for the documents

        if len(documents) != len(embeddings):
            raise ValueError("number of documents must match number of embeddings")
        
        print(f"Adding {len(documents)} documents to vector store...")

        # prepare data for chromaDB
        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []

        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # generate unique id
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            # prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)

            # document content
            documents_text.append(doc.page_content)

            # embedding
            embeddings_list.append(embedding.tolist())

        # ids = []
        # metadatas = []
        # documents_text = []
        # embeddings_list = []

        # for i in range(len(documents)):
        #     doc = documents[i]
        #     embedding = embeddings[i]

        #     # Generate a unique document ID
        #     doc_id = "doc_" + uuid.uuid4().hex[:8] + "_" + str(i)
        #     ids.append(doc_id)

        #     # Prepare metadata dictionary
        #     metadata = dict(doc.metadata)
        #     metadata["doc_index"] = i
        #     metadata["content_length"] = len(doc.page_content)
        #     metadatas.append(metadata)

        #     # Store document content
        #     documents_text.append(doc.page_content)

        #     # Store embedding as list
        #     embeddings_list.append(embedding.tolist())


        # add to collection
        try:
            self.collection.add(
                ids = ids,
                embeddings = embeddings_list,
                metadatas = metadatas,
                documents = documents_text
            )
            print(f"Successfully added {len(documents)} documents to Vector Store")
            print(f"Total documents in collection: {self.collection.count()}")
        
        except Exception as e:
            print(f"Error adding  documents to vector store: {e}")
            raise

vectorstore = VectorStore()
vectorstore



Vector store initialized. Collection : pdf_documents
Existing documents in collection: 0


<__main__.VectorStore at 0x11d68d940>

In [11]:
chunks[:5]

[Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_files/unit2.pdf', 'file_path': '../data/pdf_files/unit2.pdf', 'total_pages': 23, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2022-12-17T08:32:38+00:00', 'trapped': '', 'modDate': "D:20221217083238+00'00'", 'creationDate': "D:20221217083238+00'00'", 'page': 0, 'source_file': 'unit2.pdf', 'file_type': 'pdf'}, page_content='1 \n \n \n \n \n \n \nSCHOOL OF COMPUTING \n \nDEPARTMENT OF COMPUTER SCIENCE AND \nENGINEERING \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n     \n \n    UNIT – I  - DISTRIBUTED DATABASE AND INFORMATION SYSTEMS- SCSA3008'),
 Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2022-12-17T08:32:38+00:00', 'source': '../data/pdf_fil

In [12]:
# convert the text to embeddings
texts = [doc.page_content for doc in chunks]

# generate the embeddings
embeddings = embedding_manager.generate_embeddings(texts)

# store it in the vector database
vectorstore.add_documents(chunks, embeddings)

generate embeddings for 319 texts...


Batches: 100%|██████████| 10/10 [00:02<00:00,  3.46it/s]


generated embeddings with shape: (319, 384)
Adding 319 documents to vector store...
Successfully added 319 documents to Vector Store
Total documents in collection: 319


### Retriever Pipeline query retriever for Vector Store

In [13]:
class RAGRetriever:
    # handles query based retriver for rag.

    def __init__(self, vector_store : VectorStore, embedding_manager: EmbeddingManager):
        # initialize retriever
        #args:
            # vector_store: vector store containing document embeddings.
            # embedding_manager : manager for generating query embeddings.
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query : str, top_k : int = 5, score_threshold : float = 0.0) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents for a query
        
        Args:
            query: The search query
            top_k: Number of top results to return
            score_threshold: Minimum similarity score threshold
            
        Returns:
            List of dictionaries containing retrieved documents and metadata
        """
        print(f"Retrieving documents for query : '{query}'")
        print(f"Top K : {top_k}, score threshold: {score_threshold}")

        # generate query embedding
        query_embedding  = self.embedding_manager.generate_embeddings([query])[0]
        
        # search in vector database
        try:
            results = self.vector_store.collection.query(
                query_embeddings = [query_embedding.tolist()],
                n_results = top_k
            )
            # process results
            retrieved_docs = []
            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]

                # for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                # # Convert distance to similarity score (ChromaDB uses cosine distance)
                # similarity_score = 1 - distance
                
                # if similarity_score >= score_threshold:
                #     retrieved_docs.append({
                #         'id': doc_id,
                #         'content': document,
                #         'metadata': metadata,
                #         'similarity_score': similarity_score,
                #         'distance': distance,
                #         'rank': i + 1
                #     })
                index = 0
                for doc_id in ids:
                    document = documents[index]
                    metadata = metadatas[index]
                    distance = distances[index]

                    similarity_score = 1 - distance
                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id' : doc_id,
                            'content' : document,
                            'metadata' : metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': index + 1
                        })
                    index += 1

                print(f"Retrieved {len(retrieved_docs)} documents (after filtering)")
            else:
                print("No documents found")
            return retrieved_docs
        
        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []
                
rag_retriever = RAGRetriever(vectorstore, embedding_manager)

                

In [14]:
rag_retriever.retrieve("what is digital signature")

Retrieving documents for query : 'what is digital signature'
Top K : 5, score threshold: 0.0
generate embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 15.57it/s]

generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)





[{'id': 'doc_2874f6fc_65',
  'content': 'Digital Signatures \nA digital signature is a mathematical scheme for verifying the authenticity of digital \nmessages or documents. A valid digital signature, where the prerequisites are satisfied, \ngives a recipient very high confidence that the message was created by a known sender, \nand that the message was not altered in transit.  \nDigital signatures are a standard element of most cryptographic protocol suites, and are \ncommonly used for software distribution, financial transactions, contract management \nsoftware, and in other cases where it is important to detect forgery or tampering. \nDigital signatures are often used to implement electronic signatures, which includes any \nelectronic data that carries the intent of a signature, but not all electronic signatures use \ndigital signatures.   \nDigital signatures employ asymmetric cryptography. In many instances, they provide a \nlayer of validation and security to messages sent throug

### Integration vectorDB context pipeline with LLM output

In [None]:
## simple RAG pipeline with groq LLM
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
load_dotenv

## initialize groq LLM with groq API key
groq_api_key = ""

llm = ChatGroq(groq_api_key = groq_api_key, model = "llama-3.3-70b-versatile", temperature = 0.1, max_tokens = 1024)

# simple RAG function : retrieve context + generate response
def rag_simple(query, retriever, llm, top_k = 3):
    #retriever the context
    results = retriever.retrieve(query, top_k=top_k)
    context = "\n\n".join([doc['content'] for doc in results]) if results else ""
    if not context:
        return "No relevent context found to answer the question."
    
    # generate answer using groq llm
    prompt=f"""Use the following context to answer the question concisely.
        Context:
        {context}

        Question: {query}

        Answer:"""
    response = llm.invoke([prompt.format(context=context,query=query)])
    return response.content

In [16]:
answer = rag_simple("what are the 5 classification of information system", rag_retriever, llm)
print(answer)

Retrieving documents for query : 'what are the 5 classification of information system'
Top K : 3, score threshold: 0.0
generate embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 31.74it/s]

generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)





Based on the provided context, the classification of information systems is not explicitly listed as 5. However, it mentions two main classifications: 

1. Operations support system
2. Management support system

Additionally, under the "PRESENTATION OF INFORMATION SYSTEMS" section, it mentions four main types of information systems:

1. Operations support systems
2. Management information systems
3. (The context does not explicitly mention the other two types, but it does mention Decision Support System in a different section)

Note that the context does not provide a clear list of 5 classifications of information systems.


### Enhanced RAG Pipeline

In [18]:
def rag_advanced(query, retriever, llm, top_k = 3, min_score=0.2, return_context=False):
    """
        rag pipeline with extra features
        return answer, sources , confidence score, and optionally full context
    """
    results = retriever.retrieve(query, top_k=top_k, score_threshold=min_score)
    if not results:
        return {'answer': 'No relevant context found.', 'source':[], 'confidence':0.0, 'context':0}
    # Prepare context and sources
    context = "\n\n".join([doc['content'] for doc in results])
    sources = [{
        'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
        'page': doc['metadata'].get('page', 'unknown'),
        'score': doc['similarity_score'],
        'preview': doc['content'][:300] + '...'
    } for doc in results]
    confidence = max([doc['similarity_score'] for doc in results])
    
    # Generate answer
    prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"""
    response = llm.invoke([prompt.format(context=context, query=query)])
    
    output = {
        'answer': response.content,
        'sources': sources,
        'confidence': confidence
    }
    if return_context:
        output['context'] = context
    return output

# Example usage:
result = rag_advanced("what are the 5 classification of information system", rag_retriever, llm, top_k=3, min_score=0.1, return_context=True)
print("Answer:", result['answer'])
print("Sources:", result['sources'])
print("Confidence:", result['confidence'])
print("Context Preview:", result['context'][:300])

Retrieving documents for query : 'what are the 5 classification of information system'
Top K : 3, score threshold: 0.1
generate embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 15.15it/s]

generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)





Answer: Based on the provided context, the classification of information systems is not explicitly listed as 5. However, it mentions two main classifications: 

1. Operations support system
2. Management support system

And later, it mentions four main types of information systems:

1. Operations support systems
2. Management information systems
3. (The other two types are not explicitly mentioned in the provided context, but they are often referred to as)
4. Decision support systems
5. Executive support systems (not mentioned in the context, but a common classification)

Note: The context does not provide a clear list of 5 classifications, but based on general knowledge, the above list can be considered.
Sources: [{'source': 'unit4.pdf', 'page': 4, 'score': 0.3658398389816284, 'preview': 'Information is data that has been processed into a form that is meaningful to the user. \nAn information system (IS) is an organized combination of people, hardware, software, \ncommunications networ

In [19]:
# --- Advanced RAG Pipeline: Streaming, Citations, History, Summarization ---
from typing import List, Dict, Any
import time

class AdvancedRAGPipeline:
    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.history = []  # Store query history

    def query(self, question: str, top_k: int = 5, min_score: float = 0.2, stream: bool = False, summarize: bool = False) -> Dict[str, Any]:
        # Retrieve relevant documents
        results = self.retriever.retrieve(question, top_k=top_k, score_threshold=min_score)
        if not results:
            answer = "No relevant context found."
            sources = []
            context = ""
        else:
            context = "\n\n".join([doc['content'] for doc in results])
            sources = [{
                'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
                'page': doc['metadata'].get('page', 'unknown'),
                'score': doc['similarity_score'],
                'preview': doc['content'][:120] + '...'
            } for doc in results]
            # Streaming answer simulation
            prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
            if stream:
                print("Streaming answer:")
                for i in range(0, len(prompt), 80):
                    print(prompt[i:i+80], end='', flush=True)
                    time.sleep(0.05)
                print()
            response = self.llm.invoke([prompt.format(context=context, question=question)])
            answer = response.content

        # Add citations to answer
        citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i, src in enumerate(sources)]
        answer_with_citations = answer + "\n\nCitations:\n" + "\n".join(citations) if citations else answer

        # Optionally summarize answer
        summary = None
        if summarize and answer:
            summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
            summary_resp = self.llm.invoke([summary_prompt])
            summary = summary_resp.content

        # Store query history
        self.history.append({
            'question': question,
            'answer': answer,
            'sources': sources,
            'summary': summary
        })

        return {
            'question': question,
            'answer': answer_with_citations,
            'sources': sources,
            'summary': summary,
            'history': self.history
        }

# Example usage:
adv_rag = AdvancedRAGPipeline(rag_retriever, llm)
result = adv_rag.query("what is information system", top_k=3, min_score=0.1, stream=True, summarize=True)
print("\nFinal Answer:", result['answer'])
print("Summary:", result['summary'])
print("History:", result['history'][-1])

Retrieving documents for query : 'what is information system'
Top K : 3, score threshold: 0.1
generate embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.78it/s]

generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Streaming answer:
Use the following context to answer the question concisely.
Context:
and introduction of computers. 
 
Information System 
 
An information system can be defined as set of coordinated network of components, 
which act together towards producing, distributing and or processing information. An 
important characteristic o




f computer-based information systems information is 
precision, which may not apply to other types. 
In any given organization information system can be classified based on the usage of the 
information. Therefore, information systems in business can be divided into operations 
support system and management support system. 
 
Information Technology 
 
Everyday knowingly or unknowingly, everyone is utilizing information technology. It 
has grown rapidly and covers many areas of our day to day life like movies, mobile 
phones, the internet, etc. 
Information technology can be broadly defined as integration of computer with 
telecommunication equipment for storing, retrieving, manipulating and storage of data.

Information is data that has been processed into a form that is meaningful to the user. 
An information system (IS) is an organized combination of people, hardware, software, 
communications network, and data resources that collects, transforms and disseminates 
information in an o