In [1]:
#%pip install --upgrade pip
%pip install -q langchain langchain-community faiss-cpu

Note: you may need to restart the kernel to use updated packages.




In [1]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

def create_deepseek_llm(model_name="deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", 
                        max_new_tokens=512,
                        temperature=0.1):
    """Create a DeepSeek text-generation pipeline wrapped for use with LangChain.

    Args:
        model_name: Hugging Face Hub id (or local path) of the causal LM to load.
        max_new_tokens: Upper bound on the number of tokens generated per call.
        temperature: Sampling temperature. A value > 0 enables sampling with
            nucleus ``top_p=0.95``; ``0`` or ``None`` selects greedy decoding.

    Returns:
        A ``HuggingFacePipeline`` LLM backed by the loaded model.
    """
    # Load tokenizer and model with the same trust_remote_code setting so that
    # both resolve any custom code shipped with the checkpoint consistently.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype="auto",
        device_map="auto",
    )

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": 1.1,
    }
    if temperature is not None and temperature > 0:
        # temperature/top_p only take effect when sampling is on; set it
        # explicitly instead of depending on the checkpoint's default config.
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
        )
    else:
        # A non-positive temperature is not a valid sampling setting, so fall
        # back to deterministic greedy decoding.
        generation_kwargs["do_sample"] = False

    # Create a text generation pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_kwargs,
    )

    # Create a LangChain LLM from the pipeline
    return HuggingFacePipeline(pipeline=pipe)

def create_rag_chain(llm=None, verbose=True):
    """Create a simple context-stuffing QA chain (DeepSeek model by default).

    No retrieval is performed here: the caller passes the ``context`` text and
    the ``question`` directly, and both are substituted into the prompt.

    Args:
        llm: LangChain-compatible LLM to run the prompt with. When ``None``
            (the default) the DeepSeek model is loaded via
            ``create_deepseek_llm()``; pass an existing LLM to avoid reloading
            the model or to swap in a different one.
        verbose: Forwarded to ``LLMChain``; defaults to ``True`` as before.

    Returns:
        An ``LLMChain`` whose inputs are ``"context"`` and ``"question"``.
    """
    # Initialize the DeepSeek LLM only when the caller did not supply one
    if llm is None:
        llm = create_deepseek_llm()

    # Build the prompt line by line so that source-code indentation and
    # trailing whitespace after "Answer:" do not leak into the model input.
    template = (
        "You are an AI assistant providing helpful information.\n"
        "\n"
        "Context information is below:\n"
        "---------------------\n"
        "{context}\n"
        "---------------------\n"
        "\n"
        "Given the context information and not prior knowledge, answer the question.\n"
        "Question: {question}\n"
        "\n"
        "Answer:"
    )

    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )

    # Create the chain
    return LLMChain(
        llm=llm,
        prompt=prompt,
        output_parser=StrOutputParser(),
        verbose=verbose,
    )

def main():
    """Build the QA chain and answer two sample questions about DeepSeek.

    Each question is run against the same hard-coded context, and the question
    and the generated answer are printed.
    """
    # Create the RAG chain
    chain = create_rag_chain()

    # Example context shared by every question
    context = """
    DeepSeek is an advanced large language model (LLM) developed by DeepSeek AI.
    It's designed for both natural language understanding and code generation tasks.
    The model comes in several sizes, including 7B and 67B parameter versions.
    DeepSeek-Coder is specifically fine-tuned for programming tasks and supports
    multiple programming languages including Python, Java, C++, JavaScript, and more.
    """

    questions = (
        "What is DeepSeek good at and what sizes is it available in?",
        "What programming languages does DeepSeek-Coder support?",
    )

    for question in questions:
        # Run the chain
        result = chain.invoke({
            "context": context,
            "question": question
        })

        # The chain result is a mapping that echoes the inputs next to the
        # generated "text"; print only the answer. A plain-string result is
        # passed through unchanged.
        if isinstance(result, dict):
            answer = result.get("text", result)
        else:
            answer = result

        print("\nQuestion:", question)
        print("\nAnswer:", answer)

In [2]:
# Run the demo: builds the chain (which loads the model) and prints the
# answers to the sample questions defined in main().
main()

tokenizer_config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

configuration_deepseek.py:   0%|          | 0.00/10.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct:
- configuration_deepseek.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_deepseek.py:   0%|          | 0.00/78.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct:
- modeling_deepseek.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


ImportError: cannot import name 'is_torch_greater_or_equal_than_1_13' from 'transformers.pytorch_utils' (C:\Users\panup\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\pytorch_utils.py)

In [1]:
# Basic Text-based RAG with LangChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Multimodal Embeddings
from transformers import CLIPProcessor, CLIPModel
from transformers import WhisperProcessor, WhisperForConditionalGeneration

#FAISS for vector search
import faiss

# Gradio UI, Hugging Face datasets, HTTP and YouTube helpers (for arXiv integration)
import gradio as gr
from datasets import load_dataset
import requests
from io import BytesIO
from pytube import YouTube

ModuleNotFoundError: No module named 'langchain'