In [6]:
!pip install langchain #Used to access langchain tool.
!pip install langchain_huggingface #By using which we can connect huggingface models through langchain.
!pip install transformers #This is standalone library provided by python devloped by huggingface, to access LLM directly through LLM.
!pip install huggingface_hub #It is also a standalone library, Used for entirely download all the LLMs through huggingface.


Collecting langchain_huggingface
  Downloading langchain_huggingface-1.2.0-py3-none-any.whl.metadata (2.8 kB)
Downloading langchain_huggingface-1.2.0-py3-none-any.whl (30 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-1.2.0


In [None]:
import os

In [None]:
from google.colab import userdata
hf = userdata.get('hf1')

In [None]:
# 1st environment
os.environ["HF_TOKEN"] = hf # This environment will be used by langchain_huggingface, when we are calling huggingface model through langchain by using huggingface endpoint.

# Here we are not use langchain directly because we use here huggingface LLM model and for that
# we need to directly download tokenizers/Models from huggingface there is no langchain

# 2st environment
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf
# Why "HUGGINGFACEHUB_API_TOKEN" is this required?
#---> Because we are using / downloading an LLM through LangChain from Hugging Face Hub.
# and Since LangChain fetches the LLM from Hugging Face Hub, it needs the Hugging Face API token — and it reads it from HUGGINGFACEHUB_API_TOKEN.

# NOTE: Both environmentuse the same API KEY because API_KEY we're creating from huggingface only

In [None]:
# Download the tokenizer and model

In [None]:
# Important Note on System Resources: Before downloading or running any Large Language Model (LLM), always check your system resources, especially RAM and storage.
# 1. If your system has limited RAM or storage, avoid downloading large models.
# 2. Choose lightweight or smaller LLMs that can run efficiently on low-resource machines.
# 3. On most personal devices, RAM is around 8 GB, so only models that require up to ~15 GB (with optimizations like quantization) should be considered.
# 4. Using oversized models on low-resource systems may lead to out-of-memory errors, crashes, or slow performance.

# final note: Always match the model size with your available hardware resources to ensure smooth execution.

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct")
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [16]:
tokenizer.save_pretrained("/content/sample_data/deepseek")
model.save_pretrained("/content/sample_data/deepseek")
# Model saved locally on your system at above mension path

In [None]:
# Create the pipeline

In [4]:
# Creates a Hugging Face text-generation pipeline using the loaded model and tokenizer
# This pipeline handles tokenization, inference, and text generation internally

from transformers import pipeline

deepseek_hf = pipeline(model = model ,tokenizer=tokenizer, task = "text-generation")



Device set to use cuda:0


In [7]:
# Accessing the Model with LangChain

from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# HuggingFacePipeline: Wraps the Hugging Face Transformers model so LangChain can use it.
# The model runs locally and uses your system RAM.

# ChatHuggingFace: Converts the wrapped model into a chat-style interface.
# Allows interaction using messages like Human / AI chat, similar to ChatGPT.

# Note: This pipeline wraps the Hugging Face pipeline so LangChain can easily access and use the model.


In [8]:
model1 = HuggingFacePipeline(pipeline = deepseek_hf)

# Acts as a regular LangChain LLM by wrapping the Hugging Face Transformers pipeline

In [10]:
import time
x = time.time()
response = model1.invoke("tell me about deep learning")
y =time.time()
print(y-x)

13.152397632598877


In [11]:
response

'tell me about deep learning\n\n\t\tdeep learning is a subset of machine learning. it is a type of artificial intelligence that uses algorithms that operate by learning from data.\n\t\tdeep learning is most commonly used for image and speech recognition.\n\t\tdeep learning models are built using neural networks with multiple layers of nodes or neurons. these nodes learn and adapt to the input data.\n\t\tthe hidden layer is a layer of intermediaries between the input and output layer, it performs a series of transformations to the input data.\n\t\tthe output layer of the deep learning model is the final result of the learning process.\n\t\tthere are two types of deep learning models: supervised and unsupervised. supervised models learn from labeled data and unsupervised models learn from unlabeled data.\n\t\tsupervised learning methods are those where the model is trained on a labeled dataset. unsupervised learning methods are those where the model is trained on an unlabeled dataset.\n\

In [13]:
deepc_m = ChatHuggingFace(llm = model1)
# Converts the regular LLM into a chat-style model

In [14]:
response = deepc_m.invoke([("system","give the output in chinese"),("user","Tell me about china")])

In [15]:
response.content #result

"<｜begin▁of▁sentence｜>give the output in chinese### Instruction:\nTell me about china\n### Response:\nI'm sorry, this question is beyond my expertise. I am an AI programming and machine learning model trained to assist with questions about programming and computer science. If you have any questions about those topics, feel free to ask.\n"