In [None]:
!pip install langchain --quiet
!pip install openai --quiet
!pip install cohere --quiet
!pip install langchain_community --quiet

# OpenAI Model - Paid Version

Get your OpenAI API key here:
https://platform.openai.com/api-keys

Track your usage and spend here:
https://platform.openai.com/usage

In [None]:
import os

# Never paste the key into the notebook itself: it would be saved with the file.
# On Colab the key is read from the notebook's Secrets panel; elsewhere an
# already-exported OPENAI_API_KEY is kept, and only if none exists is it prompted for.
try:
    from google.colab import userdata  # only importable inside Google Colab
    os.environ['OPENAI_API_KEY'] = userdata.get("OPENAI_API_KEY")
except ImportError:
    if not os.environ.get('OPENAI_API_KEY'):
        from getpass import getpass  # hidden input, nothing is echoed or stored in the cell
        os.environ['OPENAI_API_KEY'] = getpass("OPENAI_API_KEY: ")

In [None]:
# Import the wrapper from langchain_community (installed by the first cell).
# `langchain.llms` is only a deprecated alias for the same class and is not
# guaranteed to exist in newer LangChain releases.
from langchain_community.llms import OpenAI

# - temperature: Set to 0.9, which controls the randomness of the output.
#   A higher temperature results in more varied and unpredictable outputs,
#   while a lower temperature produces more deterministic and conservative outputs.
#   This is often used in generative tasks to balance between creativity and relevance.
#
# - max_tokens: Set to 256, which specifies the maximum number of tokens (words or pieces of words)
#   that the model can generate in a single response.
llm = OpenAI(temperature=0.9, max_tokens=256)

response = llm.invoke("Write a 4 line poem on AI")
print(response)


In [None]:
# Same OpenAI wrapper as in the previous cell, but with temperature 0 so the
# explanation is as little randomised as possible.
prompt = "What is overfitting in Machine Learning? Explain it to a layman"

llm = OpenAI(temperature=0)
response = llm.invoke(prompt)
print(response)

# Cohere

Get your Cohere Trial API key here
https://dashboard.cohere.com/api-keys

In [None]:
import os

# Self-contained so this section also runs when the OpenAI section was skipped.
# The key is never written into the notebook: Colab Secrets first, otherwise an
# already-exported COHERE_API_KEY, otherwise a hidden prompt.
try:
    from google.colab import userdata  # only importable inside Google Colab
    os.environ['COHERE_API_KEY'] = userdata.get("COHERE_API_KEY")
except ImportError:
    if not os.environ.get('COHERE_API_KEY'):
        from getpass import getpass  # hidden input, nothing is echoed or stored in the cell
        os.environ['COHERE_API_KEY'] = getpass("COHERE_API_KEY: ")

In [None]:
# Import the wrapper from langchain_community (installed by the first cell).
# `langchain.llms` is only a deprecated alias for the same class and is not
# guaranteed to exist in newer LangChain releases.
from langchain_community.llms import Cohere

# High temperature for a varied, creative answer; the reply is capped at 256 tokens.
llm = Cohere(temperature=0.9, max_tokens=256)

response = llm.invoke("Write a 4 line poem on AI")
print(response)

In [None]:
# Same Cohere wrapper as in the previous cell, but with temperature 0 so the
# explanation is as little randomised as possible.
prompt = "What is overfitting in Machine Learning? Explain it to a layman"

llm = Cohere(temperature=0)
response = llm.invoke(prompt)
print(response)

# Open source models


* Mistral Model (Mistral 7B, Mixtral 8x7B)
* Llama (Llama 2, Llama 3)
* Bloom by Hugging Face
* Falcon 180B
* OPT-175B
* XGen-7B
* Vicuna-13B



### Top Open-Source Large Language Models for 2024

1. **LLaMA 2**:
   - Developed by Meta, LLaMA 2 is a generative text model with 7 to 70 billion parameters, fine-tuned with reinforcement learning from human feedback (RLHF).
   - Released for research and commercial use in July 2023.
   - Includes versions like LLaMA Chat and Code LLaMA for varied natural language tasks.

2. **BLOOM**:
   - Launched by Hugging Face in 2022, BLOOM is an autoregressive model with 176 billion parameters.
   - Supports 46 languages and 13 programming languages.
   - Emphasizes transparency and is available for free through Hugging Face.

3. **BERT**:
   - Introduced by Google in 2018, BERT is known for its bidirectional encoder representations from transformers.
   - Achieved state-of-the-art performance in many NLP tasks and is widely used, including in Google Search.

4. **Falcon 180B**:
   - Released by the Technology Innovation Institute in the UAE in 2023.
   - With 180 billion parameters, it rivals models like LLaMA 2 and GPT-3.5.
   - Requires significant computing resources.

5. **OPT-175B**:
   - Part of Meta's suite of pre-trained transformers, released in 2022.
   - Ranges from 125M to 175B parameters.
   - Available for research use only due to its non-commercial license.

6. **XGen-7B**:
   - Launched by Salesforce in July 2023, designed for longer context windows.
   - Utilizes only 7 billion parameters.
   - Available for commercial and research purposes, with some variants under a non-commercial license.

7. **GPT-NeoX and GPT-J**:
   - Developed by EleutherAI, GPT-NeoX has 20 billion parameters and GPT-J has 6 billion parameters.
   - Available for various NLP tasks via the NLP Cloud API.

8. **Vicuna-13B**:
   - Fine-tuned from LLaMA 13B, Vicuna-13B is a conversational model.
   - Performs well in customer service, healthcare, education, and more.
   - Achieves high quality, comparable to ChatGPT and Google Bard.

### Choosing the Right Open-Source LLM
Consider the following factors:
- **Purpose**: Ensure the LLM's licensing fits your use case, especially for commercial purposes.
- **Necessity**: Evaluate if an LLM is essential for your goals.
- **Accuracy**: Larger models typically offer higher accuracy.
- **Investment**: Consider the cost of resources for training and operating the LLM.
- **Pre-trained Models**: Leverage existing pre-trained models for specific use cases to save resources.

# HuggingFace models

https://huggingface.co/mistralai

In [None]:
import os

# Never paste the token into the notebook itself: it would be saved with the file.
# On Colab the token is read from the notebook's Secrets panel; elsewhere an
# already-exported HUGGINGFACEHUB_API_TOKEN is kept, and only if none exists is it prompted for.
try:
    from google.colab import userdata  # only importable inside Google Colab
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = userdata.get("HUGGINGFACEHUB_API_TOKEN")
except ImportError:
    if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
        from getpass import getpass  # hidden input, nothing is echoed or stored in the cell
        os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass("HUGGINGFACEHUB_API_TOKEN: ")

In [None]:
# Import the wrapper from langchain_community (installed by the first cell).
# `langchain.llms` is only a deprecated alias for the same class and is not
# guaranteed to exist in newer LangChain releases.
from langchain_community.llms import HuggingFaceHub

# Hosted model to query, identified by its Hugging Face Hub repository id.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

# model_kwargs are forwarded to the hosted model as generation parameters.
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0.9, "max_length": 256},
)

response = llm.invoke("Write a 4 line poem on AI")
print(response)

In [None]:
# Same Mistral model as in the previous cell, now with a lower temperature and a
# larger length budget for a longer, more focused answer.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
generation_kwargs = {"temperature": 0.3, "max_length": 1000}
question = "How to pick a stock based on Revenue, Profit and profit margin trends?"

llm = HuggingFaceHub(repo_id=repo_id, model_kwargs=generation_kwargs)
response = llm.invoke(question)
print(response)

# Llama from Hugging Face Hub
https://huggingface.co/meta-llama

* You need to fill in the contact info and wait for approval.
https://huggingface.co/meta-llama/Meta-Llama-3.1-8B

In [None]:
repo_id = "meta-llama/Meta-Llama-3.1-8B"

# This cell demonstrates a failure. When the notebook was written the request was rejected with:
#   The model meta-llama/Meta-Llama-3.1-8B is too large to be loaded automatically (16GB > 10GB).
#   Please use Spaces (https://huggingface.co/spaces) or Inference Endpoints (https://huggingface.co/inference-endpoints).
# The error is caught and displayed so that "Run All" still reaches the sections below.
try:
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.9},
    )
    response = llm.invoke("What are some ways to boost creativity?")
    print(response)
except Exception as err:  # broad on purpose: the error itself is what this cell shows
    print(f"{type(err).__name__}: {err}")

# Replicate

- Run and fine-tune open-source models with Replicate's API: https://replicate.com/home
- Deploy custom models at scale using one line of code.
- Avoid managing infrastructure or learning machine learning details.
- Use open-source models or package your own.
- Choose to make models public or keep them private.
- Start with any open-source model with just one line of code.


Replicate API Token

In the top-left corner: Home >> click on your user id >> API Tokens
https://replicate.com/account/api-tokens

In [None]:
!pip install replicate

In [None]:
import os

# Self-contained so this section also runs when the earlier sections were skipped.
# The token is never written into the notebook: Colab Secrets first, otherwise an
# already-exported REPLICATE_API_TOKEN, otherwise a hidden prompt.
try:
    from google.colab import userdata  # only importable inside Google Colab
    os.environ["REPLICATE_API_TOKEN"] = userdata.get("REPLICATE_API_TOKEN")
except ImportError:
    if not os.environ.get("REPLICATE_API_TOKEN"):
        from getpass import getpass  # hidden input, nothing is echoed or stored in the cell
        os.environ["REPLICATE_API_TOKEN"] = getpass("REPLICATE_API_TOKEN: ")

In [None]:
# Import the wrapper from langchain_community (installed by the first cell).
# `langchain.llms` is only a deprecated alias for the same class and is not
# guaranteed to exist in newer LangChain releases.
from langchain_community.llms import Replicate

# The model is addressed by its Replicate "owner/name" identifier;
# model_kwargs are forwarded to the hosted model as generation parameters.
replicate_llm = Replicate(
    model="meta/meta-llama-3.1-405b-instruct",
    model_kwargs={"temperature": 0.6},
)

response = replicate_llm.invoke("What are some good strategies for studying?")
print(response)

# Groq

* Groq developed the LPU (Language Processing Unit) chip to run LLMs faster and cheaper.
* Offers Groq Cloud to try open-source LLMs like Llama3 or Mixtral.
* Allows free use of Llama3 or Mixtral in apps via Groq API Key with rate limits.
* Models on Groq https://console.groq.com/docs/models
* Get your Groq API key https://console.groq.com/keys


In [None]:
!pip install langchain-groq

In [None]:
import os

# Self-contained so this section also runs when the earlier sections were skipped.
# The key is never written into the notebook: Colab Secrets first, otherwise an
# already-exported GROQ_API_KEY, otherwise a hidden prompt.
try:
    from google.colab import userdata  # only importable inside Google Colab
    os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")
except ImportError:
    if not os.environ.get("GROQ_API_KEY"):
        from getpass import getpass  # hidden input, nothing is echoed or stored in the cell
        os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

In [None]:
from langchain_groq import ChatGroq

# ChatGroq is a chat model: invoke() returns a message object rather than a plain
# string, unlike the completion-style LLM wrappers used in the earlier sections.
llm = ChatGroq(
    model="llama3-70b-8192"
)

result = llm.invoke("what are the top 10 quotes about ignorance?")

# Print only the generated text; printing the message object itself would dump
# its repr, including response metadata.
print(result.content)

# Many more ways

https://python.langchain.com/v0.1/docs/integrations/llms/