# Google Colab Setup for LLM Projects

In [None]:

# Step 1: Install Required Libraries
!pip install transformers openai sentence-transformers scikit-learn matplotlib streamlit
    

In [None]:

# Step 2: Import Required Libraries
import os
from getpass import getpass

import matplotlib.pyplot as plt
import openai
import streamlit as st
import torch
from sklearn.decomposition import PCA
from transformers import AutoTokenizer, AutoModel
    

## Step 3: Set OpenAI API Key

In [None]:

# Set your OpenAI API key without hardcoding it in the notebook, so the secret is
# never saved or shared with the file. The key is taken from the OPENAI_API_KEY
# environment variable when it is set; otherwise you are prompted for it and the
# input is hidden.
openai.api_key = os.environ.get('OPENAI_API_KEY') or getpass('Enter your OpenAI API key: ')
    

## Project 1: Exploring Tokenization and Embeddings

In [None]:

def tokenize_text(text, model_name='bert-base-uncased'):
    """Split ``text`` into tokens and print a short summary.

    Args:
        text: The string to tokenize.
        model_name: Name of the pretrained tokenizer to load. Defaults to
            ``'bert-base-uncased'``.

    Returns:
        A ``(tokenizer, tokens)`` tuple: the loaded tokenizer and the list of
        tokens produced by ``tokenizer.tokenize(text)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokens = tokenizer.tokenize(text)
    print(f"Number of tokens: {len(tokens)}")
    print(f"Tokens: {tokens}")
    return tokenizer, tokens

def visualize_embeddings(text):
    """Plot a 2-D PCA projection of the per-token BERT embeddings of ``text``.

    The text is tokenized (printing the token summary from ``tokenize_text``),
    encoded with ``bert-base-uncased``, and the last hidden state of every
    encoded position is reduced to two principal components and drawn as a
    labeled scatter plot.

    Args:
        text: The string whose token embeddings should be visualized.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    tokenizer, _ = tokenize_text(text)
    model = AutoModel.from_pretrained('bert-base-uncased')
    # truncation=True keeps over-long text within the model's maximum length.
    inputs = tokenizer(text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Index the batch axis explicitly; a bare squeeze() would also drop any
    # other axis that happens to have size 1.
    embeddings = outputs.last_hidden_state[0].numpy()

    # The encoded sequence contains special tokens (e.g. [CLS]/[SEP]) that
    # tokenizer.tokenize() does not return. Deriving the labels from the encoded
    # ids keeps exactly one label per embedding row, in the same order.
    labels = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())

    pca = PCA(n_components=2)
    reduced_embeddings = pca.fit_transform(embeddings)

    plt.figure(figsize=(10, 6))
    plt.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1])
    for label, (x, y) in zip(labels, reduced_embeddings):
        plt.annotate(label, (x, y))
    plt.title("Token Embeddings Visualization (PCA)")
    plt.xlabel("Principal component 1")
    plt.ylabel("Principal component 2")
    plt.show()
    

## Project 2: Crafting the Perfect Prompt

In [None]:

def get_gpt_response(prompt, text, model="gpt-3.5-turbo-instruct", max_tokens=150):
    """Send an instruction plus a body of text to an OpenAI completion model.

    Uses the ``openai>=1.0`` client interface (``openai.completions.create``).
    The legacy ``openai.Completion.create`` call was removed in that release,
    and the ``text-davinci-003`` model has been retired, so the default model
    is the completions-endpoint model ``gpt-3.5-turbo-instruct``.

    Args:
        prompt: The instruction placed before the text.
        text: The content the instruction applies to.
        model: Name of the completion model to query.
        max_tokens: Upper bound on the number of tokens generated.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    response = openai.completions.create(
        model=model,
        # The instruction and the text are separated by a blank line.
        prompt=f"{prompt}\n\n{text}",
        max_tokens=max_tokens,
    )
    return response.choices[0].text.strip()

# Summarization prompts, ordered from least to most specific.
generic_prompt = 'Summarize the following article.'
detailed_prompt = (
    'Summarize the following article, '
    'focusing on the main arguments and conclusions.'
)
specific_prompt = (
    'In 100 words or less, summarize the key findings of the following '
    'research article, emphasizing its implications for future studies.'
)
    

## Project 3: Building a Mini Application

In [None]:

def run_streamlit_app():
    """Render a minimal Streamlit UI that sends user text to the language model.

    Shows a title, a text area, and a 'Process' button. When the button is
    pressed, the entered text is passed to ``get_gpt_response`` together with
    ``specific_prompt`` and the result is written to the page. Blank input is
    rejected with a warning instead of being sent to the API.

    Intended to be executed from a script started with ``streamlit run``.
    """
    st.title('LLM-Powered Application')
    user_input = st.text_area('Enter your text here:')
    if st.button('Process'):
        # Guard against empty / whitespace-only input: there is nothing to
        # process, and the request would still be sent (and billed).
        if not user_input or not user_input.strip():
            st.warning('Please enter some text before pressing Process.')
            return
        response = get_gpt_response(specific_prompt, user_input)
        st.write('Response:', response)
    

## Project 4: Advanced Prompt Techniques

In [None]:

# Prompt templates for the techniques in this section: a step-by-step lead-in,
# a few-shot template with placeholder examples, and a role-play instruction.
chain_prompt = 'To solve the problem, we need to follow these steps:'
few_shot_prompt = '\n'.join([
    'Example 1:',
    'Input: [input text]',
    'Output: [desired output]',
    '',
    'Example 2:',
    'Input: [input text]',
    'Output: [desired output]',
    '',
    'Now, process the following input accordingly.',
])
role_play_prompt = 'As a historian, explain the significance of the Renaissance.'
    

## Running the Streamlit App in Colab

In [None]:

# Uncomment one of the lines below to run a specific project.
# visualize_embeddings("Your sample text here.")
# run_streamlit_app()  # only renders a UI when the code is executed via `streamlit run`

# To run Streamlit in Colab, first save the app code to a script (e.g. your_script.py), then run:
# !streamlit run your_script.py & npx localtunnel --port 8501
    