# Image Embeddings

In [1]:
from datasets import load_dataset

# Load the "explanation" configuration of the New Yorker caption-contest dataset.
# NOTE(review): `ds` is not referenced again in the visible part of this
# notebook — confirm it is still needed before keeping the download.
ds = load_dataset("jmhessel/newyorker_caption_contest", "explanation")

import pandas as pd
import os

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel, AutoTokenizer
import faiss
import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the CLIP checkpoint (moved to `device`) plus the image processor and
# tokenizer published under the same checkpoint name.
model = AutoModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
# NOTE(review): `processor` is not used in the visible part of this notebook.
processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch16")
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch16")

# Table of image descriptions. Later cells read column 0 as the image file
# name and column 1 as the textual description.
df = pd.read_csv("../helpers/enhanced_image_descriptions.csv")

from torch.utils.data import Dataset

class GolfDataset(Dataset):
    """Dataset yielding one tokenized description per dataframe row.

    Every item is the tensor of token ids the tokenizer produces for the
    value stored in the dataframe's second column (positional index 1), with
    the leading batch dimension removed.
    """

    def __init__(self, dataframe, tokenizer_name="openai/clip-vit-base-patch16"):
        """Store the dataframe and load the tokenizer called ``tokenizer_name``."""
        self.dataframe = dataframe
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the token ids for the description stored in row ``idx``."""
        text = self.dataframe.iloc[idx, 1]
        encoded = self.tokenizer(text, truncation=True, return_tensors="pt")
        token_ids = encoded["input_ids"]
        # The tokenizer returns a batch of one; drop that batch dimension.
        return token_ids.squeeze(0)

# Create the dataset: wraps `df` so each row's description can be fetched as
# a tensor of token ids.
golf_dataset = GolfDataset(dataframe=df)




In [2]:
def create_embeddings(dataset, model, device):
    """Encode every item of ``dataset`` with ``model.get_text_features``.

    Args:
        dataset: Indexable collection supporting ``len()`` whose items are
            tensors of token ids without a batch dimension.
        model: Object exposing ``get_text_features``.
        device: Device the token ids are moved to before the forward pass.

    Returns:
        A NumPy array with one row per dataset item.
    """
    vectors = []
    # Inference only, so gradient tracking is disabled for the whole loop.
    with torch.no_grad():
        for position in range(len(dataset)):
            # Add the batch dimension the model expects, then move to device.
            batch = dataset[position].unsqueeze(0).to(device)
            features = model.get_text_features(batch)
            vectors.append(features[0].detach().cpu().numpy())
    return np.array(vectors)

# Create embeddings: one text-feature vector per description in the dataset
embeddings = create_embeddings(golf_dataset, model, device)

# Sanity check: print the shape of the embedding matrix
print(embeddings.shape)  # Should be (number_of_samples, embedding_dim)

(43, 512)


In [43]:
# Step 2: Initialize FAISS index
# Vectors are added in dataset order, so position i in the index corresponds
# to row i of `df`; query results are mapped back to rows that way.
# NOTE(review): embeddings are indexed without normalization, so neighbours
# are ranked by L2 distance rather than cosine similarity — confirm intended.
dimension = embeddings.shape[1]  # Get the dimension of embeddings
index = faiss.IndexFlatL2(dimension)  # Create FAISS index
index.add(embeddings)  # Add embeddings to the index

# Step 3: Query the dataset with a text prompt
def query_with_prompt(prompt, model, tokenizer, index, dataframe, image_dir, k=1):
    """Return image paths and descriptions of the rows nearest to a text prompt.

    The prompt is embedded with ``model.get_text_features`` and looked up in
    ``index``; the distances returned by the search are printed.

    Args:
        prompt: Query text.
        model: Object exposing ``get_text_features``.
        tokenizer: Callable that tokenizes a list of strings; its result must
            support ``.to(device)`` and ``**`` unpacking.
        index: Object with ``search(vectors, k)`` returning
            ``(distances, indices)``.
        dataframe: Table addressed positionally; column 0 holds the image
            file name and column 1 the description.
        image_dir: Directory prepended to each image file name.
        k: Number of neighbours requested from the index.

    Returns:
        ``(matched_images, matched_descriptions)``: two parallel lists. They
        hold fewer than ``k`` entries when the index reports fewer matches.

    Note:
        Relies on the notebook-level ``device`` variable.
    """
    encoded_prompt = tokenizer([prompt], return_tensors="pt", truncation=True).to(device)

    # Inference only: skip gradient tracking, as create_embeddings does.
    with torch.no_grad():
        prompt_embedding = (
            model.get_text_features(**encoded_prompt)[0]
            .detach()
            .cpu()
            .numpy()
        )

    # Find nearest embeddings
    distances, indices = index.search(np.array([prompt_embedding]), k)

    print(distances)

    # FAISS pads the result with -1 when it holds fewer than k vectors; a -1
    # passed to iloc would silently return the last row as a bogus match.
    matched_indices = [int(idx) for idx in indices[0] if idx >= 0]

    # Retrieve matched image paths and descriptions
    matched_images = [os.path.join(image_dir, dataframe.iloc[idx, 0]) for idx in matched_indices]
    matched_descriptions = [dataframe.iloc[idx, 1] for idx in matched_indices]

    return matched_images, matched_descriptions

# Step 4: Display the matched image
def display_image(image_path, width=200):
    """Open an image, scale it to ``width`` pixels wide, and show it.

    The height is scaled by the same ratio as the width so the aspect ratio
    is preserved (up to integer truncation).

    Args:
        image_path: Path of the image file to display.
        width: Target width in pixels; defaults to the previously hard-coded
            value of 200.
    """
    # The context manager releases the file handle; convert() returns an
    # independent in-memory image, so it stays usable after the file closes.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    ratio = width / float(image.size[0])
    # Truncation can yield 0 for very wide images, which resize() rejects.
    height = max(1, int(float(image.size[1]) * ratio))
    img = image.resize((width, height), Image.Resampling.LANCZOS)
    img.show()

# Text Embeddings

In [9]:
# Load Chunks
import pickle

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load chunks.pkl when it comes from a trusted source.
with open('chunks.pkl', 'rb') as chunk_file:
    chunks = pickle.load(chunk_file)

# Each chunk is a sequence of strings; join its pieces with single spaces.
chunk_joined = [" ".join(chunk) for chunk in chunks]


In [10]:
from rich import print
from langchain.docstore.document import Document
from langchain_community.chat_models import ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Chat model requested from Ollama under the name "mistral"; used as the
# generation step of the chain built inside rag().
local_llm = ChatOllama(model="mistral")

# RAG
def rag(chunks, collection_name, question="How should the wrist be positioned during impact?"):
    """Index ``chunks`` in a Chroma collection and print an answer drawn from them.

    Builds a vector store from the given chunks, wires a retriever, prompt,
    the notebook-level ``local_llm`` and a string parser into one chain,
    invokes it with ``question`` and prints the result.

    Args:
        chunks: Iterable of chunks to index. Each item may be a ``Document``
            (used as-is), a string, or a sequence of strings (joined with
            single spaces). Non-``Document`` items are wrapped in a
            ``Document`` with metadata ``{"source": "local"}``.
        collection_name: Name of the Chroma collection to create.
        question: Question sent through the chain; defaults to the question
            that was previously hard-coded.
    """
    # Build the documents from the argument. The previous body ignored
    # `chunks` and read a global `documents` that may not exist yet.
    documents = []
    for chunk in chunks:
        if isinstance(chunk, Document):
            documents.append(chunk)
            continue
        text = chunk if isinstance(chunk, str) else " ".join(chunk)
        documents.append(Document(page_content=text, metadata={"source": "local"}))

    vectorstore = Chroma.from_documents(
        documents=documents,
        collection_name=collection_name,
        embedding=OllamaEmbeddings(model='nomic-embed-text'),
    )
    retriever = vectorstore.as_retriever()

    prompt_template = """Answer the question based only on the following context:
    {context}
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(prompt_template)

    # The question feeds the retriever (to fill {context}) and is also passed
    # through unchanged to fill {question}.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | local_llm
        | StrOutputParser()
    )
    result = chain.invoke(question)
    print(result)

In [11]:
# Wrap each joined chunk in a Document tagged with a "local" source, then
# index all documents in the "agentic-chunks" Chroma collection.
documents = [Document(page_content=chunk, metadata={"source": "local"}) for chunk in chunk_joined]
vectorstore = Chroma.from_documents(
    documents=documents,
    collection_name="agentic-chunks",
    embedding=OllamaEmbeddings(model='nomic-embed-text'),
)
# Retriever that supplies the {context} for the chain built in a later cell.
retriever = vectorstore.as_retriever()

In [88]:
# Prompt for the retrieval chain: Ben Hogan persona instructions followed by
# the {context} and {question} placeholders, which are the only template
# variables and are filled in when the chain is invoked.
prompt_template = """
### Ben Hogan Chatbot Instruction

**Role:** You are Ben Hogan, the legendary golfer, known for your precise ball-striking, meticulous approach to the game, and deep understanding of golf fundamentals.

**Tone:** Calm, authoritative, encouraging, and reflective of the 1950s era.

**Objective:** Answer user questions about golf, specifically focusing on golf swing techniques, mindset, philosophy, and love for the game, based on the book "Five Lessons: The Fundamentals of Golf."

#### Specific Instructions

**1. Provide Expert Golf Swing Advice:**
   - Break down the golf swing into its fundamental components: grip, stance and posture, the first part of the swing, and the second part of the swing.
   - Use detailed explanations from the book "Five Lessons: The Fundamentals of Golf."
   - Example:
     - User: "How should I hold the club to improve my grip?"
     - "The grip is the foundation of your swing. Hold the club in your fingers, not your palm. The V formed by your thumb and index finger should point towards your right shoulder. A proper grip ensures control and consistency."

**2. Share Mindset and Philosophy:**
   - Discuss the mental approach to golf, emphasizing focus, discipline, and the importance of practice.
   - Share personal anecdotes and insights from your career.
   - Example:
     - User: "What mindset should I have when approaching a difficult shot?"
     - "Golf is as much a mental game as it is a physical one. Approach each shot with confidence and focus. Visualize the perfect shot, trust your swing, and stay calm. Every challenge is an opportunity to improve."

**3. Encourage and Motivate:**
   - Provide motivational support and encouragement.
   - Reinforce the idea that improvement comes with practice and dedication.
   - Highlight the joy and fulfillment of playing golf.
   - Example:
     - User: "How important is practice in becoming a good golfer?"
     - "Practice is the bedrock of success in golf. Consistent, deliberate practice hones your skills and builds muscle memory. Dedication to practice will pay off on the course."

**4. Reflect on Philosophy and Love for Golf:**
   - Discuss your philosophy on golf and life.
   - Share why you love golf and what it means to you.
   - Example:
     - User: "What philosophy did you follow throughout your golf career?"
     - "My philosophy was simple: strive for perfection in every aspect of the game. Understand the fundamentals, work tirelessly to improve, and never settle for mediocrity. Golf is a journey of continuous learning and growth."

**Guidelines:**
- Always base responses on the teachings from "Five Lessons: The Fundamentals of Golf."
- Be informative, supportive, and engaging.
- Use quotes and references from your book to lend authenticity and depth to your answers.

Answer the question based on the following context:
{context}
Question: {question}
"""

system_prompt = ChatPromptTemplate.from_template(prompt_template)

# local_llm = ChatOllama(model="mistral")
# chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | prompt
#     | local_llm
#     | StrOutputParser()
# )

from langchain_openai import OpenAI
import dotenv
dotenv.load_dotenv()

from langchain_core.runnables import RunnableLambda

def inspect(state):
    """Print the state passed between Runnables in a langchain and pass it on.

    Intended to be wrapped in ``RunnableLambda`` and spliced into a chain as
    a debugging tap: the value is printed and returned unchanged.

    NOTE(review): the name matches the standard-library ``inspect`` module
    and would shadow it if that module were imported in this notebook.
    """
    print(state)
    return state

# Completion model that generates the persona answer.
# NOTE(review): presumably authenticates via credentials loaded by
# dotenv.load_dotenv() above — confirm the expected variable is set.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")

# The input question goes to the retriever (filling {context}) and is also
# passed through unchanged (filling {question}); the model output is then
# parsed to a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    # Uncomment the next line to print the intermediate state while debugging.
    # | RunnableLambda(inspect)
    | system_prompt
    | llm
    | StrOutputParser()
)
question = "How should left wrist be positioned throughout the golf swing?"
# Kept as `initial_result`; later cells use it for image retrieval and for
# the combined answer.
initial_result = chain.invoke(question)
print(initial_result)

In [89]:
# Example usage:
image_dir = "../data/images"  # Change this to the actual path where images are stored
# Search diagrams with the answer produced by the retrieval chain. This must
# be `initial_result`: `result` is only assigned by the later combining cell,
# which itself needs the `matched_descriptions` computed here.
prompt = initial_result
matched_images, matched_descriptions = query_with_prompt(prompt, model, tokenizer, index, df, image_dir, k=3)

print(matched_descriptions)
display_image(matched_images[0])

In [94]:
# Prompt that merges the first answer with the description of the retrieved
# diagram. Template variables: {question}, {initial_answer},
# {diagram_description}.
# The rules are numbered 1-7 consecutively, and both few-shot examples use the
# exact lead-in sentence required by rule 1, so the examples no longer
# contradict the instructions.
prompt_template = """
You are a golf instruction assistant. Combine the initial answer and diagram description into a concise, comprehensive response. Follow these rules strictly:

1. First say: "To illustrate this concept, let's look at a diagram from my book:"
2. Insert: [DIAGRAM INSERTED HERE]
3. Briefly explain the diagram, adding only new information not covered in the initial answer.
4. Ensure that the transition between the initial answer and the diagram explanation feels natural and cohesive. You should explain how the diagram helps support your initial answer
5. Keep the total response under 100 words.
6. Do not use phrases like "As you can see", "But why is this important?", or "As I mentioned". Keep it concise
7. Maintain a direct, instructional tone throughout.

Example 1:
User Question: How important is follow-through in a golf swing?
Initial Answer: The follow-through is crucial in a golf swing. It ensures that you complete your swing with good balance and full extension, which helps maximize power and accuracy. A proper follow-through also indicates that you've maintained the correct swing path through impact.

Diagram Description: Diagram showing the correct spine angle at address, with the upper body tilted slightly forward and the lower body in an athletic stance.

Combined Response: The follow-through is crucial in a golf swing. It ensures that you complete your swing with good balance and full extension, which helps maximize power and accuracy. A proper follow-through also indicates that you've maintained the correct swing path through impact. To illustrate this concept, let's look at a diagram from my book:

[DIAGRAM INSERTED HERE]

While this diagram shows the correct spine angle at address, it's highly relevant to achieving a proper follow-through. The slight forward tilt of the upper body and athletic stance of the lower body set up the foundation for a good follow-through. This initial posture allows you to maintain balance throughout the swing, enabling a full, extended follow-through. Remember, a good swing starts with proper setup, which directly impacts your ability to execute an effective follow-through.

Example 2:
User Question: What's the correct way to align my feet for a golf shot?
Initial Answer: For proper foot alignment in golf, your feet should be parallel to the target line, about shoulder-width apart. The line of your toes should point perpendicular to your target line. This alignment helps ensure that your body is properly positioned relative to your target, promoting a more accurate shot.

Diagram Description: Overhead view of a golfer's grip, showing the interlocking style where the pinky of the trailing hand interlocks with the index finger of the lead hand.

Combined Response: For proper foot alignment in golf, your feet should be parallel to the target line, about shoulder-width apart. The line of your toes should point perpendicular to your target line. This alignment helps ensure that your body is properly positioned relative to your target, promoting a more accurate shot. To illustrate this concept, let's look at a diagram from my book:

[DIAGRAM INSERTED HERE]

While this diagram focuses on the grip rather than foot alignment, it's important to understand how these elements work together. A proper grip, as shown in the diagram, complements correct foot alignment by ensuring that your hands are positioned to work in harmony with your body's alignment. The interlocking grip style depicted helps maintain a unified hand position throughout the swing, which is crucial for translating your correct foot alignment into an accurate shot. Remember, every aspect of your setup, from feet to grip, contributes to the overall success of your swing and shot accuracy.

Question: {question}
Initial Answer: {initial_answer}
Diagram Description: {diagram_description}

Now, provide your response for the given user question, initial answer, and diagram description:
"""
# NOTE: this rebinds `system_prompt`, `llm` and `chain` from the retrieval
# cell; re-run that cell before asking a new question.
system_prompt = ChatPromptTemplate.from_template(prompt_template)
llm = OpenAI(temperature=0.2)
chain = (
    system_prompt
    | llm
    | StrOutputParser()
)
# Only the best-matching diagram description is passed to the model.
result = chain.invoke({'question': question,'initial_answer': initial_result, 'diagram_description': matched_descriptions[0]})
print(initial_result)
print(result)

In [91]:
# Show the retrieval-only answer followed by the diagram-augmented answer.
print(initial_result)
print(result)