In [None]:
from collections import Counter
import math

1. Dataset (corpus of documents/sentences)

In [None]:
# Toy corpus: ten one-sentence activity suggestions. The retrieval step below
# scores each sentence against the user's query and returns the best match.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]


2. Cosine similarity

In [None]:
def cosine_similarity(query, document):
  """Return the cosine similarity between two texts' bag-of-words vectors.

  Each text is lower-cased and split on whitespace; the resulting token
  counts are used as the vector components.

  Args:
    query: The query string.
    document: The document string to compare against the query.

  Returns:
    A value in [0, 1]. Returns 0 when either text contains no tokens
    (empty or whitespace-only), since the similarity is undefined there.

  Note:
    Punctuation is not stripped, so "air." and "air" are different tokens.
  """
  # split() with no argument splits on any run of whitespace and never yields
  # empty tokens. split(" ") produced "" tokens for empty input or repeated
  # spaces, which made two empty strings score 1.0.
  query_counter = Counter(query.lower().split())
  document_counter = Counter(document.lower().split())

  # Dot product: only tokens present in the query can contribute, and a
  # Counter returns 0 for tokens the document does not contain.
  dot_product = sum(count * document_counter[token] for token, count in query_counter.items())

  # Euclidean norms of the two count vectors.
  query_magnitude = math.sqrt(sum(count ** 2 for count in query_counter.values()))
  document_magnitude = math.sqrt(sum(count ** 2 for count in document_counter.values()))

  # Guard against division by zero when either vector is empty.
  norm_product = query_magnitude * document_magnitude
  if not norm_product:
    return 0
  return dot_product / norm_product

3. Rank results

In [None]:
def return_response(query, corpus):
  """Return the document in ``corpus`` that is most similar to ``query``.

  Args:
    query: The query string.
    corpus: An iterable of document strings to rank.

  Returns:
    The document with the highest cosine similarity to the query. When
    several documents share the top score, the first one is returned.

  Raises:
    ValueError: If ``corpus`` is empty.
  """
  documents = list(corpus)
  if not documents:
    raise ValueError("corpus must contain at least one document")
  # Select from the corpus that was passed in, not from a module-level global.
  # max() returns the first of several equally-scored documents.
  return max(documents, key=lambda doc: cosine_similarity(query, doc))

In [None]:
# Example query used to exercise the retrieval step.
user_input = "i like fresh air"

In [None]:
# Retrieve the corpus sentence closest to the user's query. The query lives in
# `user_input`; no variable named `query` is defined at notebook level.
relevant_document = return_response(user_input, corpus_of_documents)

In [None]:
# Display the retrieved document as the cell's output.
relevant_document

'Take a leisurely walk in the park and enjoy the fresh air.'

4. Configure the LLM

* Set up Ollama in Colab

In [None]:
# (Cell 1) Install Ollama by piping the install script from ollama.com into sh.
!curl -fsSL https://ollama.com/install.sh | sh

# (Cell 2) Start the Ollama server in the background.
# The server's stdout is discarded and its stderr is merged into stdout, so no
# server logs appear in the notebook.
import subprocess, time, requests, json, os, sys
server = subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)

# Wait until the server is up: try the local API up to 60 times, sleeping 1 s
# after each failed attempt. Any HTTP response counts as "up"; the status code
# is not inspected.
for _ in range(60):
    try:
        requests.get("http://localhost:11434/api/tags", timeout=2)
        break
    except Exception:
        time.sleep(1)
else:
    # for/else: reached only when the loop never hit `break`, i.e. every attempt raised.
    raise RuntimeError("Ollama server didn't start.")
print("Ollama is running.")


>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Ollama is running.


In [None]:
# (Cell 3) Download the llama2 model (this can take a while).
# The same model name is passed as "model" in the generate request later on.
!ollama pull llama2

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A

In [None]:
# (Cell 4) Your variables for this run.
# NOTE: these assignments replace any `user_input` / `relevant_document`
# values set by earlier cells.
user_input = "Family-friendly ideas for a rainy afternoon."
relevant_document = "Indoor board games and a hot chocolate bar."

import requests, json

# Collects the text fragments streamed back by the model.
full_response = []
prompt = """
You are a bot that makes recommendations for activities. You answer in very short sentences and do not include extra information.
This is the recommended activity: {relevant_document}
The user input is: {user_input}
Compile a recommendation to the user based on the recommended activity and the user input.
"""

url = 'http://localhost:11434/api/generate'

data = {
    "model": "llama2",
    "prompt": prompt.format(user_input=user_input, relevant_document=relevant_document)
}

headers = {'Content-Type': 'application/json'}

# The context manager closes the streamed connection even if an error is raised below.
with requests.post(url, data=json.dumps(data), headers=headers, stream=True) as response:
    # Fail loudly on HTTP errors (e.g. the model has not been pulled) instead
    # of silently printing an empty answer.
    response.raise_for_status()
    # The reply is newline-delimited JSON; each object carries one text fragment.
    for line in response.iter_lines():
        if not line:
            continue  # skip keep-alive blank lines
        decoded_line = json.loads(line.decode('utf-8'))
        # Surface errors reported inside the stream rather than dropping them.
        if 'error' in decoded_line:
            raise RuntimeError(f"Ollama returned an error: {decoded_line['error']}")
        full_response.append(decoded_line.get('response', ''))

print(''.join(full_response))


 "Indoor board games and a hot chocolate bar are a perfect match for a rainy afternoon! 😊"
