<a href="https://colab.research.google.com/github/ningxia202109/llm-learn/blob/main/GraphRAG/GraphRAG_Ollama_llama3_1_8b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Ollama on T4 GPU
# Install Ollama and GPU Package
%%capture --no-stderr
!curl https://ollama.ai/install.sh | sh

!echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
!sudo apt-get update && sudo apt-get install -y cuda-drivers

import os
# Set LD_LIBRARY_PATH so the system NVIDIA library
os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})
os.environ.update({'OLLAMA_HOST': '0.0.0.0'})

!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb

import subprocess
import threading
import time
import socket

def iframe_thread(port):
    """Wait for a local server on ``port``, then expose it through cloudflared.

    Polls ``127.0.0.1:port`` every 0.5 seconds until a TCP connection succeeds,
    then starts ``cloudflared tunnel --url http://127.0.0.1:<port>`` and prints
    every stderr line of that process containing ``"trycloudflare.com "``,
    starting from the first ``http`` in the line.

    Args:
        port: TCP port of the local service to wait for and expose.

    Returns:
        None. The call blocks until cloudflared's stderr stream ends, so it is
        meant to run in a background thread.
    """
    while True:
        time.sleep(0.5)
        # The context manager closes the probe socket on every path, including
        # the successful one that leaves the loop.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            if probe.connect_ex(('127.0.0.1', port)) == 0:
                break

    tunnel = subprocess.Popen(
        ["cloudflared", "tunnel", "--url", f"http://127.0.0.1:{port}"],
        # stdout is never read here; discarding it avoids a pipe that could
        # fill up and block the child process.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    for raw_line in tunnel.stderr:
        # Tolerate undecodable bytes rather than killing the thread mid-log.
        log_line = raw_line.decode(errors="replace")
        if "trycloudflare.com " in log_line:
            print("\n\n\n\n\n")
            print("running ollama server\n\n", log_line[log_line.find("http"):], end='')
            print("\n\n\n\n\n")

# Run the tunnel helper in a daemon thread; it waits for port 11434 to accept connections.
tunnel_thread = threading.Thread(target=iframe_thread, args=(11434,), daemon=True)
tunnel_thread.start()

In [2]:
# Start Ollama
MODEL_NAME="llama3.1:8b"
!ollama serve > ollama-server.log 2>&1 &
!ollama --version

# Run LLM model
!ollama run llama3.1:8b > llama3-1-8b.log 2>&1 &
# Wait for AI MODEL
!while ! ollama list | grep -q "$MODEL_NAME"; do \
  echo "Waiting for $MODEL_NAME to become available..."; \
  sleep 15; \
done
!echo "$MODEL_NAME is now available."

# Pull Ollama embedding
!ollama pull mxbai-embed-large
!ollama list







running ollama server

 https://vessels-bd-pty-tried.trycloudflare.com                                            |






ollama version is 0.3.3
Waiting for llama3.1:8b to become available...
Waiting for llama3.1:8b to become available...
Waiting for llama3.1:8b to become available...
Waiting for llama3.1:8b to become available...
llama3.1:8b is now available.
[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest 
pulling 819c2adf5ce6...   0% ▕▏    0 B/669 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulli

In [3]:
# Sanity check: list the models served under http://localhost:11434/v1,
# the same base URL that settings.yaml uses as api_base.
!curl http://localhost:11434/v1/models

{"object":"list","data":[{"id":"mxbai-embed-large:latest","object":"model","created":1722902140,"owned_by":"library"},{"id":"llama3.1:8b","object":"model","created":1722902121,"owned_by":"library"}]}


In [4]:
# Install packages of GraphRAG
%%capture --no-stderr
!pip install graphrag

In [5]:
# Initialize the GraphRAG project folder.
# Later cells refer to ./ragtest relative to /content, so change directory first.
%cd /content
!mkdir -p ./ragtest/input
!python -m graphrag.index --init --root ./ragtest

/content
2024-08-05 23:57:09.993852: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-05 23:57:10.298725: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-05 23:57:10.376688: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[2KInitializing project at .[35m/[0m[95mragtest[0m
⠋ GraphRAG Indexer 

In [6]:
# Configurea GraphRAG
%%writefile ./ragtest/settings.yaml

# GraphRAG settings for this notebook. Both the chat model (llm) and the
# embedding model (embeddings.llm) are requested from http://localhost:11434/v1.
encoding_model: cl100k_base
skip_workflows: []
llm:
  # NOTE(review): "ollama" looks like a placeholder key for the local server - confirm it is not validated.
  api_key: ollama
  type: openai_chat # or azure_openai_chat
  model:  llama3.1:8b
  model_supports_json: true # recommended if this is available for your model.
  max_tokens: 3000
  # request_timeout: 180.0
  api_base: http://localhost:11434/v1
  # api_version: 2024-02-15-preview
  # organization:
  # deployment_name:
  # Throttle: at most 6000 tokens and 2 requests per minute.
  tokens_per_minute: 6000 # set a leaky bucket throttle
  requests_per_minute: 2 # set a leaky bucket throttle
  max_retries: 3
  # max_retry_wait: 10.0
  # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
  # concurrent_requests: 25 # the number of parallel inflight requests that may be made

parallelization:
  stagger: 0.3
  # num_threads: 50 # the number of threads to use for parallel processing

async_mode: threaded # or asyncio

embeddings:
  ## parallelization: override the global parallelization settings for embeddings
  async_mode: threaded # or asyncio
  llm:
    api_key: ollama
    # `type` is left commented out, so GraphRAG's default applies.
    # TODO confirm the default is openai_embedding.
    # type: openai_embedding # or azure_openai_embedding
    model: mxbai-embed-large:latest
    api_base: http://localhost:11434/v1
    # api_version: 2024-02-15-preview
    # organization:
    # deployment_name:
    # tokens_per_minute: 150_000 # set a leaky bucket throttle
    # requests_per_minute: 10_000 # set a leaky bucket throttle
    # max_retries: 10
    # max_retry_wait: 10.0
    # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
    # concurrent_requests: 25 # the number of parallel inflight requests that may be made
    # batch_size: 16 # the number of documents to send in a single request
    # batch_max_tokens: 8191 # the maximum number of tokens to send in a single request
    # target: required # or optional



chunks:
  size: 300
  overlap: 100
  group_by_columns: [id] # by default, we don't allow chunks to cross documents

# Input documents: UTF-8 text files matching file_pattern under base_dir.
input:
  type: file # or blob
  file_type: text # or csv
  base_dir: "input"
  file_encoding: utf-8
  file_pattern: .*\.txt$

cache:
  type: file # or blob
  base_dir: "cache"
  # connection_string:
  # container_name:

storage:
  type: file # or blob
  base_dir: "output/${timestamp}/artifacts"
  # connection_string:
  # container_name:

reporting:
  type: file # or console, blob
  base_dir: "output/${timestamp}/reports"
  # connection_string:
  # container_name:

entity_extraction:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/entity_extraction.txt"
  entity_types: [organization,person,geo,event]
  max_gleanings: 0

summarize_descriptions:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/summarize_descriptions.txt"
  max_length: 500

claim_extraction:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  # enabled: true
  prompt: "prompts/claim_extraction.txt"
  description: "Any claims or facts that could be relevant to information discovery."
  max_gleanings: 0

community_report:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/community_report.txt"
  max_length: 2000
  max_input_length: 8000

cluster_graph:
  max_cluster_size: 10

embed_graph:
  enabled: false # if true, will generate node2vec embeddings for nodes
  # num_walks: 10
  # walk_length: 40
  # window_size: 2
  # iterations: 3
  # random_seed: 597832

umap:
  enabled: false # if true, will generate UMAP embeddings for nodes

snapshots:
  graphml: false
  raw_entities: false
  top_level_nodes: false

# Every option under local_search is commented out, so no explicit overrides are set here.
local_search:
  # text_unit_prop: 0.5
  # community_prop: 0.1
  # conversation_history_max_turns: 5
  # top_k_mapped_entities: 10
  # top_k_relationships: 10
  # max_tokens: 12000

# Every option under global_search is commented out, so no explicit overrides are set here.
global_search:
  # max_tokens: 12000
  # data_max_tokens: 12000
  # map_max_tokens: 1000
  # reduce_max_tokens: 2000
  # concurrency: 32

Overwriting ./ragtest/settings.yaml


In [7]:
# Sample text
%%writefile /content/ragtest/input/sample.txt

MARLEY'S GHOST

Marley was dead, to begin with. There is no doubt whatever about that.
The register of his burial was signed by the clergyman, the clerk, the
undertaker, and the chief mourner. Scrooge signed it. And Scrooge's name
was good upon 'Change for anything he chose to put his hand to. Old
Marley was as dead as a door-nail.

Writing /content/ragtest/input/sample.txt


In [8]:
# Setup environment variables
from google.colab import userdata
import os

# Placeholder value; settings.yaml in this notebook sets api_key explicitly.
os.environ['GRAPHRAG_API_KEY'] = 'DUMMY_KEY'

# Get your API key from https://console.groq.com/keys and store it as the
# Colab secret GROQ_API_KEY.
# NOTE(review): no other visible cell reads GROQ_API_KEY (settings.yaml points
# at the local Ollama server) - confirm this requirement is still needed.
groq_api_key = userdata.get('GROQ_API_KEY') or ''

# Validate before exporting so a missing or truncated key is never placed in
# the environment, and fail with an explicit error instead of `assert False`.
if len(groq_api_key) < 25:
    raise ValueError("GROQ_API_KEY is required. Sign up and Get your API Key from https://console.groq.com/keys")
os.environ['GROQ_API_KEY'] = groq_api_key

In [9]:
# GraphRAG indexing: run the indexer on the project rooted at ./ragtest.
# The relative path assumes the working directory is /content (set by the earlier %cd).
!python -m graphrag.index --root ./ragtest

2024-08-05 23:58:22.183985: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-05 23:58:22.218988: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-05 23:58:22.229348: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[2K🚀 [32mReading settings from ragtest/settings.yaml[0m
[2K⠦ GraphRAG Indexer 
[2K[1A[2K⠦ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [36m0:00:00[0m [33m0:00:00[0m
[2K[1A[2K[1A[2K⠦ GraphRAG Indexer 
├── Loading Input (text) - 1 files loaded (0 filtered) [90m━━━━━━━━━━━━━

In [22]:
# Global query against the index built under ./ragtest
# NOTE(review): the saved output of this cell is a traceback with execution count 22 and
# timestamps earlier than the indexing run above - re-run after indexing and confirm it succeeds.
!python -m graphrag.query --root ./ragtest --method global "Who is Marley?"

2024-08-05 23:26:38.646085: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-05 23:26:38.666646: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-05 23:26:38.673018: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO: Reading settings from ragtest/settings.yaml
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/graphrag/query/__main_

In [None]:
# Local query: the same question as the global query, asked with --method local
!python -m graphrag.query --root ./ragtest --method local "Who is Marley?"