# GraphRAG Guide with PostgreSQL

Import the movie data from CSV into a pandas DataFrame.

In [28]:
import pandas as pd
# Read the movie catalogue from the local CSV file and preview the first
# five rows to confirm the columns were parsed as expected.
df = pd.read_csv(filepath_or_buffer='./data/movies.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,title,overview,release_date,popularity,vote_average,vote_count
0,0,19404,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...",1995-10-20,18.433,8.7,2763
1,1,724089,Gabriel's Inferno Part II,Professor Gabriel Emerson finally learns the t...,2020-07-31,8.439,8.7,1223
2,2,278,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,1994-09-23,65.57,8.7,18637
3,3,238,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",1972-03-14,63.277,8.7,14052
4,4,761053,Gabriel's Inferno Part III,The final part of the film adaption of the ero...,2020-11-19,26.691,8.7,773


Make sure the Azure infrastructure is deployed by running `terraform apply` in the `terraform` folder before executing the next cell.

In [None]:
import subprocess
import os
from urllib.parse import quote


def terraform_output(name, terraform_dir='terraform'):
    """Return the raw value of the Terraform output called `name`.

    Runs `terraform output -raw <name>` with `terraform_dir` as the working
    directory of the child process, so the notebook's own working directory
    is never changed (and cannot be left pointing at the wrong folder if the
    command fails).

    Raises:
        subprocess.CalledProcessError: if terraform exits with a non-zero
            status (for example when the infrastructure is not deployed).
    """
    completed = subprocess.run(
        ['terraform', 'output', '-raw', name],
        cwd=terraform_dir,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout.decode('utf-8')


PGHOST = terraform_output('PGHOST')
PGDATABASE = terraform_output('PGDATABASE')
PGUSER = terraform_output('PGUSER')
PGPASSWORD = terraform_output('PGPASSWORD')

# Percent-encode the user, password and database name: characters such as
# '@', ':', '/' or '?' in a generated password would otherwise be parsed as
# URI delimiters and corrupt the connection string.
db_uri = (
    f"postgresql://{quote(PGUSER, safe='')}:{quote(PGPASSWORD, safe='')}"
    f"@{PGHOST}/{quote(PGDATABASE, safe='')}?sslmode=require"
)

# Never print the password: cell outputs are saved with the notebook.
print(f"Using postgresql://{PGUSER}:***@{PGHOST}/{PGDATABASE}?sslmode=require as the database connection string")

Using postgresql://psqladmin:***@psql-graphrag-psbv.postgres.database.azure.com/demo?sslmode=require as the database connection string


In [24]:
import psycopg2
from psycopg2 import sql

# Open the single database connection that every later cell reuses.
conn = psycopg2.connect(dsn=db_uri)

### Install and configure extensions

List extensions

In [25]:
# List every extension currently installed in the connected database.
command = """
SELECT * FROM pg_extension;
"""

with conn.cursor() as cursor:
    cursor.execute(command)
    result = cursor.fetchall()
    # cursor.description holds one entry per result column; item 0 is its name.
    columns = [desc[0] for desc in cursor.description]

# Use a dedicated name here: `df` holds the movies loaded from the CSV and is
# iterated later when the rows are inserted, so it must not be rebound.
extensions_df = pd.DataFrame(result, columns=columns)
extensions_df


Unnamed: 0,oid,extname,extowner,extnamespace,extrelocatable,extversion,extconfig,extcondition
0,14258,plpgsql,10,11,False,1.0,,
1,24760,vector,10,2200,True,0.7.0,,
2,25080,pg_diskann,10,2200,False,0.3.2,,
3,25099,azure_ai,10,11,False,1.1.0,,


In [21]:
# Enable the three extensions this guide relies on. IF NOT EXISTS makes the
# statements safe to run again on a database that already has them.
command = """
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_diskann;
CREATE EXTENSION IF NOT EXISTS azure_ai;
"""

try:
    with conn.cursor() as cursor:
        cursor.execute(command)
    # Persist the change once the statements have run without error.
    conn.commit()
except psycopg2.Error as exc:
    # Undo the failed transaction so the connection stays usable, then report.
    conn.rollback()
    print(f"Error: {exc}")

Create table movies

In [26]:
# Table definition for the movie catalogue:
#   - embeddings: 3072-dimension vector column, left empty by the insert below
#   - full_text_search: tsvector generated by PostgreSQL from combined_text
command = """
CREATE TABLE IF NOT EXISTS movies (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255),
    overview TEXT,
    combined_text TEXT,
    embeddings vector(3072),
    full_text_search tsvector GENERATED ALWAYS AS (to_tsvector('english', combined_text)) STORED
);
"""

try:
    with conn.cursor() as cursor:
        cursor.execute(command)
    # Persist the new table once the statement has run without error.
    conn.commit()
except psycopg2.Error as exc:
    # Undo the failed transaction so the connection stays usable, then report.
    conn.rollback()
    print(f"Error: {exc}")


Insert data into movies table

In [29]:
import psycopg2.extras

# Columns the INSERT needs. Fail early with a clear message if `df` does not
# hold the movie data (for example because the name was rebound to another
# frame since the CSV was loaded).
required_columns = ['id', 'title', 'overview']
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(f"df is missing movie columns: {missing_columns}")

# Build every (id, title, overview, combined_text) tuple up front.
# A missing overview is stored as NULL and contributes an empty string to
# combined_text, instead of the float NaN / the literal text "nan".
movie_rows = [
    (
        int(movie_id),
        title,
        None if pd.isna(overview) else overview,
        f"TITLE: {title} OVERVIEW: {'' if pd.isna(overview) else overview}",
    )
    for movie_id, title, overview in df[required_columns].itertuples(index=False, name=None)
]

try:
    with conn.cursor() as cursor:
        # execute_values expands the single %s into batches of row tuples,
        # avoiding one network round-trip per movie.
        psycopg2.extras.execute_values(
            cursor,
            """
            INSERT INTO movies (id, title, overview, combined_text)
            VALUES %s
            ON CONFLICT (id) DO NOTHING;
            """,
            movie_rows,
        )
    conn.commit()
except psycopg2.Error as e:
    # Roll back so a failed insert does not leave the shared connection in an
    # aborted transaction that would break every later cell.
    print(f"Error: {e}")
    conn.rollback()

In [30]:
# Fetch a small sample of the stored rows to verify the insert and the
# generated full_text_search column.
command = """
SELECT * FROM movies LIMIT 10;
"""

with conn.cursor() as cursor:
    cursor.execute(command)
    # Item 0 of each cursor.description entry is the column name.
    columns = [column_meta[0] for column_meta in cursor.description]
    result = cursor.fetchall()

pd.DataFrame(data=result, columns=columns)

Unnamed: 0,id,title,overview,combined_text,embeddings,full_text_search
0,19404,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...",TITLE: Dilwale Dulhania Le Jayenge OVERVIEW: R...,,'adher':38 'baldev':25 'begin':77 'carefre':11...
1,724089,Gabriel's Inferno Part II,Professor Gabriel Emerson finally learns the t...,TITLE: Gabriel's Inferno Part II OVERVIEW: Pro...,,'anoth':61 'arm':63 'back':53 'come':23 'dant'...
2,278,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,TITLE: The Shawshank Redemption OVERVIEW: Fram...,,'1940s':9 'account':36 'admir':54 'amor':42 'a...
3,238,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",TITLE: The Godfather OVERVIEW: Spanning the ye...,,'1945':8 '1955':10 'american':18 'attempt':32 ...
4,761053,Gabriel's Inferno Part III,The final part of the film adaption of the ero...,TITLE: Gabriel's Inferno Part III OVERVIEW: Th...,,'adapt':14 'anonym':26 'author':28 'canadian':...
5,696374,Gabriel's Inferno,An intriguing and sinful exploration of seduct...,TITLE: Gabriel's Inferno OVERVIEW: An intrigui...,,'captiv':22 'earn':41 'escap':31 'explor':10 '...
6,791373,Zack Snyder's Justice League,Determined to ensure Superman's ultimate sacri...,TITLE: Zack Snyder's Justice League OVERVIEW: ...,,'align':21 'approach':40 'bruce':19 'catastrop...
7,399566,Godzilla vs. Kong,"In a time when monsters walk the Earth, humani...",TITLE: Godzilla vs. Kong OVERVIEW: In a time w...,,'age':48 'battl':45 'collid':41 'collis':26 'c...
8,441130,Wolfwalkers,"In a time of superstition and magic, when wolv...",TITLE: Wolfwalkers OVERVIEW: In a time of supe...,,'apprentic':26 'come':28 'demon':16 'destroy':...
9,424,Schindler's List,The true story of how businessman Oskar Schind...,TITLE: Schindler's List OVERVIEW: The true sto...,,'businessman':11 'factori':30 'ii':34 'jewish'...


### Cleanup

In [None]:
# IF EXISTS keeps the cleanup re-runnable: dropping an already-removed table
# is a no-op instead of an error.
command = "DROP TABLE IF EXISTS movies;"

try:
    with conn.cursor() as cursor:
        cursor.execute(command)
    conn.commit()
except psycopg2.Error as e:
    # Roll back so the connection is not left in an aborted transaction.
    print(f"Error: {e}")
    conn.rollback()
else:
    # Only report success when the statement was actually committed.
    print("Table 'movies' has been dropped.")