In [2]:
import sqlite3

connection = sqlite3.connect("wikipedia_chunks_48.db")
cursor = connection.cursor()

# Schema of the chunked corpus: documents -> sections -> text_chunks.
# text_chunks.document_id must point at documents.document_id; referencing a
# column that does not exist in the parent table makes every write fail with
# "foreign key mismatch" as soon as PRAGMA foreign_keys is switched on.
create_table_sql = """
CREATE TABLE documents (
    document_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_title TEXT
);

CREATE TABLE sections (
    section_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_id INTEGER,
    section_title TEXT,
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE TABLE text_chunks (
    chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,
    section_id INTEGER,
    document_id INTEGER,
    content TEXT,
    FOREIGN KEY (section_id) REFERENCES sections (section_id),
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE INDEX idx_section_id ON text_chunks (section_id);
CREATE INDEX idx_document_id ON text_chunks (document_id);
CREATE INDEX idx_section_document_id ON sections (document_id);

"""

try:
    # Plain CREATE TABLE on purpose: re-running against an existing database
    # raises instead of silently keeping an older schema.
    cursor.executescript(create_table_sql)
    connection.commit()
finally:
    # Release the file handle even when schema creation fails.
    connection.close()


In [9]:
import sqlite3

connection = sqlite3.connect("wikipedia_chunks_64.db")
cursor = connection.cursor()

# Schema of the chunked corpus: documents -> sections -> text_chunks.
# text_chunks.document_id must point at documents.document_id; referencing a
# column that does not exist in the parent table makes every write fail with
# "foreign key mismatch" as soon as PRAGMA foreign_keys is switched on.
create_table_sql = """
CREATE TABLE documents (
    document_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_title TEXT
);

CREATE TABLE sections (
    section_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_id INTEGER,
    section_title TEXT,
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE TABLE text_chunks (
    chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,
    section_id INTEGER,
    document_id INTEGER,
    content TEXT,
    FOREIGN KEY (section_id) REFERENCES sections (section_id),
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE INDEX idx_section_id ON text_chunks (section_id);
CREATE INDEX idx_document_id ON text_chunks (document_id);
CREATE INDEX idx_section_document_id ON sections (document_id);

"""

try:
    # Plain CREATE TABLE on purpose: re-running against an existing database
    # raises instead of silently keeping an older schema.
    cursor.executescript(create_table_sql)
    connection.commit()
finally:
    # Release the file handle even when schema creation fails.
    connection.close()


In [45]:
import sqlite3

connection = sqlite3.connect("wikipedia_chunks_256.db")
cursor = connection.cursor()

# Schema of the chunked corpus: documents -> sections -> text_chunks.
# text_chunks.document_id must point at documents.document_id; referencing a
# column that does not exist in the parent table makes every write fail with
# "foreign key mismatch" as soon as PRAGMA foreign_keys is switched on.
create_table_sql = """
CREATE TABLE documents (
    document_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_title TEXT
);

CREATE TABLE sections (
    section_id INTEGER PRIMARY KEY AUTOINCREMENT,
    document_id INTEGER,
    section_title TEXT,
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE TABLE text_chunks (
    chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,
    section_id INTEGER,
    document_id INTEGER,
    content TEXT,
    FOREIGN KEY (section_id) REFERENCES sections (section_id),
    FOREIGN KEY (document_id) REFERENCES documents (document_id)
);

CREATE INDEX idx_section_id ON text_chunks (section_id);
CREATE INDEX idx_document_id ON text_chunks (document_id);
CREATE INDEX idx_section_document_id ON sections (document_id);

"""

try:
    # Plain CREATE TABLE on purpose: re-running against an existing database
    # raises instead of silently keeping an older schema.
    cursor.executescript(create_table_sql)
    connection.commit()
finally:
    # Release the file handle even when schema creation fails.
    connection.close()


In [39]:
import sqlite3

def list_tables(database_path):
    """Return the names of all tables stored in a SQLite database.

    Args:
        database_path: Path of the SQLite file to open.

    Returns:
        list[str]: Table names in the order ``sqlite_master`` yields them.
        Internal tables such as ``sqlite_sequence`` are included because the
        query filters on ``type='table'`` only.
    """
    connection = sqlite3.connect(database_path)
    try:
        rows = connection.execute(
            "SELECT name FROM sqlite_master WHERE type='table';"
        ).fetchall()
    finally:
        # Close the handle even when the query raises (e.g. not a database file).
        connection.close()

    return [name for (name,) in rows]

# List the tables of the 256-token chunk database, one name per line.
database_path = "wikipedia_chunks_256.db"
tables = list_tables(database_path)

print("\n".join(["Tables in the database:", *tables]))


Tables in the database:
documents
sqlite_sequence
sections
text_chunks


In [3]:
import sqlite3

def print_schemas(database_path):
    """Print the columns of every table in a SQLite database.

    For each table a header line is printed, followed by one line per column
    with its name, declared type and any ``NOT NULL`` / ``PRIMARY KEY`` flags.

    Args:
        database_path: Path of the SQLite file to inspect.
    """
    connection = sqlite3.connect(database_path)
    try:
        cursor = connection.cursor()

        # Query for all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            print(f"Schema for table {table_name}:")

            # PRAGMA arguments cannot be bound as parameters, so the identifier
            # is double-quoted (with embedded quotes doubled) to survive names
            # containing spaces, keywords or punctuation.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"PRAGMA table_info({quoted_name});")

            for _cid, column_name, column_type, not_null, _default, primary_key in cursor.fetchall():
                constraints = []
                if not_null:
                    constraints.append("NOT NULL")
                if primary_key:
                    constraints.append("PRIMARY KEY")
                constraints_str = " ".join(constraints)
                print(f"  {column_name} {column_type} {constraints_str}")
    finally:
        # Close the handle even when one of the queries raises.
        connection.close()

# Dump the column layout of the 512-token chunk database.
database_path = "wikipedia_chunks_512.db"
print_schemas(database_path=database_path)


Schema for table documents:
  document_id INTEGER PRIMARY KEY
  document_title TEXT 
Schema for table sqlite_sequence:
  name  
  seq  
Schema for table sections:
  section_id INTEGER PRIMARY KEY
  document_id INTEGER 
  section_title TEXT 
Schema for table text_chunks:
  chunk_id INTEGER PRIMARY KEY
  section_id INTEGER 
  document_id INTEGER 
  content TEXT 


In [5]:

import sqlite3

def get_count(database_path):
    """Print the number of rows in the ``documents`` table.

    Args:
        database_path: Path of the SQLite file to query.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist.
    """
    connection = sqlite3.connect(database_path)
    try:
        cursor = connection.cursor()

        # Count the stored documents
        cursor.execute("SELECT COUNT(*) FROM documents;")
        count = cursor.fetchone()[0]
        cursor.close()
    finally:
        # Close the handle even when the query raises.
        connection.close()

    print(count)

# Report how many documents the 512-token chunk database holds.
database_path = "wikipedia_chunks_512.db"
get_count(database_path=database_path)


6082528


In [41]:
import sqlite3

def print_schemas_and_indices(database_path):
    """Print columns and indices for every table in a SQLite database.

    For each table: a separator, the column list (name, declared type and
    ``NOT NULL`` / ``PRIMARY KEY`` flags), then every index with its
    uniqueness flag, origin code and indexed columns.

    Args:
        database_path: Path of the SQLite file to inspect.
    """

    def quote_identifier(identifier):
        # PRAGMA arguments cannot be bound as parameters; double-quote the name
        # (doubling embedded quotes) so spaces, keywords or punctuation are safe.
        return '"' + identifier.replace('"', '""') + '"'

    connection = sqlite3.connect(database_path)
    try:
        cursor = connection.cursor()

        # Query for all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            print("----------------------------")
            print(f"Schema for table {table_name}:")

            # Query for the schema of the current table
            cursor.execute(f"PRAGMA table_info({quote_identifier(table_name)});")
            for _cid, column_name, column_type, not_null, _default, primary_key in cursor.fetchall():
                constraints = []
                if not_null:
                    constraints.append("NOT NULL")
                if primary_key:
                    constraints.append("PRIMARY KEY")
                constraints_str = " ".join(constraints)
                print(f"  {column_name} {column_type} {constraints_str}")

            # Query for the indices of the current table
            cursor.execute(f"PRAGMA index_list({quote_identifier(table_name)});")
            indices = cursor.fetchall()

            print(f"Indices for table {table_name}:")
            for index in indices:
                # Row layout: (seq, name, unique, origin, partial)
                index_name, unique, origin = index[1:4]
                print(f"  Index Name: {index_name}, Unique: {unique}, Origin: {origin}")

                # Query for the columns in the current index
                cursor.execute(f"PRAGMA index_info({quote_identifier(index_name)});")
                # The column name is NULL for rowid/expression index terms;
                # substitute a placeholder so the join below cannot fail.
                column_names = [
                    col[2] if col[2] is not None else "<expression>"
                    for col in cursor.fetchall()
                ]
                print(f"    Columns: {', '.join(column_names)}")
    finally:
        # Close the handle even when one of the queries raises.
        connection.close()

# Dump columns and indices of the 256-token chunk database.
database_path = "wikipedia_chunks_256.db"
print_schemas_and_indices(database_path=database_path)


----------------------------
Schema for table documents:
  document_id INTEGER PRIMARY KEY
  document_title TEXT 
Indices for table documents:
----------------------------
Schema for table sqlite_sequence:
  name  
  seq  
Indices for table sqlite_sequence:
----------------------------
Schema for table sections:
  section_id INTEGER PRIMARY KEY
  document_id INTEGER 
  section_title TEXT 
Indices for table sections:
  Index Name: idx_section_document_id, Unique: 0, Origin: c
    Columns: document_id
----------------------------
Schema for table text_chunks:
  chunk_id INTEGER PRIMARY KEY
  section_id INTEGER 
  document_id INTEGER 
  content TEXT 
Indices for table text_chunks:
  Index Name: idx_document_id, Unique: 0, Origin: c
    Columns: document_id
  Index Name: idx_section_id, Unique: 0, Origin: c
    Columns: section_id


In [42]:
import sqlite3

connection = sqlite3.connect("wikipedia_chunks_256.db")
cursor = connection.cursor()

try:
    # Parent row first: the document, whose generated key feeds the rows below.
    cursor.execute(
        "INSERT INTO documents (document_title) VALUES (?)",
        ('Document Title',),
    )
    document_id = cursor.lastrowid

    # A section attached to that document; keep its generated key as well.
    cursor.execute(
        "INSERT INTO sections (document_id, section_title) VALUES (?, ?)",
        (document_id, 'Section Title'),
    )
    section_id = cursor.lastrowid

    # Finally the chunk, linked to both the section and the document.
    cursor.execute(
        "INSERT INTO text_chunks (section_id, document_id, content) VALUES (?, ?, ?)",
        (section_id, document_id, 'Content here'),
    )
    # All three inserts become visible together.
    connection.commit()

except sqlite3.Error as e:
    # Any SQLite failure undoes whatever part of the chain was already written.
    print("Error:", e)
    connection.rollback()

finally:
    connection.close()

In [11]:
import sqlite3

def query_documents(database_path, limit=None):
    """Load documents with their sections and chunks into a nested dict.

    Args:
        database_path: Path of the SQLite chunk database.
        limit: Optional maximum number of documents to load, taken in
            ascending ``document_id`` order. ``None`` (the default) loads
            every document, which materialises the whole corpus in memory.

    Returns:
        dict: ``{document_id: {'title': str, 'sections': {section_id:
        {'title': str, 'chunks': {chunk_id: content}}}}}``. Documents without
        sections and sections without chunks are kept with empty containers
        because the query uses LEFT JOINs.
    """
    if limit is None:
        documents_source = "documents"
        params = ()
    else:
        # Restrict the document set before joining so only the requested
        # documents' sections and chunks are read.
        documents_source = (
            "(SELECT document_id, document_title FROM documents "
            "ORDER BY document_id LIMIT ?)"
        )
        params = (limit,)

    query = f"""
    SELECT
        d.document_id, d.document_title,
        s.section_id, s.section_title,
        tc.chunk_id, tc.content
    FROM {documents_source} d
    LEFT JOIN sections s ON s.document_id = d.document_id
    LEFT JOIN text_chunks tc ON tc.document_id = d.document_id AND tc.section_id = s.section_id
    ORDER BY d.document_id, s.section_id, tc.chunk_id;
    """

    # Organize the results by document, section, and chunk
    documents = {}
    connection = sqlite3.connect(database_path)
    try:
        # Iterate the cursor directly so rows are streamed, not buffered.
        for document_id, document_title, section_id, section_title, chunk_id, content in connection.execute(query, params):
            document = documents.setdefault(
                document_id, {'title': document_title, 'sections': {}}
            )

            # NULL section columns mean the document has no sections at all.
            if section_id is None:
                continue
            section = document['sections'].setdefault(
                section_id, {'title': section_title, 'chunks': {}}
            )

            # NULL chunk columns mean the section has no chunks.
            if chunk_id is not None:
                section['chunks'][chunk_id] = content
    finally:
        # Close the handle even when the query fails or iteration is interrupted.
        connection.close()

    return documents

# Load the 64-token chunk database into the nested dict structure.
database_path = "wikipedia_chunks_64.db"
documents = query_documents(database_path)

# Show only the first document: its sections and a preview of every chunk.
for doc_id, doc_data in documents.items():
    print(f"Document {doc_id}: {doc_data['title']}")
    for sec_id, sec_data in doc_data['sections'].items():
        print(f"  Section {sec_id}: {sec_data['title']}")
        for chunk_id, content in sec_data['chunks'].items():
            # Preview is capped at the first 700 characters of the chunk.
            print(f"    Chunk {chunk_id}: {content[:700]}...")
    # Stop after the first document.
    break


KeyboardInterrupt: 

In [54]:
import sqlite3
import numpy as np

# Open (creating it on first use) the SQLite file that holds the embedding vectors.
# The connection is intentionally left open in `conn` / `cursor`.
conn = sqlite3.connect('embedding_vectors_256.db')
cursor = conn.cursor()

# One row per embedding: integer key plus the raw vector bytes.
# IF NOT EXISTS keeps the cell re-runnable; `with conn` commits on success.
with conn:
    cursor.execute('''CREATE TABLE IF NOT EXISTS embeddings
                  (id INTEGER PRIMARY KEY,
                   vector BLOB)''')


In [1]:
import sqlite3
import numpy as np

# Open (creating it on first use) the SQLite file that holds the embedding vectors.
# The connection is intentionally left open in `conn` / `cursor`.
conn = sqlite3.connect('embedding_vectors_48_head.db')
cursor = conn.cursor()

# One row per embedding: integer key plus the raw vector bytes.
# IF NOT EXISTS keeps the cell re-runnable; `with conn` commits on success.
with conn:
    cursor.execute('''CREATE TABLE IF NOT EXISTS embeddings
                  (id INTEGER PRIMARY KEY,
                   vector BLOB)''')


In [89]:
import sqlite3
import numpy as np

# Open (creating it on first use) the SQLite file that holds the embedding vectors.
# The connection is intentionally left open in `conn` / `cursor`.
conn = sqlite3.connect('embedding_vectors_256_head.db')
cursor = conn.cursor()

# One row per embedding: integer key plus the raw vector bytes.
# IF NOT EXISTS keeps the cell re-runnable; `with conn` commits on success.
with conn:
    cursor.execute('''CREATE TABLE IF NOT EXISTS embeddings
                  (id INTEGER PRIMARY KEY,
                   vector BLOB)''')


In [23]:
import sqlite3
import numpy as np

# Open (creating it on first use) the SQLite file that holds the embedding vectors.
# The connection is intentionally left open in `conn` / `cursor`.
conn = sqlite3.connect('embedding_vectors_512_head_bge_small_en.db')
cursor = conn.cursor()

# One row per embedding: integer key plus the raw vector bytes.
# IF NOT EXISTS keeps the cell re-runnable; `with conn` commits on success.
with conn:
    cursor.execute('''CREATE TABLE IF NOT EXISTS embeddings
                  (id INTEGER PRIMARY KEY,
                   vector BLOB)''')


In [2]:
import sqlite3

def count_total_embeddings(database_path="embedding_vectors_512_head_bge_small_en.db"):
    """Print how many rows the ``embeddings`` table contains.

    Args:
        database_path: Path of the SQLite embedding database.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist.
    """
    connection = sqlite3.connect(database_path)
    try:
        # COUNT(*) always yields exactly one row, so no empty-result branch
        # is needed.
        (total_embeddings,) = connection.execute(
            "SELECT COUNT(*) FROM embeddings;"
        ).fetchone()
    finally:
        # Close the handle even when the query raises.
        connection.close()

    print(f"Total number of embeddings: {total_embeddings}")

# Count the embeddings stored so far.
# NOTE(review): this file name differs from the function's default
# ("embedding_vectors_512_head_bge_small_en.db") - confirm it is the intended database.
database_path = "embedding_vectors_512_small_bge_base_en.db"
count_total_embeddings(database_path=database_path)


Total number of embeddings: 0


In [69]:
import sqlite3
import numpy as np

def read_embeddings_from_db(db_path='embedding_vectors_256_head.db', dtype=np.float32):
    """Load every stored embedding from SQLite into a dict of NumPy arrays.

    Args:
        db_path: Path of the SQLite embedding database.
        dtype: NumPy dtype used to decode the stored BLOBs. Must match the
            dtype the vectors were serialised with; defaults to ``float32``.

    Returns:
        dict: ``{id: 1-D numpy array}``. The arrays come from
        ``np.frombuffer`` and are read-only views over the fetched bytes.
        If an error occurs, a message is printed and whatever was decoded
        up to that point is returned (best effort, never raises).
    """
    # Connect to SQLite database
    conn = sqlite3.connect(db_path)

    # Initialize a dictionary to hold retrieved embeddings
    retrieved_embeddings = {}

    try:
        # Stream the rows instead of fetchall() so the raw BLOBs are not all
        # held in memory next to the decoded arrays.
        for embedding_id, embedding_blob in conn.execute("SELECT id, vector FROM embeddings"):
            # Convert the blob back to a NumPy array
            retrieved_embeddings[embedding_id] = np.frombuffer(embedding_blob, dtype=dtype)

    except Exception as e:
        print(f"An error occurred while reading from the database: {e}")

    finally:
        # Close the connection
        conn.close()

    return retrieved_embeddings

# Read embeddings from the SQLite database
retrieved_embeddings = read_embeddings_from_db()

# Shallow copy into the id -> vector mapping used by the cells below.
# dict() replaces the element-wise loop, whose loop variable `id` shadowed
# the builtin for the rest of the session.
dict_embeddings = dict(retrieved_embeddings)


In [70]:
# Display the vector stored under id 1 as a sanity check of the loaded data.
dict_embeddings[1]

array([-5.64432405e-02, -1.29048899e-01, -5.17341375e-01,  1.63462497e-02,
        5.16250730e-02,  2.69616842e-01,  2.30784461e-01,  2.95385391e-01,
       -1.05657652e-01,  3.38423029e-02,  9.50249434e-02, -1.25602156e-01,
       -4.20076668e-01,  3.55496973e-01,  2.82935172e-01, -3.50999206e-01,
        6.63529262e-02,  1.77731812e-01, -1.84400514e-01,  5.61893821e-01,
        9.75212902e-02, -2.53144145e-01, -1.96457982e-01, -2.95183986e-01,
        1.85404211e-01,  2.26720959e-01,  1.81408852e-01,  3.49225253e-02,
       -1.23024739e-01, -1.22507393e+00,  1.25242338e-01, -1.01833425e-01,
        5.61418273e-02,  3.43409833e-03, -6.11025132e-02,  3.18520010e-01,
        2.49926075e-01, -3.75524193e-01, -2.68672347e-01,  1.84110478e-01,
        5.76988533e-02,  2.80549228e-01,  3.03156674e-01, -6.65382072e-02,
        8.92705470e-03, -2.17787713e-01, -1.84129849e-01, -4.00107861e-01,
       -5.25339842e-01, -4.33660299e-01,  5.24237275e-01,  2.91567385e-01,
       -1.89090788e-01,  

In [71]:
from sentence_transformers.util import cos_sim
from sentence_transformers import SentenceTransformer

# Alternative local models that can be swapped in:
#embedding_model_path = "/mystuff/llm/gte-base"
#embedding_model_path = "/mystuff/llm/all-MiniLM-L12-v2"
embedding_model_path = "/mystuff/llm/bge-small-en"

embedding_model = SentenceTransformer(embedding_model_path, device='cuda')

# Encode the query once: it does not change between chunks, so running the
# model inside the loop repeated the same computation for every embedding.
sentences = ['What is the definition of anarchism?']
embeddings = embedding_model.encode(sentences)

# Cosine similarity of the query against every stored chunk embedding;
# chunk_ids[i] and scores[i] describe the same chunk.
chunk_ids = []
scores = []
for chunk_id, chunk_embedding in dict_embeddings.items():
    # cos_sim returns a 1x1 matrix for two single vectors.
    score = float(cos_sim(embeddings[0], chunk_embedding)[0][0])
    scores.append(score)
    chunk_ids.append(chunk_id)

In [72]:
%%time
# Time one encode() call for a single query sentence.
sentences = ['What is the definition of anarchism?']
embeddings = embedding_model.encode(sentences)


CPU times: user 6.92 ms, sys: 0 ns, total: 6.92 ms
Wall time: 6.26 ms


In [73]:
import pandas as pd

# One row per chunk: its id and its cosine similarity to the query.
df = pd.DataFrame({"chunk": chunk_ids, "score": scores})

In [74]:
# Rank chunks from most to least similar to the query.
df.sort_values(by=["score"], ascending=False)

Unnamed: 0,chunk,score
0,1,0.905538
224,225,0.839451
245,246,0.809687
2550,2551,0.807616
1236,1237,0.807363
...,...,...
1119,1120,0.624648
2157,2158,0.624088
2777,2778,0.621896
2608,2609,0.614418


In [75]:
import sqlite3

def fetch_chunk_by_id(database_path, chunk_id):
    """Print one text chunk together with its parent section and document.

    Args:
        database_path: Path of the SQLite chunk database.
        chunk_id: Primary key of the chunk to look up.

    Prints the chunk id and content, then the section and document it belongs
    to. Because the query uses inner JOINs, a chunk whose section or document
    row is missing is reported as not found as well.
    """
    query = """
    SELECT
        tc.chunk_id, tc.content,
        s.section_id, s.section_title,
        d.document_id, d.document_title
    FROM text_chunks tc
    JOIN sections s ON tc.section_id = s.section_id
    JOIN documents d ON tc.document_id = d.document_id
    WHERE tc.chunk_id = ?;
    """

    connection = sqlite3.connect(database_path)
    try:
        row = connection.execute(query, (chunk_id,)).fetchone()
    finally:
        # Close the handle even when the query raises.
        connection.close()

    if row is None:
        print("No chunk found with the given ID.")
        return

    # Separate local name so the `chunk_id` argument is not overwritten.
    found_chunk_id, content, section_id, section_title, document_id, document_title = row
    print(f"Chunk ID: {found_chunk_id}")
    print(f"Content: {content}")
    print(f"Belongs to Section ID: {section_id}, Title: {section_title}")
    print(f"Belongs to Document ID: {document_id}, Title: {document_title}")

# Look up the top-ranked chunk (id 1) in the 256-token chunk database.
database_path = "wikipedia_chunks_256.db"
chunk_id_to_query = 1  # change to inspect another chunk
fetch_chunk_by_id(database_path, chunk_id=chunk_id_to_query)


Chunk ID: 1
Content: '''Anarchism''' is a political philosophy and movement that is skeptical of all justifications for authority and seeks to abolish the institutions it claims maintain unnecessary coercion and hierarchy, typically including nation states, and capitalism. Anarchism advocates for the replacement of the state with stateless societies and voluntary free associations. As a historically left-wing movement, this reading of anarchism is placed on the farthest left of the political spectrum, usually described as the libertarian wing of the socialist movement (libertarian socialism).
Belongs to Section ID: 1, Title: Introduction
Belongs to Document ID: 1, Title: Anarchism


In [76]:
# Inspect chunk 225; relies on `database_path` set in the previous cell.
chunk_id_to_query = 225
fetch_chunk_by_id(database_path, chunk_id=chunk_id_to_query)


Chunk ID: 225
Content: Alabama's 67 county sheriffs are elected in partisan, at-large races, and Republicans retain the vast majority of those posts. The current split is 18 Democrats, and 49 Republicans as of 2023. However, most of the Democratic sheriffs preside over urban and more populated counties. The majority of Republican sheriffs have been elected in the more rural counties with lower population. The state of Alabama has  and 11 African-American sheriffs.

==== Federal elections ====
The state's two U.S. senators are Katie Britt and Tommy Tuberville, both of whom are Republican. In the U.S. House of Representatives, the state is represented by seven members, six of whom are Republicans: (Jerry Carl, Mike Rogers, Robert Aderholt, Dale Strong, Barry Moore, and Gary Palmer) and one Democrat: Terri Sewell, who represents the Black Belt as well as most of the predominantly black portions of Birmingham, Tuscaloosa and Montgomery.
Belongs to Section ID: 29, Title: Law and government


In [20]:
import sqlite3

def fetch_document_and_chunks_by_title(database_path, document_title):
    """Fetch a document, its sections and its chunks by exact title.

    Args:
        database_path: Path of the SQLite chunk database.
        document_title: Title to match exactly against ``documents.document_title``.

    Returns:
        dict: ``{'title': document_title, 'sections': {section_id: {'title':
        str, 'chunks': {chunk_id: content}}}, 'id': document_id}``.
        ``'id'`` is ``None`` and ``'sections'`` is empty when no document has
        that title, so callers can always read ``document['id']``. If several
        documents share the title, their sections are merged and ``'id'``
        holds the id from the last row returned.
    """
    query = """
    SELECT
        d.document_id, d.document_title,
        s.section_id, s.section_title,
        tc.chunk_id, tc.content
    FROM documents d
    LEFT JOIN sections s ON s.document_id = d.document_id
    LEFT JOIN text_chunks tc ON tc.document_id = d.document_id AND tc.section_id = s.section_id
    WHERE d.document_title = ?
    ORDER BY d.document_id, s.section_id, tc.chunk_id;
    """

    # 'id' is pre-populated so a title with no match still yields a complete dict.
    document = {
        'title': document_title,
        'sections': {},
        'id': None
    }

    connection = sqlite3.connect(database_path)
    try:
        for document_id, _, section_id, section_title, chunk_id, content in connection.execute(query, (document_title,)):
            document["id"] = document_id

            # NULL section columns mean the document has no sections.
            if section_id is None:
                continue
            section = document['sections'].setdefault(
                section_id, {'title': section_title, 'chunks': {}}
            )

            # NULL chunk columns mean the section has no chunks.
            if chunk_id is not None:
                section['chunks'][chunk_id] = content
    finally:
        # Close the handle even when the query raises.
        connection.close()

    return document

# Look a document up by its exact title in the 256-token chunk database.
database_path = "wikipedia_chunks_256.db"

document_title_to_query = "Modified Newtonian dynamics"  # title to look up
#document_title_to_query = "April 6"  # alternative title to try
document = fetch_document_and_chunks_by_title(database_path, document_title_to_query)

# Outline of the document: header lines, then each section with chunk previews.
print(f"Document ID: {document['id']}")
print(f"Document Title: {document['title']}")
for sec_id, sec_data in document['sections'].items():
    print(f"  Section ID: {sec_id}, Title: {sec_data['title']}")
    for chunk_id, content in sec_data['chunks'].items():
        # Preview only: the first 50 characters of each chunk.
        print(f"    Chunk ID: {chunk_id}, Content: {content[:50]}...")


Document ID: 2407533
Document Title: Modified Newtonian dynamics
  Section ID: 9022060, Title: Introduction
    Chunk ID: 21568871, Content: '''Modified Newtonian dynamics''' ('''MOND''') is ...
    Chunk ID: 21568872, Content: Created in 1982 and first published in 1983 by Isr...
    Chunk ID: 21568873, Content: MOND is an example of a class of theories known as...
    Chunk ID: 21568874, Content: The accurate measurement of the speed of gravitati...
  Section ID: 9022061, Title: Overview
    Chunk ID: 21568875, Content: M33

Several independent observations point to the...
    Chunk ID: 21568876, Content: (1) 

 There exists in galaxies large quantities o...
    Chunk ID: 21568877, Content: Newton's Laws do not apply to galaxies.


Option (...
    Chunk ID: 21568878, Content: Here  is the Newtonian force,  is the object's (gr...
    Chunk ID: 21568879, Content: By fitting his law to rotation curve data, Milgrom...
    Chunk ID: 21568880, Content: Whenever a small mass, ''m'' is near 

In [38]:
import sqlite3

def fetch_document_and_chunks_by_id(database_path, document_id):
    """Fetch a document, its sections and its chunks by document id.

    Args:
        database_path: Path of the SQLite chunk database.
        document_id: Primary key of the document to load.

    Returns:
        dict: ``{'id': document_id, 'sections': {section_id: {'title': str,
        'chunks': {chunk_id: content}}}, 'title': str}``. ``'title'`` is
        ``None`` and ``'sections'`` is empty when no document has that id,
        so callers can always read ``document['title']``.
    """
    query = """
    SELECT
        d.document_id, d.document_title,
        s.section_id, s.section_title,
        tc.chunk_id, tc.content
    FROM documents d
    LEFT JOIN sections s ON s.document_id = d.document_id
    LEFT JOIN text_chunks tc ON tc.document_id = d.document_id AND tc.section_id = s.section_id
    WHERE d.document_id = ?
    ORDER BY d.document_id, s.section_id, tc.chunk_id;
    """

    # 'title' is pre-populated so an unknown id still yields a complete dict.
    document = {
        'id': document_id,
        'sections': {},
        'title': None
    }

    connection = sqlite3.connect(database_path)
    try:
        for _, document_title, section_id, section_title, chunk_id, content in connection.execute(query, (document_id,)):
            # Set the document title
            document['title'] = document_title

            # NULL section columns mean the document has no sections.
            if section_id is None:
                continue
            section = document['sections'].setdefault(
                section_id, {'title': section_title, 'chunks': {}}
            )

            # NULL chunk columns mean the section has no chunks.
            if chunk_id is not None:
                section['chunks'][chunk_id] = content
    finally:
        # Close the handle even when the query raises.
        connection.close()

    return document

# Usage example. Candidate indices noted during exploration: [1054330, 0, 1880579]
database_path = "wikipedia_chunks_256.db"
# NOTE(review): presumably those are 0-based indices while document_id is
# 1-based, so add 1 before querying (e.g. 1880579 -> 1880580) - confirm.
document_id_to_query = 2102744  # document id to look up
#document_id_to_query = 643378
document = fetch_document_and_chunks_by_id(database_path, document_id_to_query)

# Outline of the document: header line, then each section with its chunks.
print(f"Document ID: {document['id']}, Title: {document['title']}")
for sec_id, sec_data in document['sections'].items():
    print(f"  Section ID: {sec_id}, Title: {sec_data['title']}")
    for chunk_id, content in sec_data['chunks'].items():
        # Chunk 248 is printed in full; every other chunk shows its length
        # and only the first 50 characters.
        shown = content if chunk_id == 248 else content[:50]
        print(f"    Chunk ID: {chunk_id}, Content: {len(content)} {shown}...")
        


Document ID: 2102744, Title: Glossary of anarchism
  Section ID: 8131245, Title: Introduction
    Chunk ID: 19778104, Content: 800 The following is a list of terms specific to anarc...
    Chunk ID: 19778105, Content: 801 :A small non-hierarchical collective of individual...
    Chunk ID: 19778106, Content: 788 :Social disorder and civil war in an absence of go...
    Chunk ID: 19778107, Content: 1033 ==B==


:The "two red years" of political agitatio...
    Chunk ID: 19778108, Content: 820 :A participatory decision making process for colle...
    Chunk ID: 19778109, Content: 986 ==D==



:A private (or possibly cooperative) orga...
    Chunk ID: 19778110, Content: 981 ==F==


:The practice of retrieving edible food th...
    Chunk ID: 19778111, Content: 915 ==G==


:Second-hand stores where all goods are fr...
    Chunk ID: 19778112, Content: 933 :A philosophy which demands the embracing of immed...
    Chunk ID: 19778113, Content: 922 : Dutch anarchists influenced by Peter Kropotkin 

In [5]:
# Display the raw nested dict currently bound to `document`.
document

{'id': 643378,
 'sections': {3099532: {'title': 'Introduction',
   'chunks': {9051911: "'''Cosmic background''' may refer to:\n\n* Cosmic microwave background radiation (CMB)\n* Cosmic neutrino background (CνB)\n* Cosmic gravitational wave background (GWB)\n* Cosmic infrared background (CIB)\n* Cosmic background radiation\n* Cosmic X-ray background\n* Cosmic visible light background"}}},
 'title': 'Cosmic background'}