In [30]:
import os
import re
from urllib.parse import quote

import couchdb
import dotenv

In [31]:
# Load environment variables (presumably from a local .env file — confirm) so
# the COUCHDB_* settings read from os.environ in the next cell are available.
dotenv.load_dotenv()

True

In [32]:
# CouchDB connection settings. Every variable is required: a missing one
# raises KeyError here rather than failing later at connection time.
COUCHDB_HOST, COUCHDB_PORT, COUCHDB_USERNAME, COUCHDB_PASSWORD = (
    os.environ[key]
    for key in ('COUCHDB_HOST', 'COUCHDB_PORT', 'COUCHDB_USERNAME', 'COUCHDB_PASSWORD')
)

In [57]:
class CouchDB:
    """Convenience wrapper around a single database on a CouchDB server.

    On construction the wrapper connects to the server and opens ``dbname``,
    creating the database when it does not exist yet. Most methods report
    what they did via ``print`` rather than raising.

    Attributes:
        host, port, username, password: Connection settings as passed in.
        instance_url: Server URL including the percent-encoded credentials.
        server: The ``couchdb.Server`` handle.
        db: The opened database handle.
    """

    def __init__(self, dbname, host=COUCHDB_HOST, port=COUCHDB_PORT,
                 username=COUCHDB_USERNAME, password=COUCHDB_PASSWORD):
        """Connect to the server and open (or create) ``dbname``.

        Args:
            dbname: Name of the database to open or create.
            host: Server host name; defaults to the COUCHDB_HOST setting.
            port: Server port; defaults to the COUCHDB_PORT setting.
            username: Login name; defaults to the COUCHDB_USERNAME setting.
            password: Login password; defaults to the COUCHDB_PASSWORD setting.
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password

        # Percent-encode the credentials: characters such as '@', ':' or '/'
        # in a username/password would otherwise corrupt the URL structure.
        user = quote(str(self.username), safe="")
        secret = quote(str(self.password), safe="")
        self.instance_url = f"http://{user}:{secret}@{self.host}:{self.port}"
        self.server = couchdb.Server(self.instance_url)
        self.db = self.get_or_create_database(dbname)

    def __repr__(self):
        """Return the server and database representations joined by ' - '."""
        return f"{self.server} - {self.db}"

    def get_or_create_database(self, dbname):
        """Return the database ``dbname``, creating it first if necessary.

        Creation is attempted first; a ``PreconditionFailed`` response is
        treated as "already exists" and the existing database is returned.
        """
        try:
            db = self.server.create(dbname)
            print(f"Database '{dbname}' created successfully.")
            return db
        except couchdb.http.PreconditionFailed:
            print(f"Database '{dbname}' already exists.")
            return self.server[dbname]

    def upload_document(self, data, verbose=False):
        """Save ``data`` as a document and return the resulting document ID.

        Args:
            data: Mapping to store.
            verbose: When true, print the ID on a self-overwriting line.
        """
        doc_id, _ = self.db.save(data)
        if verbose:
            print(f"Document uploaded with ID: {doc_id}", end='\r')
        return doc_id

    def get_document(self, doc_id):
        """Return the document stored under ``doc_id``, or None if missing.

        A missing document is reported with a printed message.
        """
        try:
            return self.db[doc_id]
        except couchdb.http.ResourceNotFound:
            print(f"Document with ID '{doc_id}' not found.")
            return None

    def delete_document(self, doc_id):
        """Delete the document stored under ``doc_id`` and print the outcome."""
        try:
            doc = self.db[doc_id]
            self.db.delete(doc)
            print(f"Document with ID '{doc_id}' deleted successfully.")
        except couchdb.http.ResourceNotFound:
            print(f"Document with ID '{doc_id}' not found.")

    def update_document(self, doc_id, updated_data):
        """Merge ``updated_data`` into the document ``doc_id`` and save it.

        Nothing is written when the document does not exist.
        """
        doc = self.get_document(doc_id)
        if doc is None:
            # get_document has already printed the "not found" message;
            # printing it again here would only duplicate it.
            return
        doc.update(updated_data)
        self.db.save(doc)
        print(f"Document with ID '{doc_id}' updated successfully.")

    def list_documents(self, limit=1):
        """Return up to ``limit`` documents from ``_all_docs``, descending.

        Args:
            limit: Maximum number of rows requested from the view.

        Returns:
            A list of documents (possibly empty). Rows whose document can no
            longer be fetched are skipped.
        """
        try:
            result = self.db.view("_all_docs", descending=True, limit=limit)
            rows = list(result.rows)
        except couchdb.http.ResourceNotFound:
            print("No documents found.")
            return []

        if not rows:
            # An empty database yields no rows; report it instead of failing
            # on an out-of-range index.
            print("No documents found.")
            return []

        fetched = (self.get_document(row.id) for row in rows)
        return [doc for doc in fetched if doc is not None]

    def get_document_by_id(self, doc_id):
        """Return the document ``doc_id`` as a plain dict, or None if missing."""
        doc = self.get_document(doc_id)
        return dict(doc) if doc is not None else None

    def extract_and_get_data(self, response):
        """Fetch the document whose ID is referenced inside ``response``.

        ``response`` is searched for the first ``(doc._id == "<id>")``
        fragment; the document with that ID is returned as a dict.

        Returns:
            The referenced document as a dict, or None when ``response``
            holds no such fragment or the document does not exist.
        """
        # The ID is delimited by double quotes, so stop at the first '"'.
        match = re.search(r'\(doc\._id == "([^"]+)"\)', response)
        if match is None:
            return None
        return self.get_document_by_id(match.group(1))

    def get_last_document(self):
        """Return the document referenced by the first descending row.

        The string form of the first document returned by
        ``list_documents()`` is passed to ``extract_and_get_data``.

        NOTE(review): this only yields a result when that document's text
        embeds a ``(doc._id == "<id>")`` expression (presumably a design or
        view definition — confirm); otherwise None is returned.

        Returns:
            The referenced document as a dict, or None when the database is
            empty or no reference is found.
        """
        docs = self.list_documents()
        if not docs:
            return None
        return self.extract_and_get_data(str(docs[0]))

In [58]:
# Open the 'twitter' database; CouchDB.get_or_create_database creates it when absent.
db = CouchDB('twitter')

Database 'twitter' already exists.


In [59]:
# Sanity check: display whatever get_last_document() resolves for this database.
db.get_last_document()

{'_id': '0:1491562556611121153',
 '_rev': '2-20ad33e9793bcf9359c611753fb9b884',
 'tid': '1491562556611121153',
 'author': '1443931204990877696',
 'date': '2022-02-10T00:00:00.000Z',
 'lang': 'en',
 'content': "I think if fandom is the only thing in your life , it 's definitely not healthy . As long a you have other thing you care about a much or more , it 's a good escape and can fuel creativity . \\n\\nCompetitiveness is ridiculous in a fandom - it 's based on admiring other people 's work !",
 'location': None,
 'sal': None,
 'score': 9,
 'tags': ''}

## Sample usage: upload sal.json to the database

In [5]:
import pandas as pd
from tqdm.notebook import tqdm

In [12]:
# Load the SAL lookup table: transpose so each top-level JSON key becomes a
# row, move that key into a column, discard 'ste', then name the columns.
df = (
    pd.read_json('../data/sal.json')
    .T
    .reset_index()
    .drop(columns='ste')
)
df.columns = ['location', 'gcc', 'sal']
df

Unnamed: 0,location,gcc,sal
0,abbotsbury,1gsyd,10002
1,abbotsford (nsw),1gsyd,10003
2,acacia gardens,1gsyd,10014
3,agnes banks,1gsyd,10021
4,airds,1gsyd,10022
...,...,...,...
15335,christmas island,9oter,90001
15336,home island,9oter,90002
15337,jervis bay,9oter,90003
15338,norfolk island,9oter,90004


In [14]:
# Rebinds `db` to the 'sal' database; the earlier 'twitter' handle is no longer
# reachable through this name.
db = CouchDB('sal')

Database 'sal' created successfully.


In [15]:
# Store every row of the lookup table as its own document (one save per row).
for record in tqdm(df.to_dict(orient='records'), total=len(df), desc="Updating Documents"):
    db.upload_document(record)

Updating Documents:   0%|          | 0/15340 [00:00<?, ?it/s]