In [1]:
import os
from urllib.parse import quote

import couchdb
import dotenv

In [2]:
# Load variables from a .env file into the process environment so the
# os.environ lookups in the next cell can find the CouchDB settings.
# The call's return value is displayed as the cell output.
dotenv.load_dotenv()

True

In [3]:
# CouchDB connection settings, taken from the process environment.
# Subscript access is deliberate: a missing variable raises KeyError here
# instead of surfacing later as a confusing connection failure.
COUCHDB_HOST = os.environ["COUCHDB_HOST"]
COUCHDB_PORT = os.environ["COUCHDB_PORT"]  # kept as the raw string from the environment
COUCHDB_USERNAME = os.environ["COUCHDB_USERNAME"]
COUCHDB_PASSWORD = os.environ["COUCHDB_PASSWORD"]

In [25]:
class CouchDB:
    """Small convenience wrapper around one database on a CouchDB server.

    Builds a ``couchdb.Server`` from host/port/credentials, opens the database
    named ``dbname`` (creating it first when it does not exist) and exposes
    simple create/read/update/delete helpers that report their outcome with
    ``print``.

    Attributes:
        host, port, username, password: Connection settings exactly as passed in.
        instance_url: Server URL with the percent-encoded credentials embedded.
        server: The ``couchdb.Server`` handle built from ``instance_url``.
        db: The database object selected by ``dbname``.
    """

    def __init__(self, dbname, host=COUCHDB_HOST, port=COUCHDB_PORT,
                 username=COUCHDB_USERNAME, password=COUCHDB_PASSWORD):
        """Connect to the server and open (or create) the database ``dbname``.

        Args:
            dbname: Name of the database to open or create.
            host: Server host name; defaults to ``COUCHDB_HOST``.
            port: Server port; defaults to ``COUCHDB_PORT``.
            username: Login name; defaults to ``COUCHDB_USERNAME``.
            password: Login password; defaults to ``COUCHDB_PASSWORD``.
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password

        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # cannot be mistaken for URL delimiters; couchdb un-quotes them again
        # when it extracts the credentials from the URL.
        encoded_user = quote(str(self.username), safe="")
        encoded_password = quote(str(self.password), safe="")
        self.instance_url = (
            f"http://{encoded_user}:{encoded_password}@{self.host}:{self.port}"
        )
        self.server = couchdb.Server(self.instance_url)
        self.db = self.get_or_create_database(dbname)

    def __repr__(self):
        """Return the server and database representations joined by ' - '."""
        return f"{self.server} - {self.db}"

    def get_or_create_database(self, dbname):
        """Return the database ``dbname``, creating it when it does not exist.

        Creation is attempted first; ``couchdb.http.PreconditionFailed`` is
        treated as "already exists" and the existing database is returned.
        A message stating which of the two happened is printed.
        """
        try:
            db = self.server.create(dbname)
            print(f"Database '{dbname}' created successfully.")
            return db
        except couchdb.http.PreconditionFailed:
            print(f"Database '{dbname}' already exists.")
            return self.server[dbname]

    def upload_document(self, data, verbose=False):
        """Save ``data`` as a document and return the resulting document ID.

        Args:
            data: Mapping to store; it is handed to ``self.db.save`` as is.
            verbose: When true, print the ID on a carriage-return-terminated
                line so successive uploads overwrite one another on screen.

        Returns:
            The document ID reported by ``self.db.save``.
        """
        # save() returns (id, rev); only the ID is of interest here.
        doc_id, _ = self.db.save(data)
        if verbose:
            print(f"Document uploaded with ID: {doc_id}", end='\r')
        return doc_id

    def get_document(self, doc_id):
        """Return the document with ID ``doc_id``, or ``None`` if it is missing.

        A "not found" message is printed when the lookup raises
        ``couchdb.http.ResourceNotFound``.
        """
        try:
            doc = self.db[doc_id]
            return doc
        except couchdb.http.ResourceNotFound:
            print(f"Document with ID '{doc_id}' not found.")
            return None

    def delete_document(self, doc_id):
        """Delete the document with ID ``doc_id`` and print the outcome.

        A missing document is reported with a message rather than an exception.
        """
        try:
            doc = self.db[doc_id]
            self.db.delete(doc)
            print(f"Document with ID '{doc_id}' deleted successfully.")
        except couchdb.http.ResourceNotFound:
            print(f"Document with ID '{doc_id}' not found.")

    def update_document(self, doc_id, updated_data):
        """Merge ``updated_data`` into the document ``doc_id`` and save it.

        Args:
            doc_id: ID of the document to modify.
            updated_data: Mapping whose items are applied with ``dict.update``.

        A missing document is reported once with a "not found" message and
        nothing is written.
        """
        # Look the document up directly instead of going through
        # get_document(), which would print its own "not found" message and
        # make the report appear twice.
        try:
            doc = self.db[doc_id]
        except couchdb.http.ResourceNotFound:
            print(f"Document with ID '{doc_id}' not found.")
            return

        doc.update(updated_data)
        self.db.save(doc)
        print(f"Document with ID '{doc_id}' updated successfully.")

    def list_documents(self):
        """Return the rows of the ``_all_docs`` view as a list."""
        return list(self.db.view("_all_docs"))

In [26]:
# Open the 'twitter' database; the wrapper creates it first when it is missing
# and prints which of the two happened.
db = CouchDB(dbname="twitter")

Database 'twitter' already exists.


## Sample usage: upload the sal.json data (loaded below from `../data/sal.pickle`) to the database

In [36]:
import pandas as pd
from tqdm.notebook import tqdm

In [40]:
# Read the pickled DataFrame that will be uploaded below.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
df = pd.read_pickle("../data/sal.pickle")
df

Unnamed: 0,location,gcc,sal,real_loc
0,abbotsbury,1gsyd,10002,abbotsbury
1,abbotsford nsw,1gsyd,10003,abbotsford nsw
2,acacia gardens,1gsyd,10014,acacia gardens
3,agnes banks,1gsyd,10021,agnes banks
4,airds,1gsyd,10022,airds
...,...,...,...,...
17647,creek act,8acte,80013,rendezvous creek act
17648,weston creek,8acte,80016,weston creek act
17649,creek act,8acte,80016,weston creek act
17650,red hill,8acte,80112,red hill act


In [41]:
# Rebind `db` to the 'sal' database; the wrapper creates it first when it is
# missing and prints which of the two happened.
db = CouchDB(dbname="sal")

Database 'sal' created successfully.


In [None]:
# Upload each DataFrame row as its own document, one save call per row, with a
# progress bar sized to the number of rows.
# NOTE(review): the rows shown above carry no `_id` column, so re-running this
# cell presumably stores every row again under a new ID — confirm before
# re-executing.
row_count = len(df)
for _, data in tqdm(df.iterrows(), desc="Updating Documents", total=row_count):
    db.upload_document(data.to_dict())

Updating Documents:   0%|          | 0/17652 [00:00<?, ?it/s]