# Export data from Oracle to MongoDB
This script is a good starting point for exporting data from Oracle to MongoDB. It can easily be modified to add row transformations (such as combining columns) or other processing.

Tested with:
* pymongo version: 3.4.0
* cx_Oracle version: 6.0.2

Useful links:
* [PyMongo](http://api.mongodb.com/python/current/tutorial.html): A quick tutorial on how PyMongo works.
* [cx_Oracle](http://cx-oracle.readthedocs.io/en/latest/index.html): cx_Oracle is a module that enables access to Oracle Database and conforms to the Python database API specification.
* [cx_Oracle querying best practices](http://www.oracle.com/technetwork/articles/dsl/prez-python-queries-101587.html): Oracle's best practices for querying with cx_Oracle.

### Linux environment variables
Before running the script, the following environment variables must be set (point them to the correct folders for your installation; the paths below are only an example). If the script is run in a Jupyter Notebook, run the exports before the `jupyter notebook` command:

In [None]:
"""
export PATH=$PATH:$HOME/bin:/usr/lib/oracle/12.1/client64/bin
export LD_LIBRARY_PATH=/usr/lib/oracle/12.1/client64/lib
export ORACLE_HOME=/usr/lib/oracle/12.1/client64
export TNS_ADMIN=$ORACLE_HOME/network/admin
"""

---

### Import libraries

In [None]:
import pymongo
from pymongo import MongoClient
from datetime import datetime, date
from random import randint
import cx_Oracle
import re
from datetime import datetime

# Report the versions of both database drivers in use.
for driver_module in (pymongo, cx_Oracle):
    print('{name} version: {v}'.format(name=driver_module.__name__, v=driver_module.version))

---
### Create the export functions

In [None]:
def create_collection_if_not_exists(mongo_database: "pymongo.database.Database", collection_name: str):
    """
    Create a MongoDB collection unless one with that name already exists.

    Parameters:
    -----------
        - mongo_database: database handle in which the collection is looked up / created
        - collection_name: name of the collection

    Prints a message telling whether the collection was created or was already there.
    """
    # `collection_names()` was removed in PyMongo 4.0; `list_collection_names()`
    # (available since PyMongo 3.6) replaces it. The check is done on the class
    # because Database objects resolve unknown attributes to collections, which
    # would make a plain hasattr() on the instance always succeed.
    if hasattr(type(mongo_database), 'list_collection_names'):
        existing_collections = mongo_database.list_collection_names()
    else:
        existing_collections = mongo_database.collection_names()

    if collection_name in existing_collections:
        print('Collection "{collection}" already created'.format(collection=collection_name))
    else:
        mongo_database.create_collection(collection_name)
        print('Created collection "{c}"'.format(c=collection_name))


def export_data_from_oracle_to_mongodb(oracle_server: str,
                                       oracle_port: int,
                                       oracle_sid: str,
                                       oracle_user: str,
                                       oracle_password: str,
                                       mongodb_connection_string: str,
                                       mongodb_database: str,
                                       mongodb_collection: str,
                                       sql_query: str,
                                       create_mongodb_collection_if_not_exist: bool=False,
                                       num_rows_fetch: int=1000):
    """
    Export the result of an Oracle query into a MongoDB collection.

    Every row becomes one document whose keys are the lower-cased column names
    of the query. Rows are moved in batches of 'num_rows_fetch'.

    Parameters:
    -----------
        - oracle_server, oracle_port, oracle_sid: used to build the Oracle DSN
        - oracle_user, oracle_password: Oracle credentials
        - mongodb_connection_string: the connection string, without the database name
            more info in: https://docs.mongodb.com/manual/reference/connection-string/
        - mongodb_database: name of the target database
        - mongodb_collection: name of the target collection
        - sql_query: the query must not have a semi-colon ";" at the end of the sentence!
        - create_mongodb_collection_if_not_exist: when True, the collection is created
            explicitly (if missing) before inserting
        - num_rows_fetch: rows per batch; each batch is one fetchmany() on Oracle and
            one insert_many() on MongoDB, so a smaller value means more round trips

    Raises:
    -------
        - ValueError: a required parameter is None
        - Exception: the Oracle or MongoDB connection could not be set up
            (the original error is chained as __cause__)
        - any error raised while querying / inserting is propagated after all
            connections have been closed
    """
    # all parameters except create_mongodb_collection_if_not_exist and num_rows_fetch are mandatory
    required_parameters = (
        ('oracle_server', oracle_server),
        ('oracle_port', oracle_port),
        ('oracle_sid', oracle_sid),
        ('oracle_user', oracle_user),
        ('oracle_password', oracle_password),
        ('mongodb_connection_string', mongodb_connection_string),
        ('mongodb_database', mongodb_database),
        ('mongodb_collection', mongodb_collection),
        ('sql_query', sql_query),
    )
    for parameter_name, parameter_value in required_parameters:
        if parameter_value is None:
            raise ValueError('"{name}" parameter must be set'.format(name=parameter_name))

    ## create connections

    # create oracle connection
    try:
        dsn_tns = cx_Oracle.makedsn(oracle_server, oracle_port, oracle_sid)
        oracle_conn = cx_Oracle.connect(oracle_user, oracle_password, dsn_tns)
    except Exception as error:
        # keep the root cause attached instead of hiding it
        raise Exception("could not create a connection to Oracle database") from error

    # create mongodb connection
    mongodb_client = None  # bound up-front so the cleanup below never hits an unbound name
    try:
        mongodb_client = MongoClient(mongodb_connection_string)
        mongodb_db = mongodb_client.get_database(mongodb_database)
        if create_mongodb_collection_if_not_exist:
            create_collection_if_not_exists(mongodb_db, mongodb_collection)
        collection = mongodb_db[mongodb_collection]
    except Exception as error:
        # the oracle connection is already open at this point: release it
        # (and the mongodb client, if it got created) before reporting the failure
        try:
            oracle_conn.close()
        finally:
            if mongodb_client is not None:
                mongodb_client.close()
        raise Exception("could not create a connection to MongoDB server / database") from error

    ## exporting data
    cursor = None  # stays None if oracle_conn.cursor() itself fails
    try:
        cursor = oracle_conn.cursor()
        print ('executing the query in Oracle server...')
        cursor.execute(sql_query)

        # column names in lowercase because it's case sensitive
        ora_column_names = [col[0].lower() for col in cursor.description]

        # export rows fetching 'num_rows_fetch' every time
        print ('start exporting data...')
        rows = cursor.fetchmany(num_rows_fetch)
        while rows:
            # convert rows to a list of dicts
            mongo_rows = [dict(zip(ora_column_names, row)) for row in rows]

            ## if some row transformation is needed (like combining columns) it can be done here

            # insert into mongodb
            collection.insert_many(mongo_rows)
            # fetch next rows
            rows = cursor.fetchmany(num_rows_fetch)

        print('successfully exported the data from Oracle to MongoDB')

    finally:
        # nested so that a failing close() does not prevent the remaining ones
        try:
            if cursor is not None:
                cursor.close()
        finally:
            try:
                oracle_conn.close()
            finally:
                mongodb_client.close()


---
### Run the export

In [None]:
# Configuration parameters
# NOTE: every value below is a placeholder; replace it with the settings of your environment.
ip = 'oracle_ip'
port = 1521  # default Oracle listener port (was an undefined name, which raised NameError)
SID = 'ORACLE_SID'
ora_user = 'username'
ora_password = 'password'
my_mongodb = 'mongodb://localhost:27017/'
my_mongodb_db = 'mydb'
my_mongodb_col = 'my_collection'

# the query must not end with a semi-colon
consulta_sql = u"""SELECT * FROM DUAL"""

export_data_from_oracle_to_mongodb(oracle_server=ip,
                                   oracle_port=port,
                                   oracle_sid=SID,
                                   oracle_user=ora_user,
                                   oracle_password=ora_password,
                                   mongodb_connection_string=my_mongodb,
                                   mongodb_database=my_mongodb_db,
                                   mongodb_collection=my_mongodb_col,
                                   create_mongodb_collection_if_not_exist=True,
                                   sql_query=consulta_sql)

### Check the data in the MongoDB database

In [None]:
# Count the documents of the target collection to verify the export.
mongodb_client = MongoClient(my_mongodb)
try:
    mongodb_db = mongodb_client.get_database(my_mongodb_db)
    exported_collection = mongodb_db[my_mongodb_col]

    # `Collection.count()` was removed in PyMongo 4.0; `count_documents()` (PyMongo >= 3.7)
    # replaces it. The check is done on the class because Collection objects resolve
    # unknown attributes to sub-collections, so hasattr() on the instance is not reliable.
    if hasattr(type(exported_collection), 'count_documents'):
        num_documents = exported_collection.count_documents({})
    else:
        num_documents = exported_collection.count()

    print ('In collection {col} there are {count} documents'.
           format(count=num_documents, col=my_mongodb_col))
finally:
    # release the client even if the count fails
    mongodb_client.close()