![Banner](images/banner.png)

# DML - INSERT, UPDATE, DELETE, and MERGE Statements

In [None]:
import oracledb

In [None]:
import os

# Connection settings. Each value can be overridden with an environment
# variable so that real credentials never have to be written into the
# notebook; the fallbacks are the demo values this notebook originally used.
un = os.environ.get("PYTHON_USER", "pythondemo")
pw = os.environ.get("PYTHON_PASSWORD", "welcome")
cs = os.environ.get("PYTHON_CONNECT_STRING", "localhost/orclpdb1")

# Single connection shared by every cell below
connection = oracledb.connect(user=un, password=pw, dsn=cs)

In [None]:
cursor = connection.cursor()

# Drop any table left over from a previous run. On a fresh schema the table
# does not exist yet, so ORA-00942 ("table or view does not exist") is
# expected and ignored; every other database error is still raised.
try:
    cursor.execute("drop table mytab")
except oracledb.DatabaseError as exc:
    (error,) = exc.args
    if error.code != 942:
        raise

In [None]:
# Create the two-column demo table that the insert examples write to
create_table_sql = "create table mytab (id number, data varchar2(1000))"
cursor.execute(create_table_sql)

# Binding for Insertion

Documentation reference link: [Using Bind Variables](https://python-oracledb.readthedocs.io/en/latest/user_guide/bind.html)

Binding is very, very important. It:
- eliminates the need to escape special characters and helps prevent SQL injection attacks
- is important for performance and scalability

In [None]:
with connection.cursor() as cursor:
    # Start from an empty table so the cell can be re-run
    cursor.execute("truncate table mytab")

    # The same statement is reused for every binding style shown below
    sql = "insert into mytab (id, data) values (:idVal, :dataVal)"

    # bind by position using a sequence (list or tuple)
    cursor.execute(sql, [1, "String 1"])
    cursor.execute(sql, (2, "String 2"))

    # bind by name using a dictionary
    # (keep using the cursor opened by the `with` statement: it is the only
    # one that gets closed when the block exits)
    cursor.execute(sql, {"idVal": 3, "dataVal": "String 3"})

    # bind by name using keyword arguments
    cursor.execute(sql, idVal=4, dataVal="String 4")

    print("Done")

# Batch execution - Inserting multiple rows with executemany()

Documentation reference link: [Executing Batch Statements and Bulk Loading](https://python-oracledb.readthedocs.io/en/latest/user_guide/batch_statement.html)

In [None]:
with connection.cursor() as cursor:
    cursor.execute("truncate table mytab")

    # One (id, data) tuple per row, with ids counting up from 1
    ordinals = ["First", "Second", "Third", "Fourth", "Fifth", "Sixth", "Seventh"]
    rows = list(enumerate(ordinals, start=1))

    # Declaring input sizes up front helps avoid memory reallocations.
    # There is one argument per bind position, in insert-column order:
    #   - None: leave the NUMBER column on python-oracledb's default handling
    #   - the width of the longest string bound to the VARCHAR2 column
    #     ("Seventh", i.e. 7 characters)
    widest = max(len(text) for text in ordinals)
    cursor.setinputsizes(None, widest)

    # Send every row to the database in a single call
    cursor.executemany("insert into mytab(id, data) values (:1, :2)", rows)

    # Read the rows back; after a query the cursor itself is the row iterator
    cursor.execute('select * from mytab')
    for row in cursor:
        print(row)

    # Leave the table empty for the next example
    connection.rollback()

### Benchmark - executemany() vs execute()

In [None]:
import matplotlib.pyplot as plt
import time

# Row counts to test inserting
numrows = (1, 5, 10, 100, 1000)

# Every row carries the same 1000-character payload
longstring = "x" * 1000


def create_data(n):
    """Return a list of n rows of the form (i, longstring) for i in 0..n-1."""
    return [(i, longstring) for i in range(n)]


ex = []  # seconds for execute() loop, one entry per value in numrows
em = []  # seconds for executemany(), one entry per value in numrows

# Use a context-managed cursor so it is closed once the benchmark finishes
with connection.cursor() as cursor:
    cursor.execute("truncate table mytab")

    for n in numrows:

        rows = create_data(n)

        ############################################################
        #
        # Loop over each row
        #

        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can jump if the system clock
        # is adjusted.
        start = time.perf_counter()

        for row in rows:
            cursor.execute("insert into mytab(id, data) values (:1, :2)", row)      # <==== Loop over execute()

        elapsed = time.perf_counter() - start
        ex.append(elapsed)

        count, = cursor.execute("select count(*) from mytab").fetchone()
        print("execute() loop {:6d} rows in {:06.4f} seconds".format(count, elapsed))
        # Undo the inserts so the next measurement starts from an empty table
        connection.rollback()

        #############################################################
        #
        # Insert all rows in one call
        #

        start = time.perf_counter()

        cursor.executemany("insert into mytab(id, data) values (:1, :2)", rows)   # <==== One executemany()

        elapsed = time.perf_counter() - start
        em.append(elapsed)

        count, = cursor.execute("select count(*) from mytab").fetchone()
        print("executemany()  {:6d} rows in {:06.4f} seconds".format(count, elapsed))
        connection.rollback()


print("Plot is:")
plt.plot(numrows, ex, label="execute() loop", marker="o")
plt.plot(numrows, em, label="one executemany()", marker="o")
plt.xscale("log")
# Ticks are set after xscale(): switching to a log scale installs that scale's
# default tick locator and formatter, which would discard ticks set earlier.
# Explicit labels make every tested row count readable on the axis.
plt.xticks(numrows, [str(count) for count in numrows])
plt.xlabel('Number of rows')
plt.ylabel('Seconds')
plt.legend(loc="upper left")
plt.show()

### Noisy Data - Batch Errors

Dealing with bad data is easy with the `batcherrors` parameter.

In [None]:
# Initial data: print the current contents of both tables, parent first

with connection.cursor() as cursor:

    for query in (
        "select * from ParentTable order by ParentId",
        "select * from ChildTable order by ChildId",
    ):
        # After a query the cursor itself yields the result rows
        cursor.execute(query)
        for row in cursor:
            print(row)

In [None]:
# Rows to insert into ChildTable; two of them are deliberately invalid
dataToInsert = [
    (1016, 10, 'Child Red'),
    (1018, 20, 'Child Blue'),
    (1018, 30, 'Child Green'),  # duplicate key
    (1022, 40, 'Child Yellow'),
    (1021, 75, 'Child Orange')  # parent does not exist
]

insert_sql = "insert into ChildTable values (:1, :2, :3)"

with connection.cursor() as cursor:

    # With batcherrors=True the failed rows are reported afterwards through
    # getbatcherrors(), each with its message and its offset into dataToInsert
    cursor.executemany(insert_sql, dataToInsert, batcherrors=True)

    print("\nErrors in rows that were not inserted:\n")
    for batch_error in cursor.getbatcherrors():
        print("Error", batch_error.message, "at row offset", batch_error.offset)

    print("\nRows that were successfully inserted:\n")
    cursor.execute("select * from ChildTable order by ChildId")
    for row in cursor:
        print(row)

Now you can choose whether or not to fix the failed records and reinsert them.
You can then roll back or commit.

This holds even if autocommit mode is enabled: no commit occurs when there are batch errors.

In [None]:
# Discard the uncommitted ChildTable rows inserted by the batch-error example above
connection.rollback()