# Trimming Whitespace

This is a short example that profiles several alternatives for trimming whitespace when uploading columns. It was designed to study the impact of addressing easyaccess [Issue #52](https://github.com/mgckind/easyaccess/issues/52).

I ran this from home on a "bad internet" day, so the absolute values of the reported times are (hopefully) not representative. Only the relative times between the alternatives are of interest here.

In [None]:
import numpy as np
import pandas as pd
import fitsio
import time

import easyaccess as ea

In [None]:
tablename='TMP'
col = 'TAG'

print "Loading FITS data..."
d = fitsio.read('infile.fits',columns=[col])
ds = copy.deepcopy(d) # Copy for stripping

print "Converting to pandas..."
p = pd.DataFrame(d)
ps = copy.deepcopy(p) # Copy for stripping

print "Found %s rows."%len(d)
print "dtype: ",d.dtype.descr
print "unique values: ",np.unique(d[col])
print

print "Creating easyaccess connection..."
con = ea.connect()


In [None]:
print "Creating table..."
try: con.drop_table(tablename)
except: pass

query = "CREATE TABLE %s ( %s VARCHAR2(13) );"%(tablename,col)
print query
con.onecmd(query)

In [None]:
start = time.time()
print "Inserting numpy (no strip) into table..."
query = "INSERT INTO %s ( %s ) values (:%s )"%(tablename,col,col)
print query
con.cursor().executemany(query, d.tolist())
print "Runtime: %.2fs"%(time.time() - start)
 
start = time.time()
print "Inserting numpy (strip) into table..."
query = "INSERT INTO %s ( %s ) values (:%s )"%(tablename,col,col)
print query
d[col] = np.char.strip(d[col])
con.cursor().executemany(query, d.tolist())
print "Runtime: %.2fs"%(time.time() - start)
 
start = time.time()
print "Inserting pandas (no strip) into table..."
query = "INSERT INTO %s ( %s ) values (:%s )"%(tablename,col,col)
print query
con.cursor().executemany(query, p.values.tolist())
print "Runtime: %.2fs"%(time.time() - start)
 
start = time.time()
print "Inserting pandas (strip) into table..."
query = "INSERT INTO %s ( %s ) values (:%s )"%(tablename,col,col)
print query
p[col] = p[col].str.strip()
con.cursor().executemany(query, p.values.tolist())
print "Runtime: %.2fs"%(time.time() - start)

start = time.time()
print "Inserting numpy (oracle) into table..."
query = "INSERT INTO %s ( %s ) values ( TRIM(:%s) )"%(tablename,col,col)
print query
con.cursor().executemany(query, d.tolist())
print "Runtime: %.2fs"%(time.time() - start)

In [None]:
try: con.drop_table(tablename)
except: pass

query = "CREATE TABLE %s ( %s VARCHAR2(13) );"%(tablename,col)
print query
con.onecmd(query)

start = time.time()
print "Inserting numpy (oracle) into table..."
query = "INSERT INTO %s ( %s ) values ( TRIM(:%s) )"%(tablename,col,col)
print query
con.cursor().executemany(query, d.tolist())
print "Runtime: %.2fs"%(time.time() - start)

print "Counting table rows..."
query = "select count(*) from %s;"%(tablename)
print query
con.onecmd(query)

print "Selecting original entry..."
query = "select count(*) from %s where %s = '%s';"%(tablename,col,d[col][0])
print query
con.onecmd(query)

print "Selecting stripped entry..."
query = "select count(*) from %s where %s = '%s';"%(tablename,col,d[col][0].strip())
print query
con.onecmd(query)