In [1]:
import pandas as pd
import sqlite3 as sql

In [2]:
# Path of the SQLite database file that the cells below connect to.
mydb = 'Abstracts_aug4.db'

In [38]:
# Name of a JSON input file.
# NOTE(review): no visible cell passes this variable to anything; presumably
# it is meant for jsonDF / entryintotables -- confirm.
jsonFile = "json4.json"

In [10]:
def jsonDF(jsonFile):
    '''Read a JSON file into a pandas DataFrame.
    : param jsonFile : str. Path of the JSON file to read
    : output : pandas DataFrame; the top-level JSON keys become the row
               labels (orient='index')
    '''
    # Read-only access is all that is needed, and the ``with`` block closes
    # the handle even when parsing fails.
    with open(jsonFile, "r") as f:
        return pd.read_json(f, orient='index')
    

Unnamed: 0,Abstract,Author affiliation,Authors,Conf,Title,terms,year
abstract,"For real, i mean Rice Krispie Treat Cereal and...",Some Fancy Pants Nancy Place or My cereal bowl,"mr brown, mr blue, mr mogatu",ECSA,Monty Python and the Holy Grail,"one potato, two potato, three potato, four",2009


In [3]:
def getContents(db=None):
    '''List the tables stored in a SQLite database.
    : param db : str, optional. Path of the database file. When omitted, the
                 notebook-level ``mydb`` is used
    : output : Returns a list with the name of every table recorded in
               sqlite_master
    '''
    if db is None:
        db = mydb

    con = sql.connect(db)
    try:
        cursor = con.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        # each fetched row is a 1-tuple holding the table name
        return [row[0] for row in cursor.fetchall()]
    finally:
        # sqlite3's ``with`` block only manages the transaction; the
        # connection itself has to be closed explicitly
        con.close()
            

In [4]:
# List the names of the tables present in the database and display them.
t = getContents()
t

[u'sqlite_sequence',
 u'ABSTRACTSTOTAL',
 u'CONFERENCES',
 u'PUBLICATIONS',
 u'KEYS',
 u'AUTHORS',
 u'AFFILIATIONS',
 u'PAPER',
 u'PAPERKEY',
 u'AFFILIATIONPAPER',
 u'PAPERAUTHOR']

In [5]:
def sqlCMDToPD(table, db):
    '''Take a table of a sql db and return it as a readable pandas DataFrame
       : param table : str. Name of the table to read (ie. 'CONFERENCES')
       : param db : str. Name of db. (ie. 'Abstracts.db')
       : output : pandas DataFrame with every row and column of the table.
                  The shape of the frame is printed as a quick check.
    '''
    # A table name cannot be bound as a query parameter, so it is quoted as
    # an identifier (embedded double quotes doubled) rather than pasted in as
    # a string literal.
    sqlcmd = 'SELECT * FROM "%s"' % table.replace('"', '""')

    #connect to a db
    con = sql.connect(db)
    try:
        #run command
        df = pd.read_sql_query(sqlcmd, con)
    finally:
        # the sqlite3 context manager does not close the connection, so do
        # it explicitly
        con.close()

    # Check resulting pandas DF shape
    print(df.shape)

    return df


In [8]:
# Load each table into its own DataFrame; sqlCMDToPD prints the shape of
# every frame as it is read.
# NOTE(review): `t` was assigned the list of table names in an earlier cell
# and is rebound to a DataFrame here.
c = sqlCMDToPD("CONFERENCES", mydb)
t = sqlCMDToPD("ABSTRACTSTOTAL", mydb)
pap = sqlCMDToPD("PAPER", mydb)
pub = sqlCMDToPD("PUBLICATIONS", mydb)
k = sqlCMDToPD("KEYS", mydb)

au = sqlCMDToPD("AUTHORS", mydb)
aff = sqlCMDToPD("AFFILIATIONS", mydb)

pk = sqlCMDToPD("PAPERKEY", mydb)
ap = sqlCMDToPD("AFFILIATIONPAPER", mydb)
pa = sqlCMDToPD("PAPERAUTHOR", mydb)
# Display the PUBLICATIONS frame as the cell output.
pub

(4, 2)
(1141, 7)
(1102, 8)
(27, 3)
(5557, 2)
(1949, 2)
(830, 2)
(17436, 2)
(1102, 2)
(3432, 2)


Unnamed: 0,pubID,year,confName
0,1,2007,ECSA
1,2,2008,ECSA
2,3,2009,ECSA
3,4,2010,ECSA
4,5,2011,ECSA
5,6,2012,ECSA
6,7,2013,ECSA
7,8,2014,ECSA
8,9,2005,QoSA
9,10,2006,QoSA


In [92]:
def insert_toTable(db, df, table = 'ABSTRACTSTOTAL'):
    '''Append the rows of a DataFrame (or Series) to a database table
    param  db str : Database name to connect to
    param df pandas dataframe : rows to append; the index is not written
    param table str : Table Name to insert into, if does not exist will create
    output : None. A confirmation line is printed once the rows are written
    '''
    con = sql.connect(db)
    try:
        # ``flavor`` is no longer accepted by pandas' to_sql, and the other
        # keywords that used to be spelled out here were all defaults.
        with con:
            df.to_sql(table, con, if_exists='append', index=False)
    finally:
        # the ``with`` block above only commits; close the connection too
        con.close()

    print("Records %s inserted"%table)
    
def insertcheckRecord(db, df, table = 'CONFERENCES', un = 'confName' ):
    '''Check to insert a new record into a database table, inserts if does not exist
    param  db str : Database name to connect to
    param df pandas dataframe : dataframe being inspected for entry
    param table str : Table Name to insert into, if does not exist will create
    param un str : unique column to check for entry to create a new pk, if not will just append
    output : bool. True when the record was inserted, False when the value
             was already present in the table

    Only the first row of df is compared against the table; when it is new,
    the whole ``un`` column of df is appended.
    '''
    # read from the database that was passed in, not from the notebook global
    existing = sqlCMDToPD(table, db)

    # .iloc picks the first row regardless of how df is indexed
    candidate = df[un].iloc[0]

    if candidate not in existing[un].unique():
        insert_toTable(db, df[un], table)
        return True

    print(" %s already exists, try upserting with key value or deleting" %candidate)
    return False
    
def insertcheckRecordTWO(db, df, table = 'PUBLICATIONS', un = 'confName', un1 = 'year' ):
    '''Check to insert a new record into a database table, inserts if does not exist, checks for multiple
    entries as unique
    param  db str : Database name to connect to
    param df pandas dataframe : dataframe being inspected for entry
    param table str : Table Name to insert into, if does not exist will create
    param un str : unique column to check for entry to create a new pk, if not will just append
    param un1 str : unique column2 to check for entry to create a new pk, if fail un, if not will just append
    output : bool. True when the (un, un1) record was inserted, False when
             that combination was already present in the table

    Only the first row of df is compared against the table; when it is new,
    the ``un`` and ``un1`` columns of df are appended.
    '''
    # read from the database that was passed in, not from the notebook global
    existing = sqlCMDToPD(table, db)

    # .iloc picks the first row regardless of how df is indexed
    first = df[un].iloc[0]
    second = df[un1].iloc[0]
    record = df[[un, un1]]

    if first not in existing[un].unique():
        print('%s is new' % first)
        insert_toTable(db, record, table)
        return True

    # rows of the table that already carry the same value in the first column
    same_first = existing[existing[un] == first]

    if second not in same_first[un1].unique():
        print('%s is a new entry' % (record.values,))
        insert_toTable(db, record, table)
        return True

    print(" %s already exists, try upserting with key value or deleting" % (record.values,))
    return False


def insertValue(db, table, value):
    '''Insert a new record by value into a database table
    param  db str : Database name to connect to
    param table str : Table Name to insert into. The statement supplies two
                values, NULL for the first column and ``value`` for the second
    param value str : unique value entered into table
    output : None. A confirmation line is printed after the insert
    '''
    statement = "INSERT INTO {tn} VALUES(NULL, ?)".format(tn=table)

    con = sql.connect(db)
    try:
        # Bind the value instead of splicing it into the SQL text, so that
        # apostrophes and other special characters are stored verbatim.
        with con:
            con.execute(statement, (value,))
    finally:
        # the ``with`` block above only commits; close the connection too
        con.close()

    print('%s inserted into %s' % (value, table))
        
def enterValueCheck_nested(db, table, values, cn):
    '''Check to insert a new record into a database table, inserts if does not exist
    param  db str : Database name to connect to
    param table str : Table Name to insert into
    param values python series : series of comma-separated strings; every
                piece is checked against the table and inserted when missing
    param cn str : column name to check for entry to create a new pk
    '''
    tableDF = sqlCMDToPD(table, db)

    # Snapshot of the values already stored. It is built once and extended
    # as rows are inserted, so a value repeated within ``values`` is only
    # inserted the first time it is seen.
    known = set(tableDF[cn])

    for entry in values:
        for piece in entry.split(','):
            # "a, b" splits into "a" and " b"; drop the padding so " b" and
            # "b" are treated as the same value
            key = piece.strip()
            if not key:
                continue

            if key in known:
                print('%s already exists in table' % key)
            else:
                print('%s is new' % key)
                insertValue(db, table, key)
                known.add(key)
                
def deleteRowPK(db, table, pkcol, entryID):
    '''Deleting a Record by PRIMARY KEY
    param  db str : Database name to connect to
    param table str : Table Name to delete from
    param pkcol str : primary column name being used, 
    param entryID int : integer value (Primary Key Value) to delete from table
    '''
    # numpy scalars (e.g. a value taken from a DataFrame) cannot be bound by
    # sqlite3; unwrap them into plain Python values first
    if hasattr(entryID, 'item'):
        entryID = entryID.item()

    statement = "DELETE FROM {tn} WHERE {idf}=?".format(tn=table, idf=pkcol)

    con = sql.connect(db)
    try:
        # The key is bound as a parameter, so it is always compared as a
        # value and never interpreted as SQL. The ``with`` block commits.
        with con:
            con.execute(statement, (entryID,))
    finally:
        con.close()

def deleteRowOTHER(db, table, cn, entry):
    '''Deleting Records by the value of a column
    param  db str : Database name to connect to
    param table str : Table Name to delete from
    param cn str : column name being used for deletion comparison (if no PK column, ie, abstracts Total)
    param entry str : the str to be used to find and remove records (removes
                all records whose ``cn`` column equals it)
    '''
    # numpy scalars cannot be bound by sqlite3; unwrap them first
    if hasattr(entry, 'item'):
        entry = entry.item()

    statement = "DELETE FROM {tn} WHERE {idf}=?".format(tn=table, idf=cn)

    con = sql.connect(db)
    try:
        # Bind the value so that apostrophes in ``entry`` cannot break or
        # alter the statement. The ``with`` block commits.
        with con:
            con.execute(statement, (entry,))
    finally:
        con.close()


        
def entryintotables(db, jsonfile):
    '''Inserting a Record from a JsonFile
    param  db str : Database name to connect to
    param jsonfile str : name of Json File to be read into the database
    output : pandas DataFrame read from the file, with the 'Conf' column
             renamed to 'confName' and 'Author affiliation' to 'affiliation'
    '''
    # read-only access is enough; the ``with`` block closes the handle
    with open(jsonfile, "r") as f:
        jdf = pd.read_json(f, orient='index')

    # TODO: ABSTRACTSTOTAL is not populated by this function.

    #renaming of columns so they match the column names used by the tables
    jdf = jdf.rename(columns = {'Conf': 'confName',
                                'Author affiliation': 'affiliation'})

    #CONFERENCES, check and then insert if needed, uniqueness based on confName column
    insertcheckRecord(db, jdf, table = 'CONFERENCES', un = 'confName' )

    #PUBLICATIONS, check and then insert if needed, uniqueness based on confName and year columns
    insertcheckRecordTWO(db, jdf, table = 'PUBLICATIONS', un = 'confName', un1 = 'year' )

    #AFFILIATIONS, check and then insert if needed, uniqueness based on affiliation column
    insertcheckRecord(db = db, df = jdf, table = 'AFFILIATIONS', un = 'affiliation')

    #For the nested: authors, keywords, and need to reparse/reformat, also to show numerous ways to insert:
    #KEYS
    enterValueCheck_nested(db=db, table = 'KEYS', values = jdf.terms, cn = 'keyword')
    #AUTHORS
    enterValueCheck_nested(db=db, table = 'AUTHORS', values = jdf.Authors, cn = 'authorName')

    return jdf



In [89]:
# Split the comma-separated keyword and author strings of the JSON record and
# insert every value the KEYS / AUTHORS tables do not hold yet.
# NOTE(review): `j` was not assigned in any visible cell; loading it with
# jsonDF(jsonFile) provides the `terms` and `Authors` columns used below --
# confirm this is the intended source.
j = jsonDF(jsonFile)
enterValueCheck_nested(mydb, 'KEYS', j.terms, 'keyword')
enterValueCheck_nested(mydb, 'AUTHORS', j.Authors, 'authorName')

(5561, 2)
one potato already exists in table
 two potato already exists in table
 three potato already exists in table
 four already exists in table
(1949, 2)
mr brown is new
mr brown inserted into AUTHORS
 mr blue is new
 mr blue inserted into AUTHORS
 mr mogatu is new
 mr mogatu inserted into AUTHORS


In [90]:
# Re-read AUTHORS and show its last rows.
au = sqlCMDToPD('AUTHORS', mydb)
au.tail()

(1952, 2)


Unnamed: 0,authorID,authorName
1947,1948,ller Bernhard
1948,1949,Wang Mingxue
1949,1950,mr brown
1950,1951,mr blue
1951,1952,mr mogatu


In [107]:
# Re-read KEYS and show its last rows.
keys = sqlCMDToPD('KEYS', mydb)
keys.tail()

(5561, 2)


Unnamed: 0,keyID,keyword
5556,5557,Component based software architecture
5557,5558,one potato
5558,5560,two potato
5559,5561,three potato
5560,5562,four


In [108]:
# Delete rows by primary key from KEYS, AUTHORS and AFFILIATIONS, then reload
# the three tables to check the result.
# NOTE(review): the ids are hard-coded for one particular state of the
# database; deleting an id that is absent is a no-op.
for key_id in (5562, 5561, 5560, 5558):
    deleteRowPK(mydb, 'KEYS', 'keyID', key_id)
for author_id in (1950, 1951, 1952):
    deleteRowPK(mydb, 'AUTHORS', 'authorID', author_id)
deleteRowPK(mydb, 'AFFILIATIONS', 'affilID', 831)

keys = sqlCMDToPD('KEYS', mydb)
au = sqlCMDToPD('AUTHORS', mydb)
aff = sqlCMDToPD('AFFILIATIONS', mydb)
aff.tail()

(5557, 2)
(1949, 2)
(830, 2)


Unnamed: 0,affilID,affiliation
825,826,"Federal University of Bahia, Av Adhemar de Ba..."
826,827,"University of Groningen, Nijenborgh 9, 9747 AG..."
827,828,"Department of Informatics, Universidad Tecnica..."
828,829,"Dept of Computer Systems, University of S o..."
829,830,"ABB Corporate Research Germany, Industrial Sof..."


In [110]:
# Show the last rows of `au`, the AUTHORS frame loaded by an earlier cell.
au.tail()

Unnamed: 0,authorID,authorName
1944,1945,Tomic Slobodanka
1945,1946,Manna Valerio Panzica La
1946,1947,Song Hui
1947,1948,ller Bernhard
1948,1949,Wang Mingxue


In [115]:
# NOTE(review): upsertToTable_two is not defined in any visible cell, so this
# call fails on a fresh kernel. The saved output below mentions AFFILIATIONS
# although the call names CONFERENCES -- presumably stale; confirm.
upsertToTable_two('CONFERENCES', mydb, j, 'confID', 'confName')

(830, 2)
Records AFFILIATIONS inserted
new affil


In [106]:
#print jdf
#replace_inTablePK(mydb, jdf, 4, 'confID', 'confName')
# Re-read AFFILIATIONS and show its last rows.
aff = sqlCMDToPD('AFFILIATIONS', mydb)
aff.tail()

(831, 2)


Unnamed: 0,affilID,affiliation
826,827,"University of Groningen, Nijenborgh 9, 9747 AG..."
827,828,"Department of Informatics, Universidad Tecnica..."
828,829,"Dept of Computer Systems, University of S o..."
829,830,"ABB Corporate Research Germany, Industrial Sof..."
830,831,Some Fancy Pants Nancy Place or My cereal bowl


In [None]:

def upsert(db, pkcol, infocol, table, test):
    '''Insert-or-update (primary key, value) records in a database table
    param  db str : Database name to connect to
    param pkcol str : name of the primary key column
    param infocol str : name of the column holding the value to store
    param table str : Table Name to upsert into
    param test pandas dataframe : one row per record. The columns named
                pkcol and infocol are used when both are present; otherwise
                the first two columns are taken as (primary key, value)
    output : None. Rows whose key is missing are inserted, and every listed
             key ends up holding the value given for it
    '''
    def quoted(name):
        # identifiers cannot be bound as parameters; quote them instead
        return '"%s"' % name.replace('"', '""')

    if pkcol in test.columns and infocol in test.columns:
        pairs = test[[pkcol, infocol]]
    else:
        pairs = test.iloc[:, :2]

    # tolist() yields plain Python values, which sqlite3 is able to bind
    rows = [tuple(pair) for pair in pairs.values.tolist()]

    insert_q = "INSERT OR IGNORE INTO %s (%s, %s) VALUES (?, ?)" % (
        quoted(table), quoted(pkcol), quoted(infocol))
    update_q = "UPDATE %s SET %s=? WHERE %s=?" % (
        quoted(table), quoted(infocol), quoted(pkcol))

    # Connecting to the database file
    conn = sql.connect(db)
    try:
        # one transaction for both steps: committed on success, rolled back
        # when either statement fails
        with conn:
            # Tries to insert an ID (if it does not exist yet) with a
            # specific value in a second column
            print('Inserting new pk')
            conn.executemany(insert_q, rows)

            # Updates the newly inserted or pre-existing entry
            print('Update')
            conn.executemany(update_q, [(info, pk) for pk, info in rows])
    finally:
        conn.close()

In [None]:
import sqlite3

sqlite_file = 'my_first_db.sqlite'
table_name = 'my_table_3'

# Connecting to the database file
conn = sqlite3.connect(sqlite_file)
try:
    c = conn.cursor()

    # Retrieve column information
    # Every column will be represented by a tuple with the following attributes:
    # (id, name, type, notnull, default_value, primary_key)
    c.execute('PRAGMA TABLE_INFO({})'.format(table_name))

    # collect names in a list
    names = [tup[1] for tup in c.fetchall()]
finally:
    # Closing the connection to the database file, also when the query fails
    conn.close()

print(names)
# e.g., ['id', 'date', 'time', 'date_time']