# Creating databases and tables

Creating databases is different for every database type, and often requires the use of a command line tool or management application.

With SQLite, the **create_engine()** statement will create the database and file if they do not already exist.

In [1]:
# Import create_engine, MetaData, and Table
from sqlalchemy import create_engine, MetaData, Table, func, desc

# Create engine: engine
# The URL names a SQLite database file, employees.sqlite, by relative path.
engine = create_engine('sqlite:///employees.sqlite')
# Open a connection from the engine; later cells execute their statements through it.
connection = engine.connect()

# Create a metadata object: metadata
# The Table objects defined in later cells are registered on this instance.
metadata = MetaData()

In [7]:
# Import all the necessary
from sqlalchemy import (Table, Column, String,
                       Integer, DECIMAL, Boolean)

# Create the table and a few columns
# The table is registered on `metadata` under the name 'employees'. No
# constraints or defaults are declared on any of its columns.
# NOTE: running this cell a second time against the same `metadata` object
# raises the InvalidRequestError recorded below, because 'employees' is
# already defined on it.
employees = Table('employees', metadata, 
                 Column('id', Integer()),
                 Column('name', String(255)),
                 Column('salary', DECIMAL()),
                 Column('active', Boolean())
                 )

# Create the table in the database
# create_all() is called on the MetaData instance and is given the engine to use.
metadata.create_all(engine)

# Verify that the table was created
# As the last expression in the cell, the returned table names are displayed.
engine.table_names()

InvalidRequestError: Table 'employees' is already defined for this MetaData instance.  Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

## Creating Tables

Still uses the Table object like we did for reflection

Replaces the autoload keyword arguments with **Column objects**

Creates the tables in the actual database by using the **create_all()** method on the *MetaData* instance. 

You need to use other tools to handle database table updates, such as **Alembic** or **raw SQL**

Constraints:
- **unique** - forces all values for the data in a column to be unique
- **nullable** - determines if a column can be empty in a row
- **default** - sets a default value if one isn't supplied. 


In [9]:
# Import
from sqlalchemy import Float

# Create the Table
# NOTE: this rebinds the name `employees` to a new table called 'employees2',
# so statements in later cells that use `employees` target 'employees2'.
employees = Table('employees2', metadata,
                 Column('id', Integer()),
                 # unique=True: no two rows may share a name; nullable=False: name may not be empty
                 Column('name', String(255), unique=True, nullable=False),
                 # default=: value used when one isn't supplied
                 Column('salary', Float(), default=100.00),
                 Column('active', Boolean(), default=True))

# Check to see what constraints are on the table
# As the last expression in the cell, the constraint set is displayed.
employees.constraints

{CheckConstraint(<sqlalchemy.sql.elements.BinaryExpression object at 0x0000018544E84828>, name='_unnamed_', table=Table('employees2', MetaData(bind=None), Column('id', Integer(), table=<employees2>), Column('name', String(length=255), table=<employees2>, nullable=False), Column('salary', Float(), table=<employees2>, default=ColumnDefault(100.0)), Column('active', Boolean(), table=<employees2>, default=ColumnDefault(True)), schema=None), _create_rule=<sqlalchemy.util.langhelpers.portable_instancemethod object at 0x0000018544E907E0>, _type_bound=True),
 PrimaryKeyConstraint(),
 UniqueConstraint(Column('name', String(length=255), table=<employees2>, nullable=False))}

 You'd still use the **Table** object; however, you'd need to replace the **autoload** and **autoload_with** parameters with Column objects.
 
The Column object takes: 
 - a *name*
 - a *SQLAlchemy type* with an *optional format*,
 - *optional keyword arguments* for different constraints.

In [10]:
# Import Table, Column, String, Integer, Float, Boolean from sqlalchemy
from sqlalchemy import Table, Column, String, Integer, Float, Boolean

# Declare the 'data' table on the shared metadata: one column each for
# name, count, amount and valid, with no constraints or defaults.
data = Table(
    'data',
    metadata,
    Column('name', String(255)),
    Column('count', Integer()),
    Column('amount', Float()),
    Column('valid', Boolean()),
)

# Create the tables registered on metadata in the database behind engine
metadata.create_all(engine)

# Show the table definition
print(f"{data!r}")

Table('data', MetaData(bind=None), Column('name', String(length=255), table=<data>), Column('count', Integer(), table=<data>), Column('amount', Float(), table=<data>), Column('valid', Boolean(), table=<data>), schema=None)


In [11]:
# Import Table, Column, String, Integer, Float, Boolean from sqlalchemy
from sqlalchemy import Table, Column, String, Integer, Float, Boolean

# Define a new table with a name, count, amount, and valid column: data
# NOTE: the table is named 'data2' and the name `data` is rebound to it, so
# later cells that use `data` operate on 'data2'.
data = Table('data2', metadata,
             # unique=True: no two rows may share a name
             Column('name', String(255), unique=True),
             # default=: value used when one isn't supplied
             Column('count', Integer(), default=1),
             Column('amount', Float()),
             Column('valid', Boolean(), default=False)
)

# Use the metadata to create the table
metadata.create_all(engine)

# Print the table details
# NOTE(review): this looks up 'data' (the table from the previous cell), not
# the 'data2' table defined above, so the unique/default settings do not
# appear in the output. Confirm whether 'data2' was intended.
print(repr(metadata.tables['data']))

Table('data', MetaData(bind=None), Column('name', String(length=255), table=<data>), Column('count', Integer(), table=<data>), Column('amount', Float(), table=<data>), Column('valid', Boolean(), table=<data>), schema=None)


# Inserting data into a Table

To insert data into the table we use the **insert()** statement.
 - Takes the table we are loading data into as the argument
 - We add all the values we want to insert in with the **values clause** as *column=value* pairs.

In [17]:
# Import insert
from sqlalchemy import insert

# Create the statement
# Each keyword in values() maps a column name to the value to insert.
# NOTE: `employees` is bound to the 'employees2' table, whose name column is
# unique, so running this cell again with the same name raises the
# IntegrityError recorded below.
stmt = insert(employees).values(
    id=1, name='Jason', salary=1.00, active=True)

# Execute the statement and save the result proxy
result_proxy = connection.execute(stmt)

# Print the number of rows inserted.
# rowcount is an attribute of the result proxy, read with a dot and no call.
print(result_proxy.rowcount)

IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: employees2.name
[SQL: INSERT INTO employees2 (id, name, salary, active) VALUES (?, ?, ?, ?)]
[parameters: (1, 'Jason', 1.0, 1)]
(Background on this error at: http://sqlalche.me/e/gkpj)

## Inserting Multiple Rows

Build an insert statement without any values

Build a list of dictionaries that represent all the values clauses for the rows you want to insert. 

Pass both the stmt and the values list to the execute method on connection.

In [19]:
# One dictionary per row to insert, keyed by column name
values_list = [
    dict(id=2, name='Rebecca', salary=2.00, active=True),
    dict(id=3, name='Bob', salary=0.00, active=False),
]

# Insert statement with no values clause; the rows are supplied at execution
stmt = insert(employees)

# Hand the statement and the list of rows to the connection together
result_proxy = connection.execute(stmt, values_list)

# Report how many rows were inserted
print(result_proxy.rowcount)

2


There are several ways to perform an insert with SQLAlchemy; however, we are going to focus on the one that follows the same pattern as the select statement.

In [20]:
# Import insert and select from sqlalchemy
from sqlalchemy import select, insert

# Insert one record through `data`: insert_stmt
insert_stmt = insert(data).values(
    name='Anna',
    count=1,
    amount=1000.00,
    valid=True,
)

# Run the insert and report how many rows it added: results
results = connection.execute(insert_stmt)
print(results.rowcount)

# Read the record back to validate the insert: select_stmt
select_stmt = select([data])
select_stmt = select_stmt.where(data.c.name == 'Anna')

# Show the first matching row
print(connection.execute(select_stmt).first())

1
('Anna', 1, 1000.0, True)


In [21]:
# Build a list of dictionaries: values_list
# One dictionary per row, keyed by column name.
values_list = [
    {'name': 'Anna', 'count': 1, 'amount': 1000.00, 'valid': True},
    {'name': 'Taylor', 'count': 1, 'amount': 750.00, 'valid': False}
]

# Build an insert statement for the data table: stmt
# NOTE: `data` was last bound to the 'data2' table, whose name column is unique.
stmt = insert(data)

# Execute stmt with the values_list: results
# NOTE: a row named 'Anna' was already inserted through `data` in the previous
# cell, which is consistent with the UNIQUE constraint failure recorded below.
results = connection.execute(stmt, values_list)

# Print rowcount
print(results.rowcount)

IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: data2.name
[SQL: INSERT INTO data2 (name, count, amount, valid) VALUES (?, ?, ?, ?)]
[parameters: (('Anna', 1, 1000.0, 1), ('Taylor', 1, 750.0, 0))]
(Background on this error at: http://sqlalche.me/e/gkpj)

### Loading a CSV into a table

You've done a great job so far at inserting data into tables! You're now going to learn how to load the contents of a CSV file into a table.

One way to do that would be to read a CSV file line by line, create a dictionary from each line, and then use insert(), like you did in the previous exercise.

But there is a faster way using pandas. You can read a CSV file into a DataFrame using the read_csv() function (this function should be familiar to you, but you can run help(pd.read_csv) in the console to refresh your memory!). Then, you can call the .to_sql() method on the DataFrame to load it into a SQL table in a database. 

Parameters of **to_sql()**:
 - **name** = name of the SQL table (as string)
 - **con** = connection to the database that you will use to upload the data
 - **if_exists** = how to behave if the table already exists in the database ('fail', 'replace', 'append')
 - **index** = specifies whether to write the DataFrame's index as a column

In [22]:
# import pandas
import pandas as pd

# read census.csv into a dataframe : census_df
# header=None: the file's first line is read as data, not as column names
census_df = pd.read_csv("census.csv", header=None)

# rename the columns of the census dataframe
# (assumes the file has exactly these five columns, in this order — TODO confirm)
census_df.columns = ['state', 'sex', 'age', 'pop2000', 'pop2008']

# append the data from census_df to the "census" table via connection
# if_exists='append' adds the rows to an existing table; index=False leaves
# the DataFrame's index out of the written columns
census_df.to_sql(name='census', con=connection, if_exists='append', index=False)

# Updating Data in a Table

To update data in a database we use the **update** statement. 

Similar to the insert statement but includes a **where** clause to determine what record will be updated. 

The **values** clause contains only the *column=value* pairs we want to change

In [23]:
# Import the update
from sqlalchemy import update

# Update statement that sets active=True on the row whose id is 3
stmt = (
    update(employees)
    .where(employees.c.id == 3)
    .values(active=True)
)

# Execute the update
result_proxy = connection.execute(stmt)

# Report how many rows the update touched
print(result_proxy.rowcount)

1


It's possible to update multiple records by having a where clause that would target multiple records

In [24]:
# Update statement for every row that is currently active: mark it inactive
# and set its salary to zero. The `== True` comparison builds the SQL
# condition and must stay an equality test.
stmt = (
    update(employees)
    .where(employees.c.active == True)
    .values(active=False, salary=0.00)
)

# Execute the update
result_proxy = connection.execute(stmt)

# Report how many rows the update touched
print(result_proxy.rowcount)

3


In [25]:
# Make a statement
# Select only the salary column from employees
new_salary = select([employees.columns.salary])

# Order the values
# Descending order puts the highest salary first
new_salary = new_salary.order_by(desc(employees.columns.salary))

# Set the limit
# Keep a single row, i.e. the highest salary
new_salary = new_salary.limit(1)

# Make the update statement
stmt = update(employees)

# Update the values
# The select built above is passed as the new salary value; there is no
# where() clause, so the update is not limited to particular rows
stmt = stmt.values(salary = new_salary)

# Execute the statement
result_proxy = connection.execute(stmt)

# Print the number of rows affected
print(result_proxy.rowcount)


3


In [27]:
# NOTE: `state_fact` is not defined in any earlier cell, hence the NameError
# recorded below; it has to be bound to the state_fact table before this runs.

# Build a select statement: select_stmt
select_stmt = select([state_fact]).where(state_fact.columns.name == 'New York')

# Execute select_stmt and fetch the results
results = connection.execute(select_stmt).fetchall()

# Print the results of executing the select_stmt
print(results)

# Print the FIPS code for the first row of the result
print(results[0]['fips_state'])

# Build a statement to update the fips_state to 36: update_stmt
update_stmt = update(state_fact).values(fips_state = 36)

# Append a where clause to limit it to records for New York state
update_stmt = update_stmt.where(state_fact.columns.name == 'New York')

# Execute the statement: update_results
update_results = connection.execute(update_stmt)

# Execute select_stmt again and fetch the new results
new_results = connection.execute(select_stmt).fetchall()

# Print the new_results
print(new_results)

# Print the FIPS code for the first row of the new_results.
# This must read new_results: `results` was fetched before the update and
# would still show the old value.
print(new_results[0]['fips_state'])

NameError: name 'state_fact' is not defined

In [28]:
# NOTE: `state_fact` is not defined in any earlier cell, hence the NameError
# recorded below.

# Build a statement to update the notes to 'The Wild West': stmt
# Without a where() clause this statement is not limited to particular rows.
stmt = update(state_fact).values(notes='The Wild West')

# Append a where clause to match the West census region records: stmt_west
# The result is stored under a new name; `stmt` keeps the statement built above.
stmt_west = stmt.where(state_fact.columns.census_region_name == 'West')

# Execute the statement: results
# Only the filtered statement, stmt_west, is executed.
results = connection.execute(stmt_west)

# Print rowcount
print(results.rowcount)

NameError: name 'state_fact' is not defined

In [29]:
# NOTE: neither `state_fact` nor `flat_census` is defined in any earlier cell;
# the NameError recorded below is raised on the first one.

# Build a statement to select name from state_fact: fips_stmt
fips_stmt = select([state_fact.columns.name])

# Append a where clause to match the fips_state to flat_census fips_code: fips_stmt
# The condition compares a state_fact column with a flat_census column, tying
# the select to the table that is updated below.
fips_stmt = fips_stmt.where(
    state_fact.columns.fips_state == flat_census.columns.fips_code)

# Build an update statement to set state_name to the result of fips_stmt: update_stmt
# The select itself is passed as the new value; there is no where() clause on the update.
update_stmt = update(flat_census).values(state_name=fips_stmt)

# Execute update_stmt: results
results = connection.execute(update_stmt)

# Print rowcount
print(results.rowcount)

NameError: name 'state_fact' is not defined

# Deleting Data from a Table

To delete data from a table, we use the **delete** statement
 - takes the table we are deleting data from as the argument
 - a **where()** clause is used to choose which rows to delete.
 - Hard to undo so BE CAREFUL!!!
 
Build a where clause that will select all the records you want to delete.

In [31]:
from sqlalchemy import delete

# NOTE: `extra_employees` is not defined in any earlier cell, hence the
# NameError recorded below.

# Build a statement that counts the id values in extra_employees
stmt = select([func.count(extra_employees.columns.id)])

# Execute the count and fetch it as a single value.
# NOTE: the value is neither printed nor stored here.
connection.execute(stmt).scalar()

# Build a delete statement with no where() clause, so it is not limited to
# particular rows
delete_stmt = delete(extra_employees)

# Execute the delete
result_proxy = connection.execute(delete_stmt)

# Number of rows affected; displayed as the cell's last expression
result_proxy.rowcount

NameError: name 'extra_employees' is not defined

In [32]:
# Build a delete statement limited by where() to the row whose id is 3
stmt = delete(employees).where(employees.columns.id == 3)

# Execute the delete
result_proxy = connection.execute(stmt)

# Number of rows affected; displayed as the cell's last expression
result_proxy.rowcount

1

## Dropping a Table Completely

Uses the drop method on the table

Accepts the engine as an argument so it knows where to remove the table from

Won't remove it from metadata until the python process is restarted.

In [33]:
# NOTE: `extra_employees` is not defined in any earlier cell, hence the
# NameError recorded below.

# Drop the table; the engine is passed so it knows where to remove the table from
extra_employees.drop(engine)

# To Verify if it still exists
print(extra_employees.exists(engine))

NameError: name 'extra_employees' is not defined

## Dropping all the tables

It is possible to drop all the tables in a database by using the **drop_all** method on the metadata object.

In [34]:
# Import delete, select
from sqlalchemy import delete, select

# NOTE: `census` is not defined in any earlier cell, hence the NameError
# recorded below.

# Build a statement to empty the census table: delete_stmt
# There is no where() clause, so the delete is not limited to particular rows.
delete_stmt = delete(census)

# Execute the statement: results
results = connection.execute(delete_stmt)

# Print affected rowcount
print(results.rowcount)

# Build a statement to select all records from the census table : select_stmt
select_stmt = select([census])

# Print the results of executing the statement to verify there are no rows
print(connection.execute(select_stmt).fetchall())

NameError: name 'census' is not defined

By using a where() clause, you can target the delete statement to remove only certain records.

In [35]:
# and_ is used twice below but is not imported by any earlier cell
from sqlalchemy import and_

# NOTE: `census` is not defined in any earlier cell, hence the NameError
# recorded below; it has to be bound to the census table before this runs.

# Build a statement to count records using the sex column for Men ('M') age 36: count_stmt
count_stmt = select([func.count(census.columns.sex)]).where(
    and_(census.columns.sex == 'M',
         census.columns.age == 36)
)

# Execute the select statement and use the scalar() fetch method to save the record count
to_delete = connection.execute(count_stmt).scalar()

# Build a statement to delete records from the census table: delete_stmt
delete_stmt = delete(census)

# Append a where clause to target Men ('M') age 36: delete_stmt
# The condition matches the one used for the count above, so the two numbers
# printed at the end can be compared.
delete_stmt = delete_stmt.where(
    and_(census.columns.sex == 'M',
         census.columns.age == 36)
)

# Execute the statement: results
results = connection.execute(delete_stmt)

# Print affected rowcount and to_delete record count, make sure they match
print(results.rowcount, to_delete)


NameError: name 'census' is not defined

In [36]:
# NOTE: neither `state_fact` nor `census` is defined in any earlier cell; the
# NameError recorded below is raised on the first one.

# Drop the state_fact table
state_fact.drop(engine)

# Check to see if state_fact exists
print(state_fact.exists(engine))

# Drop all tables
# drop_all() is called on the MetaData instance and is given the engine to use.
metadata.drop_all(engine)

# Check to see if census exists
# NOTE(review): presumably this prints False only if `census` is registered on
# `metadata`; that registration is not shown here — confirm.
print(census.exists(engine))

NameError: name 'state_fact' is not defined