In [26]:
# Import initial dependencies
import pandas as pd
import numpy as np
import os

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func

In [27]:
# CSV files
metadata_csv = "DataSets/Belly_Button_Biodiversity_Metadata.csv"
otu_csv = "DataSets/belly_button_biodiversity_otu_id.csv"
samples_csv = "DataSets/belly_button_biodiversity_samples.csv"

In [28]:
# Read CSV file into a pandas DataFrame
metadata_df = pd.read_csv(metadata_csv, dtype=object)
metadata_df.head()

Unnamed: 0,SAMPLEID,EVENT,ETHNICITY,GENDER,AGE,WFREQ,BBTYPE,LOCATION,COUNTRY012,ZIP012,...,DOG,CAT,IMPSURFACE013,NPP013,MMAXTEMP013,PFC013,IMPSURFACE1319,NPP1319,MMAXTEMP1319,PFC1319
0,940,BellyButtonsScienceOnline,Caucasian,F,24,2,I,Beaufort/NC,usa,22306.0,...,no,no,8852.0,37.17222214,54.5,,1.0,,33.99000168,25.5
1,941,,Caucasian/Midleastern,F,34,1,I,Chicago/IL,,,...,,,,,,,,,,
2,943,BellyButtonsScienceOnline,Caucasian,F,49,1,I,Omaha/NE,,,...,,,,,,,,,,
3,944,BellyButtonsScienceOnline,European,M,44,1,I,NewHaven/CT,usa,7079.0,...,no,yes,,35.81666565,16.0,,0.0,6567.0,32.40333176,28.5
4,945,BellyButtonsScienceOnline,Caucasian,F,48,1,I,Philidelphia/PA,usa,84404.0,...,no,no,,37.78333282,4.0,,0.0,5613.0,33.63444519,24.0


In [29]:
# Read CSV file into a pandas DataFrame
otu_df = pd.read_csv(otu_csv, dtype=object)
otu_df.head()

Unnamed: 0,otu_id,lowest_taxonomic_unit_found
0,1,Archaea;Euryarchaeota;Halobacteria;Halobacteri...
1,2,Archaea;Euryarchaeota;Halobacteria;Halobacteri...
2,3,Archaea;Euryarchaeota;Halobacteria;Halobacteri...
3,4,Archaea;Euryarchaeota;Methanobacteria;Methanob...
4,5,Archaea;Euryarchaeota;Methanobacteria;Methanob...


In [30]:
# Read samples CSV file into a pandas DataFrame
samples_df = pd.read_csv(samples_csv, dtype=object)
samples_df.head()

Unnamed: 0,otu_id,BB_940,BB_941,BB_943,BB_944,BB_945,BB_946,BB_947,BB_948,BB_949,...,BB_1562,BB_1563,BB_1564,BB_1572,BB_1573,BB_1574,BB_1576,BB_1577,BB_1581,BB_1601
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
# Use `declarative_base` from SQLAlchemy to model the Metadata table as an ORM class
# Make sure to specify types for each column, e.g. Integer, Text, etc.
# http://docs.sqlalchemy.org/en/latest/core/type_basics.html
Base = declarative_base()

class Metadata(Base):
    __tablename__ = 'metadata'

    sampleid = Column(Integer, primary_key=True)
    event = Column(Text)
    ethnicity = Column(Text)
    gender = Column(Text)
    age = Column(Integer)
    wrefq = Column(Integer)
    bbtype = Column(Text)
    location = Column(Text)
    country = Column(Text)
    zip012 = Column(Integer)
    country1319 = Column(Text)
    zip1319 = Column(Integer)
    dog = Column(Text)
    cat = Column(Text)
    impsurface013 = Column(Integer)
    npp013 = Column(Float)
    mmaxtemp013 = Column(Float)
    pfc013 = Column(Float)
    impsurface1319 = Column(Integer)
    npp1319 = Column(Float)
    mmaxtemp1319 = Column(Float)
    pfc1319 = Column(Float)
   
    def __repr__(self):
        return f"id={self.sampleid}"

In [32]:
class OTU(Base):
    __tablename__ = 'otu'

    sampleid = Column(Integer, primary_key=True)
    event = Column(Text)
    ethnicity = Column(Text)
    gender = Column(Text)
    age = Column(Integer)
    wrefq = Column(Integer)
    bbtype = Column(Text)
    location = Column(Text)
    country = Column(Text)
    zip012 = Column(Integer)
    country1319 = Column(Text)
    zip1319 = Column(Integer)
    dog = Column(Text)
    cat = Column(Text)
    impsurface013 = Column(Integer)
    npp013 = Column(Float)
    mmaxtemp013 = Column(Float)
    pfc013 = Column(Float)
    impsurface1319 = Column(Integer)
    npp1319 = Column(Float)
    mmaxtemp1319 = Column(Float)
    pfc1319 = Column(Float)
   
    def __repr__(self):
        return f"id={self.sampleid}"

In [42]:
# Create engine using the `sqlite` database file created provided in the data set
engine = create_engine("sqlite:///belly_button_biodiversity.sqlite")

In [43]:
# Use `create_all` to create the tables
Base.metadata.create_all(engine)

In [44]:
# Verify that the table names exist in the database
engine.table_names()

['metadata', 'otu']

In [45]:
# Use Pandas to Bulk insert each CSV file into their appropriate table
def populate_table(engine, table, dataframe):
    """Populates a table from a Pandas DataFrame."""
    # connect to the database
    conn = engine.connect()
    
    # Load the CSV file into a pandas dataframe 
    df_of_data_to_insert = dataframe
    
    # Orient='records' creates a list of data to write
    data = df_of_data_to_insert.to_dict(orient='records')

    # Optional: Delete all rows in the table 
    conn.execute(table.delete())

    # Insert the dataframe into the database in one bulk insert
    conn.execute(table.insert(), data)
    
# Call the function to insert the data for each table
populate_table(engine, Metadata.__table__, metadata_df)
populate_table(engine, OTU.__table__, otu_df)

In [47]:
engine.execute("SELECT * FROM metadata LIMIT 5").fetchall()

[(1, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None),
 (2, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None),
 (3, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None),
 (4, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None),
 (5, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)]

In [17]:
# Declare a Base using `automap_base()`
#Base = automap_base()

In [18]:
# Use the Base class to reflect the database tables
#Base.prepare(engine, reflect=True)

In [19]:
# Print all of the classes mapped to the Base
#Base.classes.keys()

[]

In [None]:
# Create the inspector and connect it to the engine
#inspector = inspect(engine)

# Collect the names of tables within the database
#inspector.get_table_names()

In [None]:
#inspectorinspect .get_columns(*INSERT TABLE NAME*)

In [None]:

#inspectorinspect .get_columns('TABLE NAME')

In [None]:
# Use `engine.execute` to select and display the first 10 rows from the table

#engine.execute('SELECT * FROM ___ LIMIT 10').fetchall()

In [None]:
# Reflect Database into ORM class
#____ = Base.classes.*INSERT CLASS NAME*


In [None]:
# Start a session to query the database 
#session = Session(engine)

In [None]:
#test = engine.execute('SELECT ____ FROM _____').fetchall()

In [None]:
#test