In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# CSV file
csvfile = "demographics.csv"

In [3]:
# Read CSV file into a pandas DataFrame
### BEGIN SOLUTION
df = pd.read_csv(csvfile, dtype=object)
### END SOLUTION

In [4]:
# Use `dropna` to drop any rows where there is missing data
### BEGIN SOLUTION
df = df.dropna(axis=0)
### END SOLUTION

In [5]:
# Drop the `name` column and reset the index
### BEGIN SOLUTION
df.drop(['name'], axis=1).reset_index(drop=True)
### END SOLUTION

Unnamed: 0,id,age,height_meter,weight_kg,children,occupation,academic_degree,salary,location
0,0,58,1.87,53,1,Choreographer,PhD,68,South Dakota
1,1,65,1.80,40,0,Cellarman,Bachelor,73,Delaware
2,2,32,1.80,73,1,Veterinary Surgeon,Master,69,South Dakota
3,3,61,1.79,89,0,Hawker,PhD,88,Louisiana
4,4,23,1.64,51,2,Choreographer,Bachelor,83,West Virginia
5,5,20,1.68,60,4,Medical Physicist,Bachelor,65,South Dakota
6,6,31,1.56,62,0,Weaver,Master,72,Louisiana
7,7,56,1.60,42,0,Lighthouse Keeper,Master,65,Louisiana
8,8,30,1.62,44,3,Millwright,Master,87,Louisiana
9,9,44,1.69,51,5,Medical Supplier,PhD,72,West Virginia


In [8]:
# Save the cleaned data to a file called `cleaned_demographics.csv`
### BEGIN SOLUTION
clean_data = "cleaned_csv.csv"
df.to_csv(clean_data, index=False)
### END SOLUTION

In [9]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float

In [10]:
# Create an engine to a database file called `demographics.sqlite`
### BEGIN SOLUTION
engine = create_engine("sqlite:///demographics.sqlite")
### END SOLUTION

In [11]:
# Create a connection to the engine called `conn`
### BEGIN SOLUTION
conn = engine.connect()
### END SOLUTION

In [12]:
# Use `declarative_base` from SQLAlchemy to model the demographics table as an ORM class
# Make sure to specify types for each column
### BEGIN SOLUTION

Base = declarative_base()

class Demographics(Base):
    __tablename__ = 'demographics'

    id = Column(Integer, primary_key=True)
    age = Column(Integer)
    name = Column(Text)
    height_meter = Column(Float)
    weight_kg = Column(Float)
    children = Column(Integer)
    occupation = Column(Text)
    academic_degree = Column(Text)
    salary = Column(Integer)
    location = Column(Text)

    def __repr__(self):
        return f"id={self.id}, name={self.name}"
### END SOLUTION

SyntaxError: invalid syntax (<ipython-input-12-47e105baf1d7>, line 22)

In [13]:
# Use `create_all` to create the demographics table in the database
### BEGIN SOLUTION
Base.metadata.create_all(engine)
### END SOLUTION

NameError: name 'Base' is not defined

In [12]:
# Load the cleaned csv file into a pandas dataframe
### BEGIN SOLUTION
df_of_data_to_insert = pd.read_csv(clean_data)
### END SOLUTION

In [13]:
# Use Orient='records' to create a list of data to write
# http://pandas-docs.github.io/pandas-docs-travis/io.html#orient-options
### BEGIN SOLUTION
data = df_of_data_to_insert.to_dict(orient='records')
data[0]
### END SOLUTION

{'academic_degree': 'PhD',
 'age': 58,
 'children': 1,
 'height_meter': 1.87,
 'id': 0,
 'location': 'South Dakota',
 'name': 'Darlena Avila',
 'occupation': 'Choreographer',
 'salary': 68,
 'weight_kg': 53}

In [14]:
# Use MetaData from SQLAlchemy to reflect the tables
### BEGIN SOLUTION
metadata = MetaData(bind=engine)
metadata.reflect()
### END SOLUTION

In [15]:
# Save the reference to the `demographics` table as a variable called `table`
### BEGIN SOLUTION
table = sqlalchemy.Table('demographics', metadata, autoload=True)
### END SOLUTION

In [16]:
# Use `table.delete()` to remove any existing data.
# Note that this is a convenience function so that you can re-run the example code multiple times.
# You would not likely do this step in production.
### BEGIN SOLUTION
conn.execute(table.delete())
### END SOLUTION

<sqlalchemy.engine.result.ResultProxy at 0x10e624c50>

In [17]:
# Use `table.insert()` to insert the data into the table
### BEGIN SOLUTION
conn.execute(table.insert(), data)
### END SOLUTION

<sqlalchemy.engine.result.ResultProxy at 0x10e5b9f60>

In [18]:
# Test that the insert works by fetching the first 5 rows. 
conn.execute("select * from demographics limit 5").fetchall()

[(0, 58, 'Darlena Avila', 1.87, 53.0, 1, 'Choreographer', 'PhD', 68, 'South Dakota'),
 (1, 65, 'Yan Boyd', 1.8, 40.0, 0, 'Cellarman', 'Bachelor', 73, 'Delaware'),
 (2, 32, 'Joette Lane', 1.8, 73.0, 1, 'Veterinary Surgeon', 'Master', 69, 'South Dakota'),
 (3, 61, 'Jazmine Hunt', 1.79, 89.0, 0, 'Hawker', 'PhD', 88, 'Louisiana'),
 (4, 23, 'Remedios Gomez', 1.64, 51.0, 2, 'Choreographer', 'Bachelor', 83, 'West Virginia')]

In [19]:
# Optional - Clean Intermediate Files
os.remove(clean_data)

In [20]:
# Run this line to remove the sqlite db file to clear the cache.
# !rm demographics.sqlite

In [21]:
# !ls