In [1]:
from env import host, username, password, get_db_url
import os
import pandas as pd 
import numpy as np

In [19]:
def acquire_zillow_data(use_cache=True, cache_path='zillow.csv'):
    '''
    Return the 2017 Zillow property/prediction data as a pandas DataFrame.

    When `use_cache` is True and a CSV exists at `cache_path`, that CSV is read
    and returned without touching the database. Otherwise the query below is
    run against the database at get_db_url('zillow'), the result is written to
    `cache_path` (overwriting any existing file) and then returned.

    Parameters
    ----------
    use_cache : bool, default True
        Read from the cached CSV when it exists. Pass False to force a fresh
        pull from the database and refresh the cache.
    cache_path : str, default 'zillow.csv'
        Location of the cached CSV, relative to the current working directory
        unless an absolute path is given.

    Returns
    -------
    pandas.DataFrame
        The cached or freshly queried data.
    '''
    cache_exists = os.path.exists(cache_path)

    # Fast path: serve the cached copy when the caller allows it.
    if use_cache and cache_exists:
        print('Using cached CSV')
        # low_memory=False parses the file in one pass so each column's dtype is
        # inferred from the whole column instead of chunk by chunk.
        return pd.read_csv(cache_path, low_memory=False)

    # Report the real reason for going to the database: the file may exist and
    # simply have been bypassed with use_cache=False.
    if cache_exists:
        print('Cached CSV ignored because use_cache=False.')
    else:
        print('CSV not detected.')
    print('Acquiring data from SQL database instead.')

    # NOTE(review): the subquery groups by (parcelid, logerror), so a parcel with
    # several predictions that differ in logerror yields one row per distinct
    # logerror rather than only its latest transaction. Left unchanged because a
    # fix alters the returned row set -- confirm the intended behaviour.
    # Rows with a NULL latitude or longitude are excluded by the WHERE clause.
    query = '''
        SELECT *
        FROM properties_2017
        JOIN (
              SELECT parcelid, 
                     logerror, 
                     MAX(transactiondate) AS transactiondate 
              FROM predictions_2017 GROUP BY parcelid, logerror
              ) 
               AS pred_2017 USING(parcelid) 
        LEFT JOIN airconditioningtype 
            USING(airconditioningtypeid)
        LEFT JOIN architecturalstyletype 
            USING(architecturalstyletypeid)
        LEFT JOIN buildingclasstype 
            USING(buildingclasstypeid)
        LEFT JOIN heatingorsystemtype 
            USING(heatingorsystemtypeid)
        LEFT JOIN propertylandusetype 
            USING(propertylandusetypeid)
        LEFT JOIN storytype 
            USING(storytypeid)
        LEFT JOIN typeconstructiontype 
            USING(typeconstructiontypeid)
        WHERE properties_2017.latitude IS NOT NULL
        AND properties_2017.longitude IS NOT NULL;             
        '''
    df = pd.read_sql(query, get_db_url('zillow'))

    # Write the cache first so the message below is only printed once the file
    # actually exists. index=False keeps the row index out of the CSV.
    df.to_csv(cache_path, index=False)
    print('Acquisition Complete. Dataframe available and is now cached for future use.')

    return df

In [20]:
# First call: per the output below, no cached CSV was found, so the data was pulled
# from the SQL database and written to disk. `df` is not referenced again in the
# cells visible here.
df = acquire_zillow_data()

CSV not detected.
Acquiring data from SQL database instead.
Acquisition Complete. Dataframe available and is now cached for future use.


In [21]:
# Same call again: the output below reports 'Using cached CSV', i.e. this run read
# zillow.csv from disk instead of querying the database.
zillow = acquire_zillow_data()

Using cached CSV


  zillow = acquire_zillow_data()


In [22]:
# Display the frame; the '...' markers in the output show that pandas elides the
# middle rows and columns rather than printing everything.
zillow

Unnamed: 0,typeconstructiontypeid,storytypeid,propertylandusetypeid,heatingorsystemtypeid,buildingclasstypeid,architecturalstyletypeid,airconditioningtypeid,parcelid,id,basementsqft,...,censustractandblock,logerror,transactiondate,airconditioningdesc,architecturalstyledesc,buildingclassdesc,heatingorsystemdesc,propertylandusedesc,storydesc,typeconstructiondesc
0,,,261.0,,,,,14297519,1727539,,...,6.059063e+13,0.025595,2017-01-01,,,,,Single Family Residential,,
1,,,261.0,,,,,17052889,1387261,,...,6.111001e+13,0.055619,2017-01-01,,,,,Single Family Residential,,
2,,,261.0,,,,,14186244,11677,,...,6.059022e+13,0.005383,2017-01-01,,,,,Single Family Residential,,
3,,,261.0,2.0,,,,12177905,2288172,,...,6.037300e+13,-0.103410,2017-01-01,,,,Central,Single Family Residential,,
4,,,266.0,2.0,,,1.0,10887214,1970746,,...,6.037124e+13,0.006940,2017-01-01,Central,,,Central,Condominium,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77570,,,266.0,2.0,,,1.0,10833991,2864704,,...,6.037132e+13,-0.002245,2017-09-20,Central,,,Central,Condominium,,
77571,,,261.0,2.0,,,,11000655,673515,,...,6.037101e+13,0.020615,2017-09-20,,,,Central,Single Family Residential,,
77572,,,261.0,,,,,17239384,2968375,,...,6.111008e+13,0.013209,2017-09-21,,,,,Single Family Residential,,
77573,,,261.0,2.0,,,1.0,12773139,1843709,,...,6.037434e+13,0.037129,2017-09-21,Central,,,Central,Single Family Residential,,


In [24]:
# Inspect column dtypes; the output below reports 68 columns, with the id columns
# shown as float64 and the description columns shown as object.
zillow.dtypes

typeconstructiontypeid    float64
storytypeid               float64
propertylandusetypeid     float64
heatingorsystemtypeid     float64
buildingclasstypeid       float64
                           ...   
buildingclassdesc          object
heatingorsystemdesc        object
propertylandusedesc        object
storydesc                  object
typeconstructiondesc       object
Length: 68, dtype: object