In [1]:
from env import host, username, password, get_db_url
import os
import pandas as pd 
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer


def wrangle_zillow():
    
    df = prep_zillow(acquire_zillow_data())
    
    # train/validate/test split
    train_validate, test = train_test_split(df, test_size=.2, random_state=123)
    train, validate = train_test_split(train_validate, test_size=.3, random_state=123)
    
    return train, validate, test
    



def acquire_zillow_data(use_cache=True):
    '''
    This function returns a snippet of zillow's database as a Pandas DataFrame. 
    When this SQL data is cached and extant in the os directory path, return the data as read into a df. 
    If csv is unavailable, aquisition proceeds regardless,
    reading the queried database elements into a dataframe, creating a cached csv file
    and lastly returning the dataframe for some sweet data science perusal.
    '''

    # If the cached parameter is True, read the csv file on disk in the same folder as this file 
    if os.path.exists('zillow.csv') and use_cache:
        print('Using cached CSV')
        return pd.read_csv('zillow.csv', dtype={'buildingclassdesc': 'str', 'propertyzoningdesc': 'str'})

    # When there's no cached csv, read the following query from Codeup's SQL database.
    print('CSV not detected.')
    print('Acquiring data from SQL database instead.')
    df = pd.read_sql(
        '''
 SELECT
    prop.*,
    predictions_2017.logerror,
    predictions_2017.transactiondate,
    air.airconditioningdesc,
    arch.architecturalstyledesc,
    build.buildingclassdesc,
    heat.heatingorsystemdesc,
    landuse.propertylandusedesc,
    story.storydesc,
    construct.typeconstructiondesc
FROM properties_2017 prop
JOIN (
    SELECT parcelid, MAX(transactiondate) AS max_transactiondate
    FROM predictions_2017
    GROUP BY parcelid
) pred USING(parcelid)
JOIN predictions_2017 ON pred.parcelid = predictions_2017.parcelid
                      AND pred.max_transactiondate = predictions_2017.transactiondate
LEFT JOIN airconditioningtype air USING (airconditioningtypeid)
LEFT JOIN architecturalstyletype arch USING (architecturalstyletypeid)
LEFT JOIN buildingclasstype build USING (buildingclasstypeid)
LEFT JOIN heatingorsystemtype heat USING (heatingorsystemtypeid)
LEFT JOIN propertylandusetype landuse USING (propertylandusetypeid)
LEFT JOIN storytype story USING (storytypeid)
LEFT JOIN typeconstructiontype construct USING (typeconstructiontypeid)
WHERE prop.latitude IS NOT NULL
  AND prop.longitude IS NOT NULL
  AND transactiondate <= '2017-12-31';             
        '''
                    , get_db_url('zillow'))
    
    df.propertyzoningdesc.astype(str)
    
    
    print('Acquisition Complete. Dataframe available and is now cached for future use.')
    # create a csv of the dataframe for the sake of efficiency. 
    df.to_csv('zillow.csv', index=False)
    
    return df

def remove_columns(df, cols_to_remove):
    '''
    This function takes in a pandas dataframe and a list of columns to remove. It drops those columns from the original df and returns the df.
    '''
    df = df.drop(columns=cols_to_remove)
    return df
                 
                 
def handle_missing_values(df, prop_required_column=0.5 , prop_required_row=0.5):
    '''
    This function takes in a pandas dataframe, default proportion of required columns (set to 50%) and proprtion of required rows (set to 75%).
    It drops any rows or columns that contain null values more than the threshold specified from the original dataframe and returns that dataframe.
    
    Prior to returning that data, it will print statistics and list counts/names of removed columns/row counts 
    '''
    original_cols = df.columns.to_list()
    original_rows = df.shape[0]
    threshold = int(round(prop_required_column * len(df.index), 0))
    df = df.dropna(axis=1, thresh=threshold)
    threshold = int(round(prop_required_row * len(df.columns), 0))
    df = df.dropna(axis=0, thresh=threshold)
    remaining_cols = df.columns.to_list()
    remaining_rows = df.shape[0]
    dropped_col_count = len(original_cols) - len(remaining_cols)
    dropped_cols = list((Counter(original_cols) - Counter(remaining_cols)).elements())
    print(f'The following {dropped_col_count} columns were dropped because they were missing more than {prop_required_column * 100}% of data: \n{dropped_cols}\n')
    dropped_rows = original_rows - remaining_rows
    print(f'{dropped_rows} rows were dropped because they were missing more than {prop_required_row * 100}% of data')
    return df

# combined in one function
def data_prep(df, cols_to_remove=[], prop_required_column=0.5, prop_required_row=0.5):
    '''
    This function calls the remove_columns and handle_missing_values to drop columns that need to be removed. It also drops rows and columns that have more 
    missing values than the specified threshold.
    '''
    df = remove_columns(df, cols_to_remove)
    df = handle_missing_values(df, prop_required_column, prop_required_row)
    return df

def remove_outliers(df, k, col_list):
    ''' remove outliers from a list of columns in a dataframe 
        and return that dataframe
    '''
    
    for col in col_list:
        # get quartiles
        q1, q3 = df[f'{col}'].quantile([.25, .75])  
        # calculate interquartile range
        iqr = q3 - q1   
        # get upper bound
        upper_bound = q3 + k * iqr 
        # get lower bound
        lower_bound = q1 - k * iqr   

        # return dataframe without outliers
        
        df = df[(df[f'{col}'] > lower_bound) & (df[f'{col}'] < upper_bound)]
        
    return df




In [2]:
def prep_zillow(df):
    
    df = data_prep(df)
    
    df = df[(df.propertylandusedesc == 'Single Family Residential') |
      (df.propertylandusedesc == 'Mobile Home') |
      (df.propertylandusedesc == 'Manufactured, Modular, Prefabricated Homes') |
      (df.propertylandusedesc == 'Cluster Home')]
    
    # Remove properties that couldn't even plausibly be a studio. 
    df= df[(df.bedroomcnt > 0) & (df.bathroomcnt > 0)]
    
    # Remove properties where there is not a single bathroom.
    df = df[df.bathroomcnt > 0]
    
 # keep only properties with square footage greater than 70 (legal size of a bedroom)
    df = df[df.calculatedfinishedsquarefeet > 70]
    

    
    # Minimum lot size of single family units.
    df = df[df.lotsizesquarefeet >= 5000].copy()
    
    
    #df = df[~df['propertylandusetypeid'].isin([263, 265, 275])]
    
    # Clear indicators of single unit family. Other codes non-existent or indicate commercial sites. 
   # 0100 - Single Residence
   # 0101 Single residence with pool
   # 0104 - Single resident with therapy pool 
    df = df[(df.propertycountylandusecode == '0100') |
      (df.propertycountylandusecode == '0101') |
      (df.propertycountylandusecode == '0104')]
    
    # With so few remaining nulls it is safe to now control where unit count is only 1 
    df = df[df.unitcnt == 1.0]
    
    # The last nulls can be dropped altogether. 
    df = df.dropna()
    
    # Property where finished area is 152 but bed count is 5. 
    df = df.drop(labels=75325, axis=0)
    
    # This removes the 30 rows where solar is at play. 1% of data. 
    df = df[df['heatingorsystemtypeid'] != 20]
    
    df['yearbuilt'] = df['yearbuilt'].astype(int)
    df.yearbuilt = df.yearbuilt.astype(object) 
    df['age'] = 2017-df['yearbuilt']
    df = df.drop(columns='yearbuilt')
    df['age'] = df['age'].astype('int')
    
    df['central_cooling'] = df.heatingorsystemdesc.map({'Central': 1, 'Floor/Wall': 0})
    
    # Removing lotsize squarefeet outliers that are outrageously large. 
    
    df = remove_outliers(df, 3, ['lotsizesquarefeet'])  
            
    # Redudant columns or uninterpretable columns
    # Unit count was dropped because now its known that theyre all 1. 
    # Finished square feet is equal to calculated sq feet. 
    # full bathcnt and calculatedbathnbr are equal to bathroomcnt
    # property zoning desc is unreadable. 
    # assessment year is unnecessary, all values are 2016. 
    # property land use desc is always single family residence 
    # same with property landuse type id. 
    # room count must be for a different category, as it is always 0.
    # regionidcounty reveals the same information as FIPS. 
    # heatingorsystemtypeid is redundant. Encoded descr. 
    # Id does nothing, and parcelid is easier to represent. 

    
    df.drop(columns= ['finishedsquarefeet12', 'fullbathcnt', 'calculatedbathnbr',
                      'propertyzoningdesc', 'unitcnt', 'propertylandusedesc',
                      'assessmentyear', 'roomcnt', 'regionidcounty', 'propertylandusetypeid',
                      'heatingorsystemtypeid', 'id', 'heatingorsystemdesc' ],
            axis=1, inplace=True)
    
    df = df.set_index('parcelid')
    
    return df

In [3]:
df = acquire_zillow_data()

Using cached CSV


In [4]:
df = df[(df.propertylandusedesc == 'Single Family Residential') |
  (df.propertylandusedesc == 'Mobile Home') |
  (df.propertylandusedesc == 'Manufactured, Modular, Prefabricated Homes') |
  (df.propertylandusedesc == 'Cluster Home')]

In [5]:
# after controlling for plausible single family stays. 
df.shape

(52784, 68)

In [6]:
# Remove properties that couldn't even plausibly be a studio. 
df= df[(df.bedroomcnt > 0) & (df.bathroomcnt > 0)]
df.shape

(52595, 68)

In [7]:
# Remove properties where there is not a single bathroom.
df = df[df.bathroomcnt > 0]
df.shape

(52595, 68)

In [8]:
# keep only properties with square footage greater than 70 (legal size of a bedroom)
df = df[df.calculatedfinishedsquarefeet > 70]
df.shape

(52587, 68)

In [9]:
df.fips.value_counts()

6037.0    33803
6059.0    14054
6111.0     4730
Name: fips, dtype: int64

In [10]:
# https://www.codepublishing.com/CA/Coronado/html/Coronado86/Coronado8608.html
# Minimum lot size of single family units in Los Angeles, Orange.
df = df[df.lotsizesquarefeet >= 5000].copy()

In [11]:
df.fips.value_counts()

6037.0    29836
6059.0    10425
6111.0     3869
Name: fips, dtype: int64

In [12]:
df = data_prep(df)

The following 34 columns were dropped because they were missing more than 50.0% of data: 
['airconditioningtypeid', 'architecturalstyletypeid', 'basementsqft', 'buildingclasstypeid', 'decktypeid', 'finishedfloor1squarefeet', 'finishedsquarefeet13', 'finishedsquarefeet15', 'finishedsquarefeet50', 'finishedsquarefeet6', 'fireplacecnt', 'garagecarcnt', 'garagetotalsqft', 'hashottuborspa', 'poolcnt', 'poolsizesum', 'pooltypeid10', 'pooltypeid2', 'pooltypeid7', 'regionidneighborhood', 'storytypeid', 'threequarterbathnbr', 'typeconstructiontypeid', 'yardbuildingsqft17', 'yardbuildingsqft26', 'numberofstories', 'fireplaceflag', 'taxdelinquencyflag', 'taxdelinquencyyear', 'airconditioningdesc', 'architecturalstyledesc', 'buildingclassdesc', 'storydesc', 'typeconstructiondesc']

0 rows were dropped because they were missing more than 50.0% of data


In [13]:
df

Unnamed: 0,id,parcelid,bathroomcnt,bedroomcnt,buildingqualitytypeid,calculatedbathnbr,calculatedfinishedsquarefeet,finishedsquarefeet12,fips,fullbathcnt,...,structuretaxvaluedollarcnt,taxvaluedollarcnt,assessmentyear,landtaxvaluedollarcnt,taxamount,censustractandblock,logerror,transactiondate,heatingorsystemdesc,propertylandusedesc
1,1387261,17052889,1.0,2.0,,1.0,1465.0,1465.0,6111.0,1.0,...,88000.0,464000.0,2016.0,376000.0,5672.48,6.111001e+13,0.055619,2017-01-01,,Single Family Residential
2,11677,14186244,2.0,3.0,,2.0,1243.0,1243.0,6059.0,2.0,...,85289.0,564778.0,2016.0,479489.0,6488.30,6.059022e+13,0.005383,2017-01-01,,Single Family Residential
3,2288172,12177905,3.0,4.0,8.0,3.0,2376.0,2376.0,6037.0,3.0,...,108918.0,145143.0,2016.0,36225.0,1777.51,6.037300e+13,-0.103410,2017-01-01,Central,Single Family Residential
6,781532,12095076,3.0,4.0,9.0,3.0,2962.0,2962.0,6037.0,3.0,...,276684.0,773303.0,2016.0,496619.0,9516.26,6.037461e+13,-0.001011,2017-01-01,Central,Single Family Residential
8,1246926,12790562,3.0,4.0,9.0,3.0,3039.0,3039.0,6037.0,3.0,...,177527.0,220583.0,2016.0,43056.0,3104.19,6.037500e+13,-0.040966,2017-01-02,Central,Single Family Residential
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77373,1373391,10722691,2.0,3.0,6.0,2.0,1570.0,1570.0,6037.0,2.0,...,46784.0,72026.0,2016.0,25242.0,1000.70,6.037135e+13,0.081196,2017-09-19,Central,Single Family Residential
77376,673515,11000655,2.0,2.0,6.0,2.0,1286.0,1286.0,6037.0,2.0,...,70917.0,354621.0,2016.0,283704.0,4478.43,6.037101e+13,0.020615,2017-09-20,Central,Single Family Residential
77377,2968375,17239384,2.0,4.0,,2.0,1612.0,1612.0,6111.0,2.0,...,50683.0,67205.0,2016.0,16522.0,1107.48,6.111008e+13,0.013209,2017-09-21,,Single Family Residential
77378,1843709,12773139,1.0,3.0,4.0,1.0,1032.0,1032.0,6037.0,1.0,...,32797.0,49546.0,2016.0,16749.0,876.43,6.037434e+13,0.037129,2017-09-21,Central,Single Family Residential


In [14]:
df = df[(df.propertycountylandusecode == '0100')|
      (df.propertycountylandusecode == '0101') |
      (df.propertycountylandusecode == '0104') |
       (df.propertycountylandusecode == '122') | 
        (df.propertycountylandusecode == '1111') |
        (df.propertycountylandusecode == '1110') |
        (df.propertycountylandusecode == '1')
        ]

In [17]:
df.fips.value_counts()

6037.0    29538
6059.0    10425
6111.0     3843
Name: fips, dtype: int64

In [16]:
df.shape

(43806, 34)

In [None]:
df

In [19]:
import mitosheet
mitosheet.sheet(df, view_df=True)

MitoWidget(analysis_data_json='{"analysisName": "UUID-c74d8d98-2a49-48f4-beae-bbfc93a2015d", "code": {"imports…

In [None]:
from mitosheet import *; register_analysis('UUID-c74d8d98-2a49-48f4-beae-bbfc93a2015d')
    
# Filtered unitcnt in df
df = df[df['unitcnt'] != 2]


In [20]:
df = df[df['unitcnt'] != 2]

In [23]:
def describe_data(df):
    '''
    This function takes in a pandas dataframe and prints out the shape, datatypes, number of missing values, 
    columns and their data types, summary statistics of numeric columns in the dataframe, as well as the value counts for categorical variables.
    '''
    # Print out the "shape" of our dataframe - rows and columns
    print(f'This dataframe has {df.shape[0]} rows and {df.shape[1]} columns.')
    print('')
    print('--------------------------------------')
    print('--------------------------------------')
    
    # print the datatypes and column names with non-null counts
    print(df.info())
    print('')
    print('--------------------------------------')
    print('--------------------------------------')
    
    
    # print out summary stats for our dataset
    print('Here are the summary statistics of our dataset')
    print(df.describe().applymap(lambda x: f"{x:0.3f}"))
    print('')
    print('--------------------------------------')
    print('--------------------------------------')

    # print the number of missing values per column and the total
    print('Null Values by Column: ')
    missing_total = df.isnull().sum().sum()
    missing_count = df.isnull().sum() # the count of missing values
    value_count = df.isnull().count() # the count of all values
    missing_percentage = round(missing_count / value_count * 100, 2) # percentage of missing values
    missing_df = pd.DataFrame({'count': missing_count, 'percentage': missing_percentage})\
    .sort_values(by='percentage', ascending=False)
    
    print(missing_df.head(50))
    print(f' \n Total Number of Missing Values: {missing_total} \n')
    df_total = df[df.columns[:]].count().sum()
    proportion_of_nulls = round((missing_total / df_total), 4)
    print(f' Proportion of Nulls in Dataframe: {proportion_of_nulls}\n') 
    print('--------------------------------------')
    print('--------------------------------------')
    
    print('Row-by-Row Nulls')
    print(nulls_by_row(df))
    print('----------------------')


    print('Relative Frequencies: \n')
    # Display top 5 values of each variable within reasonable limit
    limit = 25
    for col in df.columns:
        if df[col].nunique() < limit:
            print(f'Column: {col} \n {round(df[col].value_counts(normalize=True).nlargest(5), 3)} \n')
        else: 
            print(f'Column: {col} \n')
            print(f'Range of Values: [{df[col].min()} - {df[col].max()}] \n')
        print('------------------------------------------')
        print('--------------------------------------')
        
        
def nulls_by_col(df):
    '''
    This function  takes in a dataframe of observations and attributes(or columns) and returns a dataframe where each row is an atttribute name, the first column is the 
    number of rows with missing values for that attribute, and the second column is percent of total rows that have missing values for that attribute.
    '''
    num_missing = df.isnull().sum()
    rows = df.shape[0]
    prcnt_miss = (num_missing / rows * 100)
    cols_missing = pd.DataFrame({'num_rows_missing': num_missing, 
                                 'percent_rows_missing': prcnt_miss})\
    .sort_values(by='percent_rows_missing', ascending=False)
    return cols_missing.applymap(lambda x: f"{x:0.1f}")

def nulls_by_row(df):
    '''
    This function takes in a dataframe and returns a dataframe with 3 columns: the number of columns missing, percent of columns missing, 
    and number of rows with n columns missing.
    '''
    num_missing = df.isnull().sum(axis = 1)
    prcnt_miss = (num_missing / df.shape[1] * 100)
    rows_missing = pd.DataFrame({'num_cols_missing': num_missing, 
                                 'percent_cols_missing': prcnt_miss})\
    .reset_index()\
    .groupby(['num_cols_missing', 'percent_cols_missing']).count()\
    .rename(index=str, columns={'index': 'num_rows'}).reset_index().set_index('num_cols_missing')\
    .sort_values(by='percent_cols_missing', ascending=False)
    return rows_missing
    

In [29]:
describe_data(df)

This dataframe has 43793 rows and 34 columns.

--------------------------------------
--------------------------------------
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43793 entries, 1 to 77379
Data columns (total 34 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            43793 non-null  int64  
 1   parcelid                      43793 non-null  int64  
 2   bathroomcnt                   43793 non-null  float64
 3   bedroomcnt                    43793 non-null  float64
 4   buildingqualitytypeid         29441 non-null  float64
 5   calculatedbathnbr             43778 non-null  float64
 6   calculatedfinishedsquarefeet  43793 non-null  float64
 7   finishedsquarefeet12          43667 non-null  float64
 8   fips                          43793 non-null  float64
 9   fullbathcnt                   43778 non-null  float64
 10  heatingorsystemtypeid         29567 non-null  float64

TypeError: '<=' not supported between instances of 'float' and 'str'

In [28]:
df = df['unitcnt'].fillna(1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


In [33]:
df.shape

(43793, 34)

In [34]:
df2 = df.dropna()
# nope, this makes me lose all counties again. 

In [35]:
df2.shape

(28432, 34)

In [None]:
df2

In [37]:
import mitosheet
mitosheet.sheet(df2, view_df=True)

MitoWidget(analysis_data_json='{"analysisName": "UUID-80bf2dd7-f507-437c-955a-e12406f90bed", "code": {"imports…

In [31]:
def impute(df, my_strategy, column_list):
    ''' 
    This function takes in a df, strategy, and column list and
    returns df with listed columns imputed using imputing stratagy
    '''
    # build imputer    
    imputer = SimpleImputer(strategy=my_strategy)  
    # fit/transform selected columns
    df[column_list] = imputer.fit_transform(df[column_list]) 

    return df

strategystr, default=’mean’
The imputation strategy.

If “mean”, then replace missing values using the mean along each column. Can only be used with numeric data.
If “median”, then replace missing values using the median along each column. Can only be used with numeric data.
If “most_frequent”, then replace missing using the most frequent value along each column. Can be used with strings or numeric data. If there is more than one such value, only the smallest is returned.
If “constant”, then replace missing values with fill_value. Can be used with strings or numeric data.

In [None]:
# imputing continuous columns with median value
train = impute(train, 'median', ['yearbuilt','finishedsquarefeet12', 'lotsizesquarefeet', 'structuretaxvaluedollarcnt', 'taxvaluedollarcnt', 'landtaxvaluedollarcnt', 'taxamount'])
# imputing categorical colummns with most frequent
cat_cols = ['calculatedbathnbr','fullbathcnt', 'regionidcity', 'regionidzip',
       'censustractandblock']
train = impute(train, 'most_frequent', 
        cat_cols)

In [38]:
df.drop(columns= ['finishedsquarefeet12', 'fullbathcnt', 'calculatedbathnbr',
                  'propertyzoningdesc', 'unitcnt', 'propertylandusedesc',
                  'assessmentyear', 'roomcnt', 'regionidcounty', 'propertylandusetypeid',
                  'heatingorsystemtypeid', 'id', 'heatingorsystemdesc', 'buildingqualitytypeid',
                 ],
        axis=1, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [40]:
describe_data(df)

This dataframe has 43793 rows and 20 columns.

--------------------------------------
--------------------------------------
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43793 entries, 1 to 77379
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   parcelid                      43793 non-null  int64  
 1   bathroomcnt                   43793 non-null  float64
 2   bedroomcnt                    43793 non-null  float64
 3   calculatedfinishedsquarefeet  43793 non-null  float64
 4   fips                          43793 non-null  float64
 5   latitude                      43793 non-null  float64
 6   longitude                     43793 non-null  float64
 7   lotsizesquarefeet             43793 non-null  float64
 8   propertycountylandusecode     43793 non-null  object 
 9   rawcensustractandblock        43793 non-null  float64
 10  regionidcity                  42922 non-null  float64

In [41]:
df.shape

(43793, 20)

In [42]:
df = df.dropna()

In [44]:
df.shape

(42825, 20)

In [None]:
# Not much of a loss. 

In [50]:
from datetime import date
# encode yearbuilt as age of home in years as an integer
df['age_of_home'] = (date.today().year - df.yearbuilt).astype(int)

AttributeError: 'DataFrame' object has no attribute 'yearbuilt'

In [48]:
df

Unnamed: 0,parcelid,bathroomcnt,bedroomcnt,calculatedfinishedsquarefeet,fips,latitude,longitude,lotsizesquarefeet,propertycountylandusecode,rawcensustractandblock,regionidcity,regionidzip,structuretaxvaluedollarcnt,taxvaluedollarcnt,landtaxvaluedollarcnt,taxamount,censustractandblock,logerror,transactiondate,age
1,17052889,1.0,2.0,1465.0,6111.0,34449266.0,-119281531.0,12647.0,1110,6.111001e+07,13091.0,97099.0,88000.0,464000.0,376000.0,5672.48,6.111001e+13,0.055619,2017-01-01,50
2,14186244,2.0,3.0,1243.0,6059.0,33886168.0,-117823170.0,8432.0,122,6.059022e+07,21412.0,97078.0,85289.0,564778.0,479489.0,6488.30,6.059022e+13,0.005383,2017-01-01,55
3,12177905,3.0,4.0,2376.0,6037.0,34245180.0,-118240722.0,13038.0,0101,6.037300e+07,396551.0,96330.0,108918.0,145143.0,36225.0,1777.51,6.037300e+13,-0.103410,2017-01-01,47
6,12095076,3.0,4.0,2962.0,6037.0,34145202.0,-118179824.0,63000.0,0101,6.037461e+07,47019.0,96293.0,276684.0,773303.0,496619.0,9516.26,6.037461e+13,-0.001011,2017-01-01,67
8,12790562,3.0,4.0,3039.0,6037.0,33960230.0,-118006914.0,20028.0,0100,6.037500e+07,14634.0,96173.0,177527.0,220583.0,43056.0,3104.19,6.037500e+13,-0.040966,2017-01-02,47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77373,10722691,2.0,3.0,1570.0,6037.0,34194943.0,-118629218.0,7499.0,0100,6.037135e+07,12447.0,96342.0,46784.0,72026.0,25242.0,1000.70,6.037135e+13,0.081196,2017-09-19,59
77376,11000655,2.0,2.0,1286.0,6037.0,34245368.0,-118282383.0,47405.0,0100,6.037101e+07,12447.0,96284.0,70917.0,354621.0,283704.0,4478.43,6.037101e+13,0.020615,2017-09-20,77
77377,17239384,2.0,4.0,1612.0,6111.0,34300140.0,-118706327.0,12105.0,1111,6.111008e+07,27110.0,97116.0,50683.0,67205.0,16522.0,1107.48,6.111008e+13,0.013209,2017-09-21,53
77378,12773139,1.0,3.0,1032.0,6037.0,34040895.0,-118038169.0,5074.0,0100,6.037434e+07,36502.0,96480.0,32797.0,49546.0,16749.0,876.43,6.037434e+13,0.037129,2017-09-21,63
