In [2]:
import sys
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

from scipy.special import expit
from scipy import sparse
from scipy.sparse import csr_matrix

#from sklearn.model_selection import StratifiedKFold
from sklearn.cross_validation import StratifiedKFold

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

from sklearn.cluster import KMeans

from xgboost.sklearn import XGBClassifier
from xgboost.core import DMatrix
from xgboost.training import train, cv




In [3]:
train_file = 'Dataset\\train.json'
test_file = 'Dataset\\test.json'
print('load data', datetime.datetime.now())
df_train = pd.read_json(train_file)
df_test = pd.read_json(test_file)
print(df_train.shape)
print(df_test.shape)
print('load data done', datetime.datetime.now())

target_num_map = {'high':0, 'medium':1, 'low':2}
y_train = np.array(df_train['interest_level'].apply(lambda x: target_num_map[x]))
df_train = df_train.drop(['interest_level'], axis=1)


load data 2017-03-09 14:46:38.612256
(49352, 15)
(74659, 14)
load data done 2017-03-09 14:46:48.804152


In [49]:
def feature_engineering(df_train, df_test, y_train):
    print('feature engineering', datetime.datetime.now())

    #for some reason listing_id improves the score

    #df_train.index = df_train['listing_id']
    #df_train = df_train.drop(['listing_id'], axis=1)

    #df_test.index = df_test['listing_id']
    #df_test = df_test.drop(['listing_id'], axis=1)

    #ignore_Index because use sort_index later.
    df_all = pd.concat((df_train, df_test), axis=0, ignore_index=True)

    ###
    ###date feature
    ###
    df_all['created'] = pd.to_datetime(df_all['created'])
    #year - all 2016
    #df['created_year'] = df['created'].dt.year
    df_all['created_month'] = df_all['created'].dt.month
    df_all['created_day'] = df_all['created'].dt.day
    df_all['created_day_of_year'] = df_all['created'].dt.strftime('%j').astype(int)
    df_all['created_hour'] = df_all['created'].dt.hour
    df_all['created_weekday'] = df_all['created'].dt.weekday
    df_all = df_all.drop(['created'], axis=1)

    '''
    #create_weekday categorical
    ohe = OneHotEncoder(sparse=False)
    df_all_ohe = ohe.fit_transform(df_all.created_weekday.reshape(-1, 1)) 	
    for i in range(df_all_ohe.shape[1]):
        df_all['ohe' + str(i)] = df_all_ohe[:, i]
    df_all = df_all.drop(['created_weekday'], axis=1)
    '''
    ###
    ### numeric features
    ###
    #adjust incorrect x/y
    x_mean = df_all.latitude.mean()
    y_mean = df_all.longitude.mean()

    df_all.loc[df_all.latitude < x_mean - 5, 'latitude'] = x_mean - 5
    df_all.loc[df_all.latitude > x_mean + 5, 'latitude'] = x_mean + 5
    df_all.loc[df_all.longitude < y_mean - 5, 'longitude'] = y_mean - 5
    df_all.loc[df_all.longitude > y_mean + 5, 'longitude'] = y_mean + 5

    '''
    #adjust incorrect x/y by percentile
    percentile = 0.1
    llimit = np.percentile(df_all.latitude.values, percentile)
    ulimit = np.percentile(df_all.latitude.values, 100 - percentile)
    df_all.loc[df_all['latitude']<llimit, 'latitude'] = llimit
    df_all.loc[df_all['latitude']>ulimit, 'latitude'] = ulimit
    llimit = np.percentile(df_all.longitude.values, percentile)
    ulimit = np.percentile(df_all.longitude.values, 100 - percentile)
    df_all.loc[df_all['longitude']<llimit, 'longitude'] = llimit
    df_all.loc[df_all['longitude']>ulimit, 'longitude'] = ulimit
    '''

    #log x/y
    df_all['logx'] = np.log(df_all['latitude'])
    df_all['logy'] = np.log(df_all['longitude'] + 100)

    #radius
    df_all['radius'] = np.log((df_all.latitude - x_mean)*(df_all.latitude - x_mean) + (df_all.longitude - y_mean)*(df_all.longitude - y_mean))

    #price
    #df_all.loc[df_all['price'] > 100000, 'price'] = 100000

    #log price
    #df_all['logprice'] = np.log(df_all.price)

    df_all["price_per_bed"] = df_all["price"]/df_all["bedrooms"] 
    df_all["room_dif"] = df_all["bedrooms"] - df_all["bathrooms"] 
    df_all["room_sum"] = df_all["bedrooms"] + df_all["bathrooms"] 
    df_all["price_per_room"] = df_all["price"]/df_all["room_sum"]

    df_all["photos_count"] = df_all["photos"].apply(len)
    df_all = df_all.drop(['photos'], axis=1)
    
    ###
    ###zones
    ###
    n_zones = 140
    x_min = df_all.logx.mean() - 0.004
    x_max = df_all.logx.mean() + 0.003
    y_min = df_all.logy.mean() - 0.003
    y_max = df_all.logy.mean() + 0.003

    df_all2 = df_all[(df_all.logx >= x_min) & (df_all.logx <= x_max) & (df_all.logy >= y_min) & (df_all.logy <= y_max)]
    kmeans = KMeans(n_clusters=n_zones, random_state=0).fit(df_all2[['logx', 'logy']])

    print('zones', df_all.shape)

    for i in range(n_zones):
        df_all['zone' + str(i)] = 0
        df_all.loc[df_all2.logx[kmeans.labels_ == i].index, 'zone' + str(i)] = 1

    print('zones', df_all.shape)

    ###
    ###description
    ###
    df_all['description'] = df_all['description'].apply(lambda x: cleaning_text(x))
    df_all["description_words_count"] = df_all["description"].apply(lambda x: 0 if len(x) == 0 else len(x.split(" ")))
    df_all['description_uppercase'] = df_all['description'].apply(lambda x: 1 if len(re.findall(r'[A-Z]', x))/(len(x) + 1) > 0.5 else 0)
    df_all['description'] = df_all['description'].apply(lambda x: x.lower())

    '''
    n_features2 = 100
    tfidf2 = CountVectorizer(stop_words='english', max_features=n_features2)
    tr_sparse2 = tfidf2.fit_transform(df_all[:df_train.shape[0]]['description'])
    te_sparse2 = tfidf2.transform(df_all[df_train.shape[0]:]['description'])
    '''
    #df_all = df_all.drop(['description'], axis=1)

    ###
    ### features
    ###
    df_all["features_count"] = df_all["features"].apply(len)

    '''
    n_features = 2000
    df_all['features'] = df_all['features'].apply(lambda x: cleaning_list(x))
    df_all['features'] = df_all['features'].apply(lambda x: " ".join(["_".join(i.split(" ")) for i in x]))
    df_all['features'] = df_all['features'].apply(lambda x: x.lower())
    tfidf = CountVectorizer(stop_words='english', max_features=n_features)
    tr_sparse = tfidf.fit_transform(df_all[:df_train.shape[0]]['features'])
    te_sparse = tfidf.transform(df_all[df_train.shape[0]:]['features'])

    df_all = df_all.drop(['features'], axis=1)
    '''
    
    ###
    ###display and street address
    ###
    df_all['display_address'] = df_all.display_address.str.replace('Avenue', '')
    df_all['display_address'] = df_all.display_address.str.replace(' Ave', '')
    df_all['display_address'] = df_all.display_address.str.replace('Street', '')
    df_all['display_address'] = df_all.display_address.str.replace('St.', '')
    df_all['display_address'] = df_all.display_address.str.replace(' St', '')
    df_all['display_address'] = df_all.display_address.str.rstrip()

    df_all['street_address'] = df_all.street_address.str.replace('Avenue', '')
    df_all['street_address'] = df_all.street_address.str.replace(' Ave', '')
    df_all['street_address'] = df_all.street_address.str.replace('Street', '')
    df_all['street_address'] = df_all.street_address.str.replace('St.', '')
    df_all['street_address'] = df_all.street_address.str.replace(' St', '')
    df_all['street_address'] = df_all.street_address.str.rstrip()

    #keep only the first int from street_address - not a good idea, just the number without street is useless
    #df_all['street_address'] = df_all.street_address.apply(lambda x: x.split(" ")[0])

    ###
    ###categorical features
    ###
    #cannot make them ohe - too many distinct values
    ohe_features = ['building_id', 'display_address', 'manager_id', 'street_address'] 
    for f in ohe_features: 
        le = LabelEncoder() 
        df_all[f] = le.fit_transform(df_all[f]) 

    ###
    ###building_id, manager_id
    ###
    value_counts = df_all['building_id'].value_counts()
    df_all = pd.merge(df_all, pd.DataFrame(value_counts), left_on='building_id', right_index=True).sort_index()
    df_all = df_all.drop(['building_id_x'], axis=1)    
    df_all.loc[df_all.building_id == 0, 'building_id_y'] = 0

    value_counts = df_all['manager_id'].value_counts()
    df_all = pd.merge(df_all, pd.DataFrame(value_counts), left_on='manager_id', right_index=True).sort_index()
    df_all = df_all.drop(['manager_id_x'], axis=1)    
    df_all.loc[df_all.manager_id == 0, 'manager_id_y'] = 0

    print(df_all.shape)

    #done
    X_train = df_all[:df_train.shape[0]]
    X_test = df_all[df_train.shape[0]:]

    #X_train = pd.concat((X_train, pd.DataFrame(tr_sparse.todense())), axis=1)
    #X_test = pd.concat((X_test, pd.DataFrame(te_sparse.todense())), axis=1)

    #X_train = csr_matrix(np.hstack([X_train, tr_sparse.todense()]))
    #X_test = csr_matrix(np.hstack([X_test, te_sparse.todense()]))
    #X_train = csr_matrix(np.hstack([X_train, tr_sparse.todense(), tr_sparse2.todense()]))
    #X_test = csr_matrix(np.hstack([X_test, te_sparse.todense(), te_sparse2.todense()]))

    print('Train', X_train.shape)
    print('Test', X_test.shape)

    print('feature engineering done', datetime.datetime.now())
    return X_train, X_test



In [7]:
ENGLISH_STOP_WORDS = frozenset([
    "a", "about", "above", "across", "after", "afterwards", "again", "against",
    "all", "almost", "alone", "along", "already", "also", "although", "always",
    "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
    "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
    "around", "as", "at", "back", "be", "became", "because", "become",
    "becomes", "becoming", "been", "before", "beforehand", "behind", "being",
    "below", "beside", "besides", "between", "beyond", "bill", "both",
    "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
    "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
    "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else",
    "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone",
    "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill",
    "find", "fire", "first", "five", "for", "former", "formerly", "forty",
    "found", "four", "from", "front", "full", "further", "get", "give", "go",
    "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter",
    "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his",
    "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed",
    "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
    "latterly", "least", "less", "ltd", "made", "many", "may", "me",
    "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly",
    "move", "much", "must", "my", "myself", "name", "namely", "neither",
    "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
    "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our",
    "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps",
    "please", "put", "rather", "re", "same", "see", "seem", "seemed",
    "seeming", "seems", "serious", "several", "she", "should", "show", "side",
    "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone",
    "something", "sometime", "sometimes", "somewhere", "still", "such",
    "system", "take", "ten", "than", "that", "the", "their", "them",
    "themselves", "then", "thence", "there", "thereafter", "thereby",
    "therefore", "therein", "thereupon", "these", "they", "thick", "thin",
    "third", "this", "those", "though", "three", "through", "throughout",
    "thru", "thus", "to", "together", "too", "top", "toward", "towards",
    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us",
    "very", "via", "was", "we", "well", "were", "what", "whatever", "when",
    "whence", "whenever", "where", "whereafter", "whereas", "whereby",
    "wherein", "whereupon", "wherever", "whether", "which", "while", "whither",
    "who", "whoever", "whole", "whom", "whose", "why", "will", "with",
    "within", "without", "would", "yet", "you", "your", "yours", "yourself",
    "yourselves"])

In [75]:
def cleaning_text(text):
    #sentence=sentence.lower()
    text = text.replace('<p><a  website_redacted', '')
    text = text.replace('!<br /><br />', '')
    text = text.replace('kagglemanager renthop com', '')
    text = re.sub('[^\w\s]',' ', text) #removes punctuations
    text = re.sub('\d+',' ', text) #removes digits
    text =' '.join([w for w in text.split() if not w in ENGLISH_STOP_WORDS]) # removes english stopwords
    #text=' '.join([w for w , pos in pos_tag(text.split()) if (pos == 'NN' or pos=='JJ' or pos=='JJR' or pos=='JJS' )])
    #selecting only nouns and adjectives
    text =' '.join([w for w in text.split() if not len(w)<=2 ]) #removes single lettered words and digits
    text = text.strip()
    return text

def cleaning_text2(text):
    text = re.sub('[^\w\s]',' ', text) #removes punctuations
    #text = re.sub('\d+',' ', text) #removes digits
    #text =' '.join([w for w in text.split() if not len(w)<=2 ]) #removes single lettered words and digits
    text = text.strip()
    return text

def cleaning_list(list):
    return [cleaning_text2(x) for x in list]
    #return map(cleaning_text, list)


In [50]:
X_train, X_test = feature_engineering(df_train, df_test, y_train)

feature engineering 2017-03-09 21:11:43.385234
zones (124011, 25)
zones (124011, 165)
(124011, 170)
Train (49352, 170)
Test (74659, 170)
feature engineering done 2017-03-09 21:12:44.771078


In [51]:
X_train.description

0        brand new bedroom bath apartmentenjoy these fo...
1                                                         
2        top top west village location beautiful pre wa...
3        building amenities garage garden fitness room ...
4        beautifully renovated bedroom flex bedroom apa...
5                                                         
6        stunning unit great location lots natural ligh...
7        this huge sunny plenty lights bed bath offers ...
8                                                         
9        this spacious bedroom bedroom able fit queen s...
10       new market spacious studio located maintained ...
11       check bedroom apartment great location washing...
12       low fee beautiful cherry oak wooden floorsthe ...
13       lincoln square premier service buildings locat...
14       spacious bedroom fit king sized bed comfortabl...
15       stunning renovated studio unit high ceilings s...
16       east village great community great people offe.

In [76]:
df_all = pd.concat((X_train, X_test), axis=0, ignore_index=True)
df_all['features2'] = df_all['features'].apply(lambda x: cleaning_list(x))
df_all['features2'] = df_all['features2'].apply(lambda x: " ".join(["_".join(i.split(" ")) for i in x]))
#df_all['features2'] = df_all['features2'].apply(lambda x: x.lower())
df_all.features2

0                                                          
1         Doorman Elevator Fitness_Center Cats_Allowed D...
2         Laundry_In_Building Dishwasher Hardwood_Floors...
3                                    Hardwood_Floors No_Fee
4                                                   Pre_War
5                                                          
6         prewar elevator Dogs_Allowed Cats_Allowed LOWR...
7         Doorman Elevator Pre_War Terrace Laundry_in_Un...
8         Cats_Allowed Dogs_Allowed Elevator Laundry_In_...
9                                Dishwasher Hardwood_Floors
10                                                         
11        prewar dishwasher HIGHRISE ROOFDECK EAT_IN_KIT...
12              Doorman Elevator Laundry_in_Building No_Fee
13        Swimming_Pool Doorman Fitness_Center No_Fee Do...
14        Elevator Multi_Level Laundry_in_Building Dishw...
15        Doorman Elevator Fitness_Center Laundry_in_Bui...
16                                      

In [77]:
tfidf = CountVectorizer(stop_words='english', max_features=2000)
tr_sparse = tfidf.fit_transform(df_all[:df_train.shape[0]]['features2'])
print(len(tfidf.get_feature_names()))
tfidf.get_feature_names()

1230


['000_sf_fitness_center',
 '000_sq__ft__landscaped_terrace_complete_with_an_organic_garden',
 '000_square_foot_exclusive_courtyard_oasis_for_via_residents',
 '000_square_foot_sun_deck__free_wifi_in_marc_club',
 '1000__move_in_visa_giftcard_will_be_handed_to_new_tenants_upon_rental_lease_signing',
 '1100_sq_ft',
 '1200sq_ft',
 '12th_st___3rd_ave',
 '1500__move_in_visa_giftcard_will_be_handed_to_new_tenants_upon_rental_lease_signing',
 '1_2_bath',
 '1_2_month_fee',
 '1_5_bath',
 '1_5_bathrooms',
 '1_5_baths',
 '1_5_marble_baths',
 '1_br_or_conv_2',
 '1_mo_broker_s_fee_18_mo_lease',
 '1_month_free',
 '1_month_free_rent',
 '1st',
 '22',
 '24_7_concierge',
 '24_7_doorman',
 '24_7_doorman_concierge',
 '24_7_fitness_center',
 '24_7_full_time_doorman_concierge',
 '24_hour_attended_lobby',
 '24_hour_concierge',
 '24_hour_concierge_and_doorman',
 '24_hour_doorman',
 '24_hour_doorman_concierge',
 '24_hour_parking_garage_with_an_electric_vehicle_charging_station',
 '24_hr_concierge',
 '24_hr_doorm

1649


['000',
 '000_sf_fitness_center',
 '000_sq',
 '000_square_foot_exclusive_courtyard_oasis_for_via_residents',
 '01',
 '04',
 '0862',
 '1000',
 '1100_sq_ft',
 '1200sq_ft',
 '12_month_leases_allowed',
 '12th_st_',
 '15',
 '1500',
 '16',
 '16______swimming_pool_________dishwasher',
 '16_____firepalce',
 '16____dishwasher',
 '1_bed',
 '1_br_or_conv_2',
 '1_mo_broker',
 '1_month_free',
 '1_month_free_rent',
 '1ba',
 '1st',
 '22',
 '24',
 '2450',
 '24_hour_attended_lobby',
 '24_hour_doorman',
 '24_hour_doorman_concierge',
 '24_hr_concierge',
 '24_hr_doorman',
 '24hr_doorman',
 '24hr_doormen',
 '24hr_white',
 '24th_st_',
 '250_application_fee_with_background_and_credit_check',
 '250_deposit_with_approved_credit',
 '250_security_deposit',
 '25lb_weight_limit_on_pet',
 '2_bath',
 '2_bathrooms',
 '2_beautifully_furnished_outside_sundecks',
 '2_bedroon_convertible',
 '2_blk_to_bedford_l_',
 '2_block_away',
 '2_blocks_away_from_union_square',
 '2_full_bathrooms',
 '2_full_baths',
 '2_month_fee',
 '

In [33]:
np.random.seed(0)

folds = StratifiedKFold(y_train, n_folds=5, shuffle=True)
for train_index, test_index in folds:
    X_train2, X_test2 = X_train.loc[train_index], X_train.loc[test_index]
    y_train2, y_test2 = y_train[train_index], y_train[test_index]
    break


In [452]:
early_stopping_rounds = 100
learning_rate, max_depth, ss, cs, gamma, min_child_weight, reg_lambda, reg_alpha = 0.1, 6, 0.7, 0.7, 0, 1, 1, 0
#learning_rate, max_depth, ss, cs, gamma, min_child_weight, reg_lambda, reg_alpha = 0.1, 4, 0.8, 0.8, 0, 1, 1, 0
clf = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=5000, objective='multi:softprob', subsample=ss, colsample_bytree=cs, gamma=gamma, min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha)

X_train2 = csr_matrix(X_train2.values)
X_test2 = csr_matrix(X_test2.values)

clf.fit(X_train2, y_train2, eval_set=[(X_test2, y_test2)], eval_metric='mlogloss', early_stopping_rounds=early_stopping_rounds, verbose=100)
#[358]	validation_0-mlogloss:0.571967

Will train until validation_0 error hasn't decreased in 100 rounds.
[0]	validation_0-mlogloss:1.037728
[100]	validation_0-mlogloss:0.572477
[200]	validation_0-mlogloss:0.563403
[300]	validation_0-mlogloss:0.563054
Stopping. Best iteration:
[250]	validation_0-mlogloss:0.562200



XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.7,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=6,
       min_child_weight=1, missing=None, n_estimators=5000, nthread=-1,
       objective='multi:softprob', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=0.7)

In [35]:
clf = RandomForestClassifier(n_estimators=1000)
clf.fit(X_train2, y_train2)
y_pred = clf.predict_proba(X_test2)
log_loss(y_test2, y_pred)
#0.60083096265660485

0.59390945436383891

In [34]:
for x in X_train2.columns:
    inf = (X_train2[x] == np.inf).sum()
    if inf > 0:
        print(x, inf)

In [31]:
X_train.loc[X_train.price_per_bed == np.inf, 'price_per_bed'] = 1000000
X_train.loc[X_train.price_per_room == np.inf, 'price_per_room'] = 1000000

In [451]:
temp = pd.concat([X_train2.manager_id, pd.get_dummies(y_train)], axis = 1).groupby('manager_id').mean()
temp.columns = ['high_frac', 'medium_frac', 'low_frac']
temp['manager_listings'] = X_train2.groupby('manager_id').count().iloc[:,1]
temp['manager_skill'] = temp['high_frac']*2 + temp['medium_frac']

unranked_managers_ixes = temp['manager_listings'] < 20
ranked_managers_ixes = ~unranked_managers_ixes
mean_values = temp.loc[ranked_managers_ixes, ['high_frac', 'medium_frac', 'low_frac', 'manager_skill']].mean()
temp.loc[unranked_managers_ixes, ['high_frac', 'medium_frac', 'low_frac', 'manager_skill']] = mean_values.values

temp = temp['manager_skill']

#join
X_train2 = X_train2.merge(temp.reset_index(), how='left', left_on='manager_id', right_on='manager_id')

X_test2 = X_test2.merge(temp.reset_index(), how='left', left_on='manager_id', right_on='manager_id')
new_manager_ixes = X_test2['manager_skill'].isnull()
X_test2.loc[new_manager_ixes, 'manager_skill'] = mean_values['manager_skill']

In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss

In [453]:
X_train2

<39481x22 sparse matrix of type '<class 'numpy.float64'>'
	with 806988 stored elements in Compressed Sparse Row format>

In [439]:
X_train2

Unnamed: 0,bathrooms,bedrooms,building_id,display_address,latitude,listing_id,longitude,manager_id,price,street_address,...,created_day_of_year,created_hour,created_weekday,logx,logy,radius,photos_count,description_words_count,description_uppercase,manager_skill
0,1.5,3,3797,10410,40.7145,7211212,-73.9425,1568,3000,19652,...,176,7,4,3.706584,3.260306,-7.446341,5,65,0,0.253333
1,1.0,2,8986,8261,40.7947,7150865,-73.9667,1988,5465,19804,...,164,12,6,3.708552,3.259376,-5.639716,11,0,0,0.013333
2,1.0,1,8889,11414,40.7388,6887163,-74.0018,3733,2850,8190,...,108,3,6,3.707181,3.258027,-5.898510,8,60,0,0.495327
3,1.0,1,1848,9354,40.7539,6888711,-73.9677,282,3275,11851,...,109,2,0,3.707552,3.259338,-7.421907,3,38,0,0.301370
4,1.0,4,0,12263,40.8241,6934781,-73.9493,2618,3350,16045,...,119,1,3,3.709273,3.260045,-4.896160,3,47,0,0.413298
5,2.0,4,2544,12352,40.7429,6894514,-74.0028,3081,7995,12696,...,110,4,1,3.707282,3.257989,-5.851502,5,0,0,0.127907
7,2.0,1,169,12371,40.7427,6867392,-73.9957,3959,5645,3487,...,104,6,2,3.707277,3.258262,-6.134814,5,99,0,0.742857
8,1.0,1,7635,9943,40.8234,6898799,-73.9457,3339,1725,18260,...,111,2,2,3.709255,3.260183,-4.910527,5,0,0,0.413298
9,2.0,4,0,6009,40.7278,6814332,-73.9808,633,5800,16669,...,93,2,5,3.706911,3.258835,-6.830186,9,92,0,0.000000
10,1.0,0,0,12719,40.7769,6869199,-73.9467,3901,1950,4148,...,105,1,3,3.708116,3.260144,-6.470295,1,15,0,0.413298
