In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import json

matches_df = pd.read_csv('train/matches_train.csv')
foursq_df = pd.read_json('train/foursquare_train.json')
locu_df = pd.read_json('train/locu_train.json')

In [28]:
def crossjoin(df1, df2, **kwargs):
    # Cross join the two org's data to form a dataframe of all possible pairs
    df1['_tmp'] = 1
    df2['_tmp'] = 1
    joined = pd.merge(df1, df2, on=['_tmp'], **kwargs).drop('_tmp', axis=1)
    return joined

train_df = crossjoin(foursq_df, locu_df, suffixes=['_foursq', '_locu'])

matches_df.columns = ['id_locu', 'id_foursq']
matches_df['matched'] = 1
train_df = train_df.merge(matches_df, how='left', left_on=['id_locu', 'id_foursq'], right_on=['id_locu', 'id_foursq'])
train_df['matched'].fillna(0, inplace=True)

Unnamed: 0,country_foursq,id_foursq,latitude_foursq,locality_foursq,longitude_foursq,name_foursq,phone_foursq,postal_code_foursq,region_foursq,street_address_foursq,...,latitude_locu,locality_locu,longitude_locu,name_locu,phone_locu,postal_code_locu,region_locu,street_address_locu,website_locu,matched
0,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.739822,New York,-73.985144,Chipotle Mexican Grill,2126736904,10010,NY,125 East 23rd St.,http://www.chipotle.com/,
1,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.810765,New York,-73.952591,Honey Salon Inc,2126630100,10026,NY,174 Saint Nicholas Ave.,,
2,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.743580,New York,-73.986127,Palatte,6464763812,10016,NY,66 Madison Ave.,http://www.palattenyc.com/,
3,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.828820,New York,-73.949022,Best Taste Restaurant,2122815691,10031,NY,3609 Broadway,,
4,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.749936,New York,-73.983849,Integra Hair System Inc,2125636786,10018,NY,11 W. 36th St. # 3,http://www.integrahair.com/,
5,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.788494,New York,-73.943786,Halal JFK Fried Chicken Pizza,2123692514,10029,NY,1998 2nd Ave.,http://halaljfkchickenpizza.com/,
6,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.815148,New York,-73.939791,Hong Kong Foid,2129265689,10037,NY,527-09 Lenox Ave.,,
7,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.797416,New York,-73.937459,AAA Laundry,6466720202,10035,NY,2272 2nd Ave.,,
8,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.722915,New York,-73.998114,Whiskey Friday,,10012,NY,Spring St. & Crosby St.,,
9,United States,4f328ea619836c91c7e3714a,40.794855,New York,-73.966069,Chen Jin Diao Restaurant,(212) 678-4680,,NY,800 Columbus Ave.,...,40.755041,New York,-73.977646,Stauback Co,2127105250,10002,NY,366 Madison Ave.,,


In [85]:
def prec_rec_f1(feature, df, mask):
    '''
    Utility function for evaluating the precision/recall/f1 of a
    feature matching function.
    Args:
        - feature - function that takes the dataframe as an input and
                    returns a numpy array of 1s and 0s binary labels
        - df      - the dataframe to evaluate the feature function on
        - mask    - the indices for positive labels
    Returns:
        - prec    - precision
        - rec     - recall
        - f1      - f1-score
    '''
    preds = feature(df)
    
    pos = preds.loc[mask]
    pos_preds = mask.loc[preds]
    prec = np.mean(pos_preds)
    rec = np.mean(pos)
    f1 = 2 / (1 / prec + 1 / rec)
    return prec, rec, f1

In [482]:
import re

y = train_df['matched'] == 1

stop_words = ['restaurant', 'inc', 'cafe', 'bakery', 'and', 'the', 'of']

def remove_words(s, words):
    return ' '.join([token for token in s.split() if s not in words])

def remove_punc(s):
    return re.sub('[.,&#\'()]', '', s)

def norm_name(name, default=''):
    # Normalize a location's name to get matches
    if not name:
        return default
    
    name = str(name).lower()
    name = remove_punc(name)
    name = remove_words(name, stop_words)
    return name

def norm_address(a, default='None'):
    if not a:
        return default
    a = str(a).lower()
    a = a.replace('east', 'e')
    a = a.replace('west', 'w')
    a = a.replace('south', 's')
    a = a.replace('north', 'n')
    a = a.replace('square', 'sq')
    a = a.replace('th ', ' ')
    a = a.replace('st ', ' ')
    a = a.replace('st. ', ' ')
    a = a.replace(' One ', ' 1 ')
    a = a.replace(' Two ', ' 2 ')
    a = a.replace(' Three ', ' 3 ')
    a = a.replace('.', '')
    a = a.replace(',', '')
    return a

def unigrams(name):
    return norm_name(name).split()

def lat_long_match(df):
    # Latitude and longitudes differ in decimal places for foursquare and locu
    # Errors in coordinates and misisng values also reduce recall
    def norm_coord(coord):
        return round(coord, 4)
    return (df['latitude_foursq'].apply(norm_coord) == df['latitude_locu'].apply(norm_coord)) \
         & (df['longitude_foursq'].apply(norm_coord) == df['longitude_locu'].apply(norm_coord))

def phone_num_match(df):
    def norm_foursq_phone(p, default='None'):
        if not p:
            return default
        p = str(p).replace('(', '')
        p = p.replace(')', '')
        p = p.replace(' ', '')
        p = p.replace('-', '')
        return p
    
    norm_phone = df['phone_foursq'].apply(lambda p: norm_foursq_phone(p, default='None_foursq'))
    return norm_phone == df['phone_locu'].apply(lambda p: norm_foursq_phone(p, default='None_locu'))

def address_match(df):
    def norm(a, default='None'):
        if not a:
            return default
        a = str(a).lower()
        a = a.replace('east', 'e')
        a = a.replace('west', 'w')
        a = a.replace('south', 's')
        a = a.replace('north', 'n')
        a = a.replace('square', 'sq')
        a = a.replace('th ', ' ')
        a = a.replace('st ', ' ')
        a = a.replace('st. ', ' ')
        a = a.replace(' One ', ' 1 ')
        a = a.replace(' Two ', ' 2 ')
        a = a.replace(' Three ', ' 3 ')
        a = a.replace('.', '')
        a = a.replace(',', '')
        return a
    
    return df['street_address_foursq'].apply(lambda a: norm(a, default='None_foursq')) \
        == df['street_address_locu'].apply(lambda a: norm(a, default='None_locu'))

def name_match(df):
    return df['name_foursq'].apply(lambda a: norm_name(a, default='None_foursq')) \
        == df['name_locu'].apply(lambda a: norm_name(a, default='None_locu'))

In [477]:
print(prec_rec_f1(lat_long_match, train_df, mask))
print(prec_rec_f1(phone_num_match, train_df, mask))
print(prec_rec_f1(address_match, train_df, mask))
print(prec_rec_f1(name_match, train_df, mask))

(0.9571428571428572, 0.5583333333333333, 0.7052631578947369)
(0.9912663755458515, 0.6305555555555555, 0.7707979626485567)
(0.9635761589403974, 0.8083333333333333, 0.879154078549849)
(0.7857142857142857, 0.7944444444444444, 0.7900552486187845)


In [236]:
def errors(df, y_true, y_pred, cols, type='false_pos'):
    # Utility function for viewing classification errors
    if type == 'false_pos':
        return df.loc[~y_true & y_pred][cols]
    elif type == 'false_neg':
        return df.loc[y_true & ~y_pred][cols]

errors(train_df, y, agg_feature(train_df), ['name_foursq', 'name_locu'], type='false_pos').head(50)

Unnamed: 0,name_foursq,name_locu
6199,Neighborhood Gourmet Eatery,Da Mikele
10564,Tasti D-Lite,Tasti D-Lite
15069,Delightful Restaurant,Dunkin Donuts
16034,Z Lounge,Hizen Japanese Restaurant
34402,Starbucks,Starbucks
34413,Starbucks,Starbucks
34548,Starbucks,Starbucks
34707,Starbucks,Starbucks
34718,Starbucks,Starbucks
51436,Mike's Pizzeria & Cafe,Keens Steakhouse


In [191]:
# Play around with approximate matching for names
def unigram_overlap(df):
    def overlap(row):
        grams1 = set(unigrams(row['name_foursq']))
        grams2 = set(unigrams(row['name_locu']))
        return len(grams1.intersection(grams2)) / float(min([len(grams1), len(grams2)]))
    return df.apply(overlap, axis=1)

# overlaps = unigram_overlap(train_df)

def overlap_match(df):
    return overlaps > .5
    
def agg_feature(df):
    return lat_long_match(df) | address_match(df)

print(prec_rec_f1(overlap_match, train_df, mask))
print(prec_rec_f1(agg_feature, train_df, mask))
print(prec_rec_f1(name_match, train_df, mask))

(0.6148148148148148, 0.9222222222222223, 0.7377777777777778)
(0.7862068965517242, 0.95, 0.8603773584905661)
(0.7857142857142857, 0.7944444444444444, 0.7900552486187845)


In [187]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

vectorizer = CountVectorizer(ngram_range=(1, 1), analyzer='word')
counts = vectorizer.fit(train_df['name_foursq'].apply(lambda n: norm_name(n, default='None_foursq')))

In [221]:
from collections import Counter
from fuzzywuzzy import fuzz

all_names = foursq_df['name'].append(locu_df['name'])
all_names.apply(lambda n: norm_name(n, default='None'))
name_counts = Counter(all_names)
brand_names = [name for name in name_counts if name_counts[name] > 2]

def name_match_no_brands(df):
    normed1 = df['name_foursq'].apply(lambda a: norm_name(a, default='None_foursq')) 
    normed2 = df['name_locu'].apply(lambda a: norm_name(a, default='None_locu'))
    return ((normed1 == normed2)
        & pd.Series([name not in brand_names for name in df['name_foursq']])
        & pd.Series([name not in brand_names for name in df['name_locu']]))

print(prec_rec_f1(name_match_no_brands, train_df, mask))

(0.9961977186311787, 0.7277777777777777, 0.8410914927768861)


In [209]:
def agg_feature2(df):
    return lat_long_match(df) | address_match(df) | name_match_no_brands(df)

print(prec_rec_f1(agg_feature2, train_df, mask))

(0.9549295774647887, 0.9416666666666667, 0.9482517482517482)


In [302]:
'''Continuous Features'''
def name_fuzzy(df):
    fuzzy_ratio = df.apply(lambda row: fuzz.ratio(norm_name(row['name_foursq'], default='None_foursq'),  
                                                  norm_name(row['name_locu'], default='None_locu')), axis=1)
    return fuzzy_ratio

def addr_fuzzy(df):
    fuzzy_ratio = df.apply(lambda row: fuzz.ratio(row['street_address_foursq'] or 'None_foursq',  
                                                  row['street_address_locu'] or 'None_locu', axis=1))
    return fuzzy_ratio

def lat_long_continuous(df):
    # Continuous euclidean difference between lat, long pairs
    # Some matches that were missed in the predictions had close but not exactly the same lat, long
    def norm_coord(coord):
        return round(coord, 4) if not np.isnan(coord) else -1
    
    def euclid_dist(row):
        return np.sqrt((norm_coord(row['latitude_foursq']) 
                        - norm_coord(row['latitude_locu']))**2 
                       + (norm_coord(row['longitude_foursq']) 
                        - norm_coord(row['longitude_locu']))**2)
    
    return df.apply(euclid_dist, axis=1)

'''Creating the feature matrix'''

def featurize(df, *features):
    feat_df = pd.DataFrame({f.__name__: f(df) for f in features})
    return feat_df

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

df_train = featurize(train_df, 
                     lat_long_continuous, 
                     address_match, 
                     name_match_no_brands, 
                     name_fuzzy, 
                     addr_fuzzy,
                     phone_num_match)
clf = LogisticRegression()
clf.fit(df_train, y)

preds = clf.predict(df_train)
print(f1_score(y, preds))

0.9730496453900709


In [297]:
fnegs = errors(train_df, y, preds, type='false_neg', 
                cols=['name_foursq', 
                'name_locu', 
                'street_address_foursq', 
                'street_address_locu', 
                'latitude_foursq', 
                'latitude_locu',
                'phone_foursq',
                'phone_locu'])
fnegs

Unnamed: 0,name_foursq,name_locu,street_address_foursq,street_address_locu,latitude_foursq,latitude_locu,phone_foursq,phone_locu
18352,Via Brasil,Brasilla Restaurant,34 W. 46th St.,7 West 45th St.,40.756776,40.755802,(212) 997-1158,2129971158
25454,Littletown,Little Town,,366 W. 46th St.,40.734887,40.760559,,2129792337
34547,Starbucks,Starbucks,,1345 6th Ave.,40.730007,40.76201,,2122658610
70297,Halel Cart,Exhale Spa,,150 Central Park South,40.75798,40.766195,,2125617400
89584,Azul Bistro,Azul Argentine Bistro,152 Stanton St.,152 Stanton St.,40.720513,40.720648,(212) 477-9463,6466022004
109996,Rubirosa at The Westway,The Westway,75 Ckarkson,75 Clarkson St.,40.729706,40.729866,,2126200101
123247,Spring Social Running Club,Tsung Sun Social Club,,11 Division St.,40.772649,40.713998,,2122269414
150568,Fresh Bagels,Pick A Bagel,,360 W. 42nd St.,40.756361,40.758207,,2127928008
217889,Greenwich Village Bar,Greenwich Village Bistro,"Jfk International Airport, Delta Terminal, Bui...",John F. Kennedy International Airport,40.640255,40.643772,,7187512890
245136,Hang Japanese Restaurant,Hana Japanese Cuisine,111 Rivington,111 Rivington St.,40.719928,40.719928,,2123889688


In [298]:
df_train.ix[fnegs.index]

Unnamed: 0,addr_fuzzy,address_match,lat_long_continuous,name_fuzzy,name_match_no_brands,phone_num_match
18352,69,False,0.001166,41,False,True
25454,0,False,0.025756,95,False,False
34547,8,False,0.034428,100,False,False
70297,18,False,0.008877,60,False,False
89584,100,True,0.000224,69,False,False
109996,77,False,0.000283,65,False,False
123247,15,False,0.062149,64,False,False
150568,8,False,0.025065,50,False,False
217889,51,False,0.006435,89,False,False
245136,87,False,0.0,67,False,False


In [299]:
clf.coef_

array([[ 0.0107074 ,  4.80855941, -1.35100731,  0.09763664,  4.33721611,
         4.65599235]])

In [461]:
'''
First pass on test data using Logistic Regression.
'''
foursq_test = pd.read_json('online_competition/foursquare_test.json')
locu_test = pd.read_json('online_competition/locu_test.json')

test_df = crossjoin(foursq_test, locu_test, suffixes=['_foursq', '_locu'])

features_test = featurize(test_df,
                          lat_long_continuous,
                          address_match,
                          name_match_no_brands,
                          name_fuzzy,
                          addr_fuzzy,
                          phone_num_match)

print(test_df.shape)
preds_test = clf.predict(features_test)

matches_test = test_df.loc[preds_test][['id_locu', 'id_foursq']]
matches_test.columns = ['locu_id', 'foursquare_id']
matches_test.to_csv('matches_test.csv', index=False)

(160000, 22)


In [342]:
import lightgbm as lgb

'''
Training performance using lightGBM (Gradient boosted trees)
'''

d_train = lgb.Dataset(df_train, label=y)
params = {}

clf_lgb = lgb.train(params, d_train, 100)

preds_lgb = clf_lgb.predict(features_test)
preds_lgb = preds_lgb > .35

print(prec_rec_f1(lambda df: pd.Series(clf_lgb.predict(df) > .35), df_train, y))

(0.9943502824858758, 0.9777777777777777, 0.9859943977591037)


In [454]:
%%time
'''
Make and time test predictions and write to csv.
.35 seems to be the best threshold during training.
Entire pipeline from featurization to training is included here
for fair timing purposes vs pre-filtered pipeline later.
'''
df_train = featurize(train_df, 
                     lat_long_continuous, 
                     address_match, 
                     name_match_no_brands, 
                     name_fuzzy, 
                     addr_fuzzy,
                     phone_num_match)

features_test = featurize(test_df,
                          lat_long_continuous,
                          address_match,
                          name_match_no_brands,
                          name_fuzzy,
                          addr_fuzzy,
                          phone_num_match)

d_train = lgb.Dataset(df_train, label=y)
params = {}

clf_lgb = lgb.train(params, d_train, 100)

preds_lgb = clf_lgb.predict(features_test)
preds_lgb = preds_lgb > .35

print(prec_rec_f1(lambda df: pd.Series(clf_lgb.predict(df) > .35), df_train, y))

'''Original'''

preds_test_lgb = clf_lgb.predict(features_test)
preds_test_lgb = preds_test_lgb > .35

matches_test_lgb = test_df.loc[preds_test_lgb][['id_locu', 'id_foursq']]
matches_test_lgb.columns = ['locu_id', 'foursquare_id']
matches_test_lgb.to_csv('matches_test.csv', index=False)

(0.9943502824858758, 0.9777777777777777, 0.9859943977591037)
CPU times: user 4min 57s, sys: 920 ms, total: 4min 58s
Wall time: 2min 59s


In [474]:
'''Evaluating precision and recall of potentially simple filtering heuristics'''
def postal_match(df):
    return df['postal_code_locu'] == df['postal_code_foursq']

def region_match(df):
    return df['locality_locu'] == df['locality_foursq']

def addr_match(df):
    return ~(df['street_address_locu'] == '') \
        & ~(df['street_address_foursq'] == '') \
        & (df['street_address_locu'] == df['street_address_foursq'])

print(prec_rec_f1(postal_match, train_df, y))
print(prec_rec_f1(region_match, train_df, y))
print(prec_rec_f1(phone_num_match, train_df, y))
print(prec_rec_f1(lambda df: ~postal_match(df) & ~region_match(df), train_df, y))
print(prec_rec_f1(lambda df: name_fuzzy(df) > .3, train_df, y))

(0.019644527595884004, 0.875, 0.0384263494967978)
(0.0009932659932659932, 0.9833333333333333, 0.001984527413387151)
(0.9912663755458515, 0.6305555555555555, 0.7707979626485567)
(0.0, 0.0, 0.0)




(0.0010031124349718433, 1.0, 0.0020042144175391034)


In [371]:
# What do the false positives look like? Can we improve precision?
train_df.loc[addr_match(train_df) & ~y][['name_locu', 'name_foursq','street_address_locu', 'street_address_foursq']]

Unnamed: 0,name_locu,name_foursq,street_address_locu,street_address_foursq
6199,Da Mikele,Neighborhood Gourmet Eatery,275 Church St.,275 Church St.
16034,Hizen Japanese Restaurant,Z Lounge,203 E. 45th St.,203 E. 45th St.
93964,Tasti D-Lite,Subway,1 Penn Plz.,1 Penn Plz.
108795,CRISPIN'S,City Lights Diner,764 10th Ave.,764 10th Ave.
149105,Neighborhood Gourmet Eatery,Da Mikele,275 Church St.,275 Church St.
174813,Starbucks,Bar Bella,870 7th Ave.,870 7th Ave.
215245,Z Lounge,Hizen Japanese Restaurant,203 E. 45th St.,203 E. 45th St.
234794,City Lights Diner,Crispín's,764 10th Ave.,764 10th Ave.
281635,Park Avenue Food Court,Andy's Deli,59 4th Ave.,59 4th Ave.
325992,Blu NYC,Indigo Rooftop Lounge,127 W. 28th St.,127 W. 28th St.


In [471]:
def filter_matches(df, *high_prec_feats):
    '''
    Filter out pairs with high precision features.
    Since each location can only be matched once, for each match
    we can reduce all other negative pairs containing that location.
    '''
    # Accumulate the union of positive predictions according to our feats
    # These can be filtered out by virtue of our feats being high precision
    pos = pd.Series([False] * df.shape[0])
    for feat in high_prec_feats:
        pos |= feat(df)
    
    # Select the positively predicted pairs
    matches = df[pos]
    
    # Remove all locu and foursquare ids that have already been matched
    # Do this using np.array operations and boolean indexing (which may not be the most efficient)
    keep = pd.Series([True] * (df.shape[0] - matches.shape[0]))
    for id in matches['id_locu']:
        keep = keep & (df['id_locu'] != id)
    
    for id in matches['id_foursq']:
        keep = keep & (df['id_foursq'] != id)
    
    # Return the positive predictions separately
    return df[~pos & keep], matches

def filter_neg(df, *high_rec_feats):
    '''
    Filter out pairs that do not match against very rec features.
    This means we can exclude them from consideration because they are
    unlikely to match.
    '''
    excl = pd.Series([False] * df.shape[0])
    for feat in low_prec_feats:
        excl |= feat(df)
    
    return df[~excl]

In [485]:
%%time
'''
Entire featurization + training + prediction pipeline with filtering and GBM.
'''
def addr_fuzzy(df):
    fuzzy_ratio = df.apply(lambda row: fuzz.ratio(row['street_address_foursq'] or 'None_foursq',  
                                                  row['street_address_locu'] or 'None_locu'), axis=1)
    return fuzzy_ratio
                           
df_train, matches_train = filter_matches(train_df, lambda df: addr_match(df) & name_match_no_brands(df))
df_train = filter_neg(df_train, lambda df: ~postal_match(df) & ~region_match(df))

df_test, matches_test = filter_matches(test_df, lambda df: addr_match(df) & name_match_no_brands(df))
df_test = filter_neg(df_test, lambda df: ~postal_match(df) & ~region_match(df))

feats_train = featurize(df_train,
                       lat_long_continuous,
                       name_fuzzy,
                       addr_fuzzy,
                       phone_num_match)

feats_test = featurize(df_test,
                       lat_long_continuous,
                       name_fuzzy,
                       addr_fuzzy,
                       phone_num_match)

params = {}
y_train = df_train['matched']
dataset_train = lgb.Dataset(feats_train, label=y_train)
clf_lgb_new = lgb.train(params, dataset_train, 100)

preds_train = clf_lgb_new.predict(feats_train) > .35
filtered_preds = np.array([1] * matches.shape[0])
f1 = f1_score(np.concatenate([y_train.values, filtered_preds]), 
               np.concatenate([preds_train, filtered_preds]))
print("F1 score: {}".format(f1))

preds = clf_lgb_new.predict(feats_test) > .35
matches = df_test[preds][['id_locu', 'id_foursq']]
matches = pd.concat([matches, matches_test[['id_locu', 'id_foursq']]])
matches.columns = ['locu_id', 'foursq_id']
matches.to_csv('matches_test3.csv', index=False)



F1 score: 0.9900744416873448
CPU times: user 3min 32s, sys: 92 ms, total: 3min 32s
Wall time: 1min 51s


Unnamed: 0,name_locu,name_foursq
1,El Maguey y la Tuna,Sticky's Finger Joint
2,Hair Date Salon/ Professionals Hair Cut,Sticky's Finger Joint
3,Pizza Plus Inc,Sticky's Finger Joint
4,Double Crown,Sticky's Finger Joint
5,Pink Tea Cup,Sticky's Finger Joint
7,Hudson Common @ the Hudson,Sticky's Finger Joint
8,Starbucks,Sticky's Finger Joint
9,..nyc jaajjjj......:-D,Sticky's Finger Joint
10,Trump's World Tower Valet,Sticky's Finger Joint
12,Time Warner Inc,Sticky's Finger Joint
