In [1]:
from collections import Counter

import pandas as pd

from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [2]:
# Read the listings CSV into a DataFrame and preview the first rows.
LISTINGS_CSV = "data/AB_NYC_2019.csv"
df = pd.read_csv(LISTINGS_CSV)
df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [39]:
def extract_adjectives(list_of_tagged_words):
    """Collect the distinct adjectives from a POS-tagged token sequence.

    Args:
        list_of_tagged_words: iterable of ``(word, tag)`` pairs.

    Returns:
        list: the words whose tag is exactly ``'JJ'``, in order of first
        appearance and without repeats.
    """
    jj_words = (token for token, pos in list_of_tagged_words if pos == 'JJ')
    # dict keys keep insertion order, which gives an order-preserving de-duplication.
    return list(dict.fromkeys(jj_words))

def entity_extract(sentence):
    """Return the distinct adjectives found in a piece of text.

    The text is lower-cased, tokenized with ``word_tokenize``, tagged with
    ``pos_tag`` and then filtered through ``extract_adjectives``.

    Args:
        sentence: the text to analyse. Any non-string value (for example a
            missing value read from the CSV) is treated as having no text.

    Returns:
        list: the adjectives in order of first appearance; an empty list for
        non-string input, so the return type is always a list.
    """
    if not isinstance(sentence, str):
        # Return an empty list (not '') so every result has the same type.
        return []
    tagged_tokens = pos_tag(word_tokenize(sentence.lower()))
    return extract_adjectives(tagged_tokens)




In [40]:
# Show the title of the row labelled 0 as a quick sanity check.
df.loc[0, 'name']

'Clean & quiet apt home by the park'

In [41]:
# Drop rows without a listing name; later cells call str methods on 'name'.
# Rebinding instead of inplace=True keeps the cell free of in-place mutation.
df = df.dropna(axis=0, subset=['name'])

In [43]:
# Extract the adjectives of every listing name and store them as a new column.
name_adjectives = df['name'].apply(entity_extract)
df['adjectives'] = name_adjectives
df.loc[:, ['name', 'adjectives']].head(20)

Unnamed: 0,name,adjectives
0,Clean & quiet apt home by the park,"[clean, quiet, apt]"
1,Skylit Midtown Castle,[skylit]
2,THE VILLAGE OF HARLEM....NEW YORK !,[new]
3,Cozy Entire Floor of Brownstone,[entire]
4,Entire Apt: Spacious Studio/Loft by central park,"[entire, spacious, central]"
5,Large Cozy 1 BR Apartment In Midtown East,"[large, midtown]"
6,BlissArtsSpace!,[]
7,Large Furnished Room Near B'way,[large]
8,Cozy Clean Guest Room - Family Apt,"[cozy, clean]"
9,Cute & Cozy Lower East Side 1 bdrm,[east]


In [38]:
# Tally how often each adjective occurs across all listings.
all_adjectives = Counter(
    adj for list_of_adjectives in df['adjectives'] for adj in list_of_adjectives
)

# Words left out of the ranking, and how many entries the ranking keeps.
EXCLUDED_WORDS = {'apt', 'nyc', 'square'}
TOP_N = 20

# find the top TOP_N adjectives, most frequent first
# (most_common() sorts stably, so ties keep first-seen order)
top_adj = {}
for word, count in all_adjectives.most_common():
    if len(top_adj) == TOP_N:
        break
    if word not in EXCLUDED_WORDS:
        top_adj[word] = count

for k, v in top_adj.items():
    print(k, v)

private 7049
spacious 3744
east 2546
beautiful 2203
sunny 2075
large 2046
central 1757
modern 1756
new 1532
cozy 1334
upper 1280
quiet 1209
great 1128
bright 1112
huge 961
clean 901
prime 818
big 661
loft 604
comfortable 601
gorgeous 594
entire 589
midtown 564
west 520
historic 393


In [9]:
import nltk

# One-time download of the NLTK resources used in this notebook:
# 'punkt' for word_tokenize and 'averaged_perceptron_tagger' for pos_tag.
# On a fresh machine this cell must run before any cell that tokenizes or tags.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\miach\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

In [47]:
# Flag listings whose name mentions a garden, a yard or a rooftop (case-insensitive).
OUTDOOR_KEYWORDS = ('garden', 'yard', 'rooftop')
df['garden'] = df['name'].str.lower().apply(
    lambda lowered: any(keyword in lowered for keyword in OUTDOOR_KEYWORDS)
)
df['garden'].value_counts()

False    46604
True      2275
Name: garden, dtype: int64

In [48]:
# Inspect a random subset of the flagged listings; the fixed seed makes the
# displayed rows reproducible across kernel restarts.
df[df['garden']].sample(50, random_state=42)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,tagged_name,adjectives,garden
289,66974,"Lovely, Modern, Garden Apartment",329436,Jana,Brooklyn,Gowanus,40.68076,-73.9896,Entire home/apt,190,3,69,2019-07-01,0.79,2,258,"[(lovely, RB), (,, ,), (modern, JJ), (,, ,), (...","[modern, garden]",True
3468,2079686,Sunny apt with Backyard,7624316,Rebecca,Brooklyn,Bushwick,40.7018,-73.91823,Entire home/apt,60,3,14,2017-02-20,0.21,1,0,"[(sunny, JJ), (apt, NN), (with, IN), (backyard...",[sunny],True
8946,6863691,BKLYN'S Best 3BR Townhouse W/Garden,700224,Shane & Nicole,Brooklyn,Gowanus,40.68338,-73.98657,Entire home/apt,250,29,16,2019-04-30,0.36,4,300,"[(bklyn, NN), ('s, POS), (best, JJS), (3br, CD...",[],True
47531,35794273,Hope Garden,106798652,Timbrooke,Brooklyn,Bushwick,40.69554,-73.92511,Private room,130,1,0,,,2,365,"[(hope, NN), (garden, NN)]",[],True
45951,35005701,Sonder | The Nash | Playful 1BR + Rooftop,219517861,Sonder (NYC),Manhattan,Murray Hill,40.7473,-73.97603,Entire home/apt,194,29,0,,,327,329,"[(sonder, NN), (|, VBZ), (the, DT), (nash, JJ)...",[nash],True
2279,1092760,Private Entrance - Backyard Summer Dining - Enjoy,5887081,Michelle,Brooklyn,Bedford-Stuyvesant,40.69034,-73.95321,Entire home/apt,110,7,195,2019-07-03,2.72,2,284,"[(private, JJ), (entrance, NN), (-, :), (backy...",[private],True
1778,799900,"Amazing apartment near museum, gardens & park!",4211266,Nora,Brooklyn,Prospect Heights,40.67338,-73.96415,Entire home/apt,150,17,3,2016-09-26,0.07,1,0,"[(amazing, VBG), (apartment, NN), (near, IN), ...",[],True
17646,13877683,"Perfect Private Garden Apartment, 2 blox to su...",57994,Martin & Hande,Brooklyn,Bedford-Stuyvesant,40.68082,-73.91128,Entire home/apt,99,2,142,2019-07-07,3.94,1,57,"[(perfect, JJ), (private, JJ), (garden, NN), (...","[perfect, private]",True
9158,7010681,Gorgeous Garden Apt Steps To Park,36754170,Steve,Brooklyn,Prospect-Lefferts Gardens,40.65703,-73.95964,Entire home/apt,110,1,2,2015-08-10,0.04,1,0,"[(gorgeous, JJ), (garden, NN), (apt, JJ), (ste...","[gorgeous, apt]",True
16280,13112895,Penthouse Exclusive Garden Top Apartment,7573341,Robert,Brooklyn,Sunset Park,40.63885,-74.01959,Entire home/apt,225,2,130,2019-06-28,3.48,2,306,"[(penthouse, RB), (exclusive, JJ), (garden, NN...",[exclusive],True
