# Notebook 3: Individual Parks

In [1]:
#importing parks csv
import pandas as pd

# CSV holding the parks data; prefix a directory here if the file does not
# live in the current working directory.
fn = 'parks_data.csv'
parks_data = pd.read_csv(filepath_or_buffer=fn)

# Preview the first five rows
parks_data.head(5)

Unnamed: 0,id,owner,secret,server,farm,title,ispublic,isfriend,isfamily,description,...,longitude,accuracy,context,place_id,woeid,geo_is_public,geo_is_contact,geo_is_friend,geo_is_family,parkname
0,50952829406,77318907@N08,6fe5b8ef3a,65535,66,Angel's Gate Cloudscape,1,0,0,"{'_content': 'San Pedro, CA\n01-02-21'}",...,-118.271759,16,0,,5392528.0,1,0,0,0,angelsgate
1,50952896937,77318907@N08,ebe224b984,65535,66,Harbor Entrance at Sunrise,1,0,0,"{'_content': ""The Angel's Gate in San Pedro, C...",...,-118.271791,16,0,,5392528.0,1,0,0,0,angelsgate
2,50292619641,66115413@N07,11c1a5bb33,65535,66,Never come here at night,1,0,0,{'_content': 'La Brea Tar Pits\nLos Angeles'},...,-118.357064,15,0,,5381273.0,1,0,0,0,hancockpark
3,50220027608,66115413@N07,a917301efa,65535,66,Quarantined Sloths,1,0,0,{'_content': 'La Brea Tar Pits / Hancok Park'},...,-118.35715,16,0,,5381273.0,1,0,0,0,hancockpark
4,49861807202,22316914@N06,ccec045e20,65535,66,Subway,1,0,0,{'_content': 'Westlake / MacArthur Park metro ...,...,-118.274167,16,0,,8062690.0,1,0,0,0,macarthur


In [7]:
import nltk
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

# Stop words to drop from the tags: NLTK's English list with every character
# that is not a letter or whitespace removed, so the entries match text that
# has been through the same stripping.
# The class is spelled A-Za-z on purpose: the range A-z would also cover the
# ASCII symbols [ \ ] ^ _ ` that sit between 'Z' and 'a'.
swords = [re.sub(r"[^A-Za-z\s]", "", sword) for sword in stopwords.words('english')]

# Place and park-name tags that are treated as stop words as well.
swords += ['losangeles', 'la', 'losangelesca', 'ca', 'macarthur', 'macarthurpark', 'woodley', 'riodelosangeles', 'runyoncanyon',
           'temescalgateway', 'heidelbergpark', 'hancockpark', 'franklincanyonpark', 'angelsgate',
           'coldwatercanyon', 'chatsworthparksouth', 'cheviothills', 'california', 'usa', 'southerncalifornia', 'park', 'parklabrea',
           'unitedstates', 'america']

def clean_string(text):
    """Strip non-letters from ``text`` and return its lower-cased tokens minus stop words.

    Parameters
    ----------
    text : str
        Raw text to clean. A missing value (``None`` or a float NaN, which is
        how pandas represents an empty CSV cell) is treated as empty text.

    Returns
    -------
    list of str
        Tokens produced by ``word_tokenize`` on the stripped, lower-cased
        text, in their original order, excluding every entry of ``swords``.

    Raises
    ------
    TypeError
        If ``text`` is any other non-string value (raised by ``re.sub``).
    """
    # Missing value -> nothing to tokenize.
    if text is None or (isinstance(text, float) and text != text):
        return []

    # Remove everything except ASCII letters and whitespace. The class must be
    # A-Za-z: the range A-z also matches [ \ ] ^ _ ` and would let them through.
    text = re.sub(r"[^A-Za-z\s]", "", text)

    return [word for word in word_tokenize(text.lower()) if word not in swords]

# Clean only the tags column, replacing each raw tag string with its list of
# cleaned tokens. Values that are already lists are left untouched so that
# re-running this cell does not push cleaned lists back through clean_string.
parks_data['tags'] = parks_data['tags'].apply(
    lambda tags: tags if isinstance(tags, list) else clean_string(tags)
)


In [16]:
# Distinct park names present in the data
parks_data['parkname'].unique()

array(['angelsgate', 'hancockpark', 'macarthur', 'runyoncanyon',
       'chatsworthparksouth', 'franklincanyonpark', 'cheviothills',
       'coldwatercanyon', 'temescalgateway', 'riodelosangeles', 'Unknown',
       'woodley', 'heidelbergpark'], dtype=object)

### MacArthur Park

In [9]:
# Boolean mask: True for every photo whose parkname is 'macarthur'
macarthur = parks_data['parkname'].eq('macarthur')
macarthur.head()

0    False
1    False
2    False
3    False
4     True
Name: parkname, dtype: bool

In [14]:
# Rows for MacArthur Park only; all columns (tags included) are kept per photo
macarthur_tags = parks_data.loc[macarthur]
print(macarthur_tags)

# Sanity check: only 'macarthur' should remain
macarthur_tags['parkname'].unique()

array(['macarthur'], dtype=object)

In [19]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = macarthur_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [20]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                     value
tags                      
westlake               173
lake                    89
ciclavia                68
fountain                64
palmtrees               61
rally                   53
architecture            49
socal                   45
urban                   44
buildings               43
palms                   41
grand                   41
neighborhood            41
community               41
dtla                    39
mexican                 38
picounion               37
protest                 37
downtownlosangeles      37
lengua                  36
keepfamliestogethor     36
tamale                  36
asada                   36
alpastor                36
march                   36
migrant                 36
carnitas                36
eltaurino               36
burrito                 36
familyseparations       36
thegreattacohunt        36
detetioncenter          36
immigration             36
hcm                     35
landmark                34
s

In [21]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_macarthur.csv', index=True)

### Woodley Park


In [22]:
# Boolean mask: True for every photo whose parkname is 'woodley'
woodley = parks_data['parkname'].eq('woodley')
woodley.head()

0    False
1    False
2    False
3    False
4    False
Name: parkname, dtype: bool

In [23]:
# Rows for Woodley Park only; all columns (tags included) are kept per photo
woodley_tags = parks_data.loc[woodley]
print(woodley_tags)

# Sanity check: only 'woodley' should remain
woodley_tags['parkname'].unique()

              id         owner      secret  server  farm                title  \
1545  4559969340  33886308@N03  221588da16    3510     4    Pied-billed grebe   
1546  4559339861  33886308@N03  20b62e3be1    3397     4         Fox squirrel   
1759  4321724121  33886308@N03  8b9aaccfee    2758     3   Anna's Hummingbird   
1760  4321723815  33886308@N03  aa7c7fe6d3    4007     5     Friendly Mallard   
1836  4322459066  33886308@N03  15fb7a9db9    2797     3  Prehistoric pelican   

      ispublic  isfriend  isfamily  \
1545         1         0         0   
1546         1         0         0   
1759         1         0         0   
1760         1         0         0   
1836         1         0         0   

                                            description  ...   longitude  \
1545  {'_content': "I've gotten a lot of bad picture...  ... -118.472571   
1546  {'_content': "Most of the squirrels were shy, ...  ... -118.473494   
1759  {'_content': 'There were several of these arou... 

array(['woodley'], dtype=object)

In [24]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = woodley_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [25]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                   value
tags                    
sepulvedadam           5
woodleyavenuepark      5
bird                   4
vannuys                3
annashumingbird        1
brownpelican           1
duck                   1
foxsquirrel            1
grebe                  1
hummingbird            1
mallard                1
pelican                1
piedbilledgrebe        1
shorebird              1
squirrel               1


In [26]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_woodley.csv', index=True)

### Rio de Los Angeles

In [27]:
# Boolean mask: True for every photo whose parkname is 'riodelosangeles'
riodelosangeles = parks_data['parkname'].eq('riodelosangeles')
riodelosangeles.head()

0    False
1    False
2    False
3    False
4    False
Name: parkname, dtype: bool

In [28]:
# Rows for Rio de Los Angeles only; all columns (tags included) are kept per photo
riodelosangeles_tags = parks_data.loc[riodelosangeles]
print(riodelosangeles_tags)

# Sanity check: only 'riodelosangeles' should remain
riodelosangeles_tags['parkname'].unique()

              id            owner      secret  server  farm  \
978   8695273748      8653475@N05  702dfc939e    8121     9   
1305  5632712951      8653475@N05  dc10cc1b27    5265     6   
1306  5633295762      8653475@N05  5958fc85af    5307     6   
1307  5633295272      8653475@N05  f15d9ce5f0    5104     6   
1308  5632711539      8653475@N05  cde073afeb    5190     6   
1309  5632710989      8653475@N05  9d2b3d568d    5267     6   
1310  5633293548      8653475@N05  7227413d1d    5265     6   
1758  4427973434     90809455@N00  7d3ec9a6d8    4003     5   
1772  4032546103     90809455@N00  461c604617    2766     3   
1829  4407314265  37912374286@N01  26b90be1e7    4048     5   

                                                  title  ispublic  isfriend  \
978   National Park Service, Anahuak Youth Sports As...         1         0   
1305  Los Angeles State Historic Park Camp Out and R...         1         0   
1306  Los Angeles State Historic Park Camp Out and R...         1    

array(['riodelosangeles'], dtype=object)

In [29]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = riodelosangeles_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [30]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                                            value
tags                                             
riodelosangelesstatepark                       10
cityproject                                     7
lariver                                         7
anahuakyouthsportsassocation                    6
campout                                         6
cityprojectca                                   6
wcvi                                            6
walkathon                                       6
earthday                                        6
environmentaljustice                            6
urbanparkmovement                               6
mmf                                             6
losangelesriver                                 6
losangelesstatehistoricpark                     6
robertgarcia                                    1
santamonicamountainsnationalrecreationarea      1
afsnikkormmfg                                   1
smmnra                                          1


In [31]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_riodelosangeles.csv', index=True)

### Runyon Canyon

In [32]:
# Boolean mask: True for every photo whose parkname is 'runyoncanyon'
runyoncanyon = parks_data['parkname'].eq('runyoncanyon')
runyoncanyon.head()

0    False
1    False
2    False
3    False
4    False
Name: parkname, dtype: bool

In [33]:
# Rows for Runyon Canyon only; all columns (tags included) are kept per photo
runyoncanyon_tags = parks_data.loc[runyoncanyon]
print(runyoncanyon_tags)

# Sanity check: only 'runyoncanyon' should remain
runyoncanyon_tags['parkname'].unique()

               id            owner      secret  server  farm  \
77    49328405323     56701972@N00  f49b6dd886   65535    66   
78    49328405263     56701972@N00  dbda7b7c52   65535    66   
79    49328405288     56701972@N00  76923c4cd5   65535    66   
80    49329098177     56701972@N00  fe2de10eb4   65535    66   
81    49328405178     56701972@N00  4b8ba5e5bf   65535    66   
...           ...              ...         ...     ...   ...   
2658    342066691  35034347053@N01  8b45b279e2     151     1   
2659    342064652  35034347053@N01  7b67a7ce31     126     1   
2660    342060959  35034347053@N01  560ab39ad9     160     1   
2661    342059431  35034347053@N01  36d8564751     150     1   
2745    168615083     53611868@N00  5da8b48efc      67     1   

                         title  ispublic  isfriend  isfamily  \
77               Runyon Canyon         1         0         0   
78    Runyon Canyon View of LA         1         0         0   
79    Runyon Canyon View of LA         

array(['runyoncanyon'], dtype=object)

In [34]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = runyoncanyon_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [35]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                           value
tags                            
textures                     152
texturemaps                  152
texturemap                   152
texture                      152
hollywood                    123
runyon                        62
hiking                        60
hike                          40
runyoncanyonpark              39
sunset                        36
mountains                     35
attackcat                     31
canyon                        29
hollywoodhills                27
sky                           21
city                          21
angeles                       21
los                           21
downtown                      16
hills                         14
skyline                       14
mountain                      13
mm                            13
flickruseridn                 12
fisheye                       11
clouds                        11
lomo                          11
lomofisheye                   11
flickrwalk

In [36]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_runyoncanyon.csv', index=True)

### Temescal Gateway

In [37]:
# Boolean mask: True for every photo whose parkname is 'temescalgateway'
temescalgateway = parks_data['parkname'].eq('temescalgateway')
temescalgateway.head()

0    False
1    False
2    False
3    False
4    False
Name: parkname, dtype: bool

In [38]:
# Rows for Temescal Gateway only; all columns (tags included) are kept per photo
temescalgateway_tags = parks_data.loc[temescalgateway]
print(temescalgateway_tags)

# Sanity check: only 'temescalgateway' should remain
temescalgateway_tags['parkname'].unique()

              id         owner      secret  server  farm  \
700  12198446876  17573696@N00  82d958dcbd    3785     4   

                                title  ispublic  isfriend  isfamily  \
700  high point, temescal ridge trail         1         0         0   

          description  ...   longitude  accuracy  context          place_id  \
700  {'_content': ''}  ... -118.535461        16        0  9WPZAGRTVrwsRsDe   

         woeid geo_is_public  geo_is_contact  geo_is_friend  geo_is_family  \
700  2467338.0             1               0              0              0   

            parkname  
700  temescalgateway  

[1 rows x 22 columns]


array(['temescalgateway'], dtype=object)

In [39]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = temescalgateway_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [40]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                            value
tags                             
flickrandroidappfilternone      1
temescalcanyon                  1
temescalridgetrail              1


In [41]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_temescalgateway.csv', index=True)

### Heidelberg Park

### Hancock Park

In [42]:
# Boolean mask: True for every photo whose parkname is 'hancockpark'
hancockpark = parks_data['parkname'].eq('hancockpark')
hancockpark.head()

0    False
1    False
2     True
3     True
4    False
Name: parkname, dtype: bool

In [43]:
# Rows for Hancock Park only; all columns (tags included) are kept per photo
hancockpark_tags = parks_data.loc[hancockpark]
print(hancockpark_tags)

# Sanity check: only 'hancockpark' should remain
hancockpark_tags['parkname'].unique()

               id         owner      secret  server  farm  \
2     50292619641  66115413@N07  11c1a5bb33   65535    66   
3     50220027608  66115413@N07  a917301efa   65535    66   
8     49541489037  82057796@N00  6a84dd124a   65535    66   
9     49540767593  82057796@N00  a47eb21d64   65535    66   
10    49541267536  82057796@N00  057a041c79   65535    66   
...           ...           ...         ...     ...   ...   
2742    189408818  63348143@N00  a51d7e9ad9      44     1   
2743    181797470  63348143@N00  ff05d50cb7      44     1   
2744    179394267  63348143@N00  e6dd2d6a80      74     1   
2746    163958185  63348143@N00  222f36be46      73     1   
2747    161398070  63348143@N00  c3cdd3f644      50     1   

                         title  ispublic  isfriend  isfamily  \
2     Never come here at night         1         0         0   
3           Quarantined Sloths         1         0         0   
8                      Asphalt         1         0         0   
9          

array(['hancockpark'], dtype=object)

In [44]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = hancockpark_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [45]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                                         value
tags                                          
tarpits                                    584
labreatarpits                              460
labrea                                     372
museum                                     316
pagemuseum                                 223
fossils                                    202
bones                                      181
socal                                      159
estate                                     151
realestate                                 150
real                                       150
paleontology                               149
lacma                                      146
animals                                    131
mammoths                                   130
excavation                                 124
sabretooth                                 121
tigers                                     121
giantgroundsloths                          121
gettyhouse   

In [46]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_hancockpark.csv', index=True)

### Franklin Canyon Park

In [47]:
# Boolean mask: True for every photo whose parkname is 'franklincanyonpark'
franklincanyonpark = parks_data['parkname'].eq('franklincanyonpark')
franklincanyonpark.head()

0    False
1    False
2    False
3    False
4    False
Name: parkname, dtype: bool

In [48]:
# Rows for Franklin Canyon Park only; all columns (tags included) are kept per photo
franklincanyonpark_tags = parks_data.loc[franklincanyonpark]
print(franklincanyonpark_tags)

# Sanity check: only 'franklincanyonpark' should remain
franklincanyonpark_tags['parkname'].unique()

               id          owner      secret  server  farm  \
155   48797213627   59567508@N06  1c86381521   65535    66   
157   48796705638   59567508@N06  94a6ffa442   65535    66   
158   48796698878   59567508@N06  43519d1f49   65535    66   
159   48791864882   59567508@N06  59812d6794   65535    66   
492   32173106956   42730864@N05  f7a73c3660     497     1   
596   12938751304  101119516@N04  2472459247    2853     3   
979    8616697685   48108659@N00  7a7bcf8454    8103     9   
980    8617803974   48108659@N00  11f30a3549    8103     9   
981    8616697479   48108659@N00  6cd95ab6ce    8521     9   
982    8617803782   48108659@N00  2574d73818    8263     9   
983    8617803662   48108659@N00  39b941a5d3    8538     9   
1199   6968689821   13566572@N08  4732e96344    7179     8   
1200   6963670675   13566572@N08  0078feb2ef    7188     8   
1830   4331258989   48108659@N00  57932fffa5    4042     5   
1831   4331258953   48108659@N00  4abd45da83    4028     5   
1832   4

array(['franklincanyonpark'], dtype=object)

In [49]:
# Unpack each photo's tags so that every tag gets its own row next to the park name
cols = ['tags', 'parkname']
tag_park = franklincanyonpark_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [50]:
# Give every exploded row a count of 1 so that summing per tag yields the
# number of times the tag was used.
tag_park['value'] = 1

# Top 100 most-used tags, most frequent first. Only the numeric 'value' column
# is aggregated: summing the whole frame would also "sum" the string
# 'parkname' column, which newer pandas versions do by concatenating the
# strings instead of silently dropping the column.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the display limit so all 100 tags are shown
pd.set_option('display.max_rows', 100)

print(top_100_tags)

                                value
tags                                 
santamonicamountains               13
tvshowlocations                     8
californiadreamsphotography         8
tvlocations                         8
losangelesfilminglocations          8
franklincanyonlake                  8
filminglocations                    8
losangelesmountains                 8
losangelestvlocations               8
mayberrylake                        8
mayberrync                          8
thefishinhole                       8
californiadreamsphotographycom      8
myerslake                           8
andygriffith                        8
andygriffithshow                    8
nature                              8
rustythedog                         7
canine                              7
chihuahuamix                        7
mutt                                7
dog                                 7
pet                                 7
grass                               6
franklincany

In [51]:
# Write the ranked tags to CSV for hand coding; the tag names are the index, so it is kept
top_100_tags.to_csv(path_or_buf='top_tags_franklincanyonpark.csv', index=True)

### Angels Gate

### Coldwater Canyon

### Chatsworth Park South

### Cheviot Hills