# Notebook 3: Individual Parks

In [None]:
#importing parks csv
import pandas as pd

# Location of the parks export; prepend a directory here if the CSV lives elsewhere.
fn = 'parks_data.csv'
# Read the CSV into a DataFrame and preview its first rows.
parks_data = pd.read_csv(filepath_or_buffer=fn)
parks_data.head()

In [None]:
import nltk
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

# Build the stop-word list. NLTK's English stop words are stripped of punctuation
# (e.g. "don't" -> "dont") so they still match tags that had punctuation removed.
# The character class is spelled A-Za-z: the range A-z would also cover the ASCII
# symbols [ \ ] ^ _ ` that sit between 'Z' and 'a'.
swords = [re.sub(r"[^A-Za-z\s]", "", sword) for sword in stopwords.words('english')]

# Project-specific tags (place and park names) that are discarded as well.
swords += ['losangeles', 'la', 'losangelesca', 'ca', 'macarthur', 'macarthurpark', 'woodley', 'riodelosangeles', 'runyoncanyon',
           'temescalgateway', 'heidelbergpark', 'hancockpark', 'franklincanyonpark', 'angelsgate',
           'coldwatercanyon', 'chatsworthparksouth', 'cheviothills', 'california', 'usa', 'southerncalifornia', 'park', 'parklabrea',
           'unitedstates', 'america']

def clean_string(text):
    """Turn a raw tag string into a list of lowercase, stop-word-free tokens.

    Parameters
    ----------
    text : str
        Raw tag text. Missing values (None or NaN) are treated as "no tags".

    Returns
    -------
    list of str
        Tokens produced by ``word_tokenize`` that are not in the module-level
        ``swords`` list.
    """
    # Missing cell: None, or float NaN (the only value that is unequal to itself).
    if text is None or (isinstance(text, float) and text != text):
        return []

    # Keep letters and whitespace only. The class is A-Za-z rather than A-z,
    # because the A-z range would also let the symbols [ \ ] ^ _ ` through.
    text = re.sub(r"[^A-Za-z\s]", "", text)

    # Set membership is O(1); it is built per call so later changes to swords are seen.
    stop_words = set(swords)
    cleaned_list_of_words = [word for word in word_tokenize(text.lower()) if word not in stop_words]

    return cleaned_list_of_words

# Replace each raw tag string in the 'tags' column with its cleaned token list.
parks_data['tags'] = parks_data['tags'].map(clean_string)


In [None]:
# List the distinct park names present in the data.
parks_data['parkname'].unique()

### MacArthur Park

In [None]:
# Boolean mask: True for every row whose parkname is 'macarthur'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
macarthur = parks_data['parkname'].eq('macarthur')
macarthur.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
macarthur_tags = parks_data.loc[macarthur]
print(macarthur_tags)
# Check which park names ended up in the subset.
macarthur_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = macarthur_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_macarthur.csv', index=True)

# Analysis of the spread and strength of each CES – MacArthur Park

In [None]:
import pandas as pd

# Load the MacArthur top-tag counts exported earlier in this notebook.
fn = 'top_tags_macarthur.csv'
macarthur_tags = pd.read_csv(fn)
# Only a cell's last expression is displayed, so print this preview explicitly.
print(macarthur_tags.head())

# Load the CES code breakdown for MacArthur Park.
fn2 = 'codes_macarthur2.csv'
macarthur_ces = pd.read_csv(fn2)
macarthur_ces.head()

In [None]:
# Replace NaNs with 0.
# fillna returns a new DataFrame, so the result has to be assigned back;
# otherwise macarthur_ces still contains the NaNs.
macarthur_ces = macarthur_ces.fillna(0)
macarthur_ces

1. group by CES and count size
2. assign to new df to visualize

In [None]:
# Inspect the dtype of every column in the CES breakdown.
macarthur_ces.dtypes

In [None]:
# CES columns: every column of the breakdown except the 'Words' label column.
cols = [col for col in macarthur_ces.columns if col not in ['Words']]

# Total tag count used as the denominator when normalizing.
TOTAL_TAG_COUNT = 1682

# Percentage of all tags per CES column. The column totals are computed on a
# selection, so macarthur_ces itself is left untouched (overwriting each column
# with its own sum would destroy the per-word data for later cells).
ces = macarthur_ces[cols].sum() / TOTAL_TAG_COUNT * 100
print(ces)

Having trouble getting the overall percentage for each group → trying a different approach.

In [None]:
# Sum of each CES column, excluding the text 'Words' column.
# drop() returns a new frame instead of changing macarthur_ces, so the sum is
# chained onto its result; otherwise 'Words' would still be part of the totals.
sum_ces = macarthur_ces.drop(columns=['Words']).sum(axis=0)
print(sum_ces)

In [None]:
# Turn the totals Series into a two-column DataFrame: the former index labels
# move into an 'index' column and the values sit next to them.
# https://www.geeksforgeeks.org/convert-given-pandas-series-into-a-dataframe-with-its-index-as-another-column-on-the-dataframe/

cesDf = sum_ces.reset_index()
print(cesDf)
cesDf.columns.tolist()

In [None]:
# Value columns of cesDf: everything except the label column made by reset_index().
# (The column names of macarthur_ces do not exist in cesDf, so they cannot be used here.)
cols = [col for col in cesDf.columns if col not in ['index']]

# Percentage of the 1682 total tags per CES, labelled by the former index values.
# Non-numeric entries (e.g. a leftover 'Words' row) are coerced to NaN.
ces_total = cesDf.set_index('index')[cols].apply(pd.to_numeric, errors='coerce') / 1682 * 100
print(ces_total)

In [None]:
# Clean up the frame by dropping the 'Words' row (renaming the columns is not done here).

# After reset_index() the row labels are integers and the names live in the
# 'index' column, so the row is filtered out on that column. The result is
# assigned back because the filter does not modify cesDf in place.
cesDf = cesDf[cesDf['index'] != 'Words'].reset_index(drop=True)
print(cesDf)


In [None]:
#divide each column by total tag counts 1682 and multiply by 100 to normalize the data


### Woodley Park


In [None]:
# Boolean mask: True for every row whose parkname is 'woodley'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
woodley = parks_data['parkname'].eq('woodley')
woodley.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
woodley_tags = parks_data.loc[woodley]
print(woodley_tags)
# Check which park names ended up in the subset.
woodley_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = woodley_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_woodley.csv', index=True)

### Rio de Los Angeles

In [None]:
# Boolean mask: True for every row whose parkname is 'riodelosangeles'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
riodelosangeles = parks_data['parkname'].eq('riodelosangeles')
riodelosangeles.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
riodelosangeles_tags = parks_data.loc[riodelosangeles]
print(riodelosangeles_tags)
# Check which park names ended up in the subset.
riodelosangeles_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = riodelosangeles_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_riodelosangeles.csv', index=True)

### Runyon Canyon

In [None]:
# Boolean mask: True for every row whose parkname is 'runyoncanyon'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
runyoncanyon = parks_data['parkname'].eq('runyoncanyon')
runyoncanyon.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
runyoncanyon_tags = parks_data.loc[runyoncanyon]
print(runyoncanyon_tags)
# Check which park names ended up in the subset.
runyoncanyon_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = runyoncanyon_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_runyoncanyon.csv', index=True)

### Temescal Gateway

In [None]:
# Boolean mask: True for every row whose parkname is 'temescalgateway'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
temescalgateway = parks_data['parkname'].eq('temescalgateway')
temescalgateway.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
temescalgateway_tags = parks_data.loc[temescalgateway]
print(temescalgateway_tags)
# Check which park names ended up in the subset.
temescalgateway_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = temescalgateway_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_temescalgateway.csv', index=True)

### Heidelberg Park

### Hancock Park

In [None]:
# Boolean mask: True for every row whose parkname is 'hancockpark'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
hancockpark = parks_data['parkname'].eq('hancockpark')
hancockpark.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
hancockpark_tags = parks_data.loc[hancockpark]
print(hancockpark_tags)
# Check which park names ended up in the subset.
hancockpark_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = hancockpark_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_hancockpark.csv', index=True)

### Franklin Canyon Park

In [None]:
# Boolean mask: True for every row whose parkname is 'franklincanyonpark'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
franklincanyonpark = parks_data['parkname'].eq('franklincanyonpark')
franklincanyonpark.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
franklincanyonpark_tags = parks_data.loc[franklincanyonpark]
print(franklincanyonpark_tags)
# Check which park names ended up in the subset.
franklincanyonpark_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = franklincanyonpark_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_franklincanyonpark.csv', index=True)

### Angels Gate

In [None]:
# Boolean mask: True for every row whose parkname is 'angelsgate'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
angelsgate = parks_data['parkname'].eq('angelsgate')
angelsgate.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
angelsgate_tags = parks_data.loc[angelsgate]
print(angelsgate_tags)
# Check which park names ended up in the subset.
angelsgate_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = angelsgate_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_angelsgate.csv', index=True)

### Coldwater Canyon

In [None]:
# Boolean mask: True for every row whose parkname is 'coldwatercanyon'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
coldwatercanyon = parks_data['parkname'].eq('coldwatercanyon')
coldwatercanyon.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
coldwatercanyon_tags = parks_data.loc[coldwatercanyon]
print(coldwatercanyon_tags)
# Check which park names ended up in the subset.
coldwatercanyon_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = coldwatercanyon_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_coldwatercanyon.csv', index=True)

### Cheviot Hills

In [None]:
# Boolean mask: True for every row whose parkname is 'cheviothills'.
# Indexing parks_data with it keeps all columns (tags included) for those rows.
cheviothills = parks_data['parkname'].eq('cheviothills')
cheviothills.head()

In [None]:
# Keep only the rows flagged by the boolean mask, with every column intact.
cheviothills_tags = parks_data.loc[cheviothills]
print(cheviothills_tags)
# Check which park names ended up in the subset.
cheviothills_tags['parkname'].unique()

In [None]:
# Explode the list-valued 'tags' column so each tag gets its own row.
cols = ['tags', 'parkname']
tag_park = cheviothills_tags.loc[:, cols].explode(column='tags', ignore_index=True)

In [None]:
# Give every exploded row a count of 1 so the counts can be summed per tag.
tag_park['value'] = 1

# Top 100 most used tags, most frequent first. Only 'value' is summed: summing the
# whole frame would also "sum" the string 'parkname' column, which on recent pandas
# versions concatenates the park name once per row.
top_100_tags = (
    tag_park.groupby('tags')[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .head(100)
)

# Raise the row display limit so all 100 tags are shown.
pd.set_option('display.max_rows', 100)

print(top_100_tags)

In [None]:
# Save the tag counts for hand coding; the tag names are the index, so it is kept.
top_100_tags.to_csv(path_or_buf='top_tags_cheviothills.csv', index=True)

### Chatsworth Park South