In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import IPython.display
IPython.display.set_matplotlib_formats('svg')
import os
import glob
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from scipy import stats
from datetime import datetime
from nltk import sent_tokenize
import spacy
from fuzzywuzzy import fuzz

In [2]:
# Directory of AllTrails inputs read below (trails_*.csv, traillatlon.csv, cached pickles).
# Keep the trailing slash: file names are appended with plain string concatenation.
path_alltrails = 'datasets/alltrails/'
# Directory of the cached NPSpecies fauna table (also concatenated, so keep the trailing slash).
path_npspecies = 'datasets/npspecies/'
# Presumably the database file consumed by the web app -- TODO confirm; it is not used in the cells shown here.
path_webapp_db_file = 'webapp/trailbeings.db'

# Read in list of parks/trails from AllTrails

In [3]:
# Load every per-park trail listing (trails_<park-slug>.csv) and stack them into one frame.
park_csv_list = glob.glob(path_alltrails+'trails_*.csv')
df_park_full_list = []
for park_csv_file in park_csv_list:
    df_park_current = pd.read_csv(park_csv_file, header=None, names=['park_name','trail_name','rating','n1','n2','n3','url'], encoding="ISO-8859-1")
    df_park_current.dropna(inplace=True)
    # The park slug is the file name without directory, 'trails_' prefix and '.csv' suffix.
    # os.path.basename strips the directory with the platform's own separator rules;
    # splitting on a literal backslash only worked on Windows and left the whole
    # directory prefix inside the slug everywhere else.
    park_file_name = os.path.basename(park_csv_file)
    df_park_current['park'] = park_file_name.replace('.csv', '').replace('trails_', '')
    # The trail slug is the last path component of the trail URL.
    df_park_current['trail'] = df_park_current['url'].str.split('/').str[-1]
    df_park_full_list.append(df_park_current)
df_park_list = pd.concat(df_park_full_list, sort=False)
df_park_list

Unnamed: 0,park_name,trail_name,rating,n1,n2,n3,url,park,trail
0,Apostle Islands National Lakeshore,Rifugio Vittorio Sella,HARD,(6),5.0,Showing 216 trails,https://www.alltrails.com/explore/trail/italy/...,apostle-islands-national-lakeshore,rifugio-vittorio-sella
1,Apostle Islands National Lakeshore,Sella-Herbetet Traverse,HARD,(9),5.0,Showing 216 trails,https://www.alltrails.com/explore/trail/italy/...,apostle-islands-national-lakeshore,sella-herbetet-traverse-loop
2,Apostle Islands National Lakeshore,Bonatti Refuge to Arnuva,MODERATE,(35),5.0,Showing 216 trails,https://www.alltrails.com/explore/trail/italy/...,apostle-islands-national-lakeshore,refugio-bonatti-to-arnuva
3,Apostle Islands National Lakeshore,Valnontey River Trail,MODERATE,(4),4.5,Showing 216 trails,https://www.alltrails.com/explore/trail/italy/...,apostle-islands-national-lakeshore,valnontey-river-trail
4,Apostle Islands National Lakeshore,Col Ferret,MODERATE,(27),5.0,Showing 216 trails,https://www.alltrails.com/explore/trail/italy/...,apostle-islands-national-lakeshore,col-ferret
...,...,...,...,...,...,...,...,...,...
488,Yellowstone National Park,Shoshone Lake Via Delacy Creek Trail,EASY,(45),4.0,Showing 26 trails,https://www.alltrails.com/explore/trail/us/wyo...,yellowstone-national-park,shoshone-lake-via-delacy-creek-trail
489,Yellowstone National Park,Biscuit Basin to Goose Lake via Little Firehole,MODERATE,(4),4.0,Showing 26 trails,https://www.alltrails.com/explore/trail/us/wyo...,yellowstone-national-park,biscuit-basin-to-goose-lake-via-little-firehole
490,Yellowstone National Park,Delacy Creek Trail to Shoshone Lake,HARD,(13),4.5,Showing 26 trails,https://www.alltrails.com/explore/trail/us/wyo...,yellowstone-national-park,delacy-creek-trail-to-shoshone-lake
491,Yellowstone National Park,Divide Mountain Trail,MODERATE,(6),3.0,Showing 26 trails,https://www.alltrails.com/explore/trail/us/wyo...,yellowstone-national-park,divide-mountain-trail


In [4]:
# Build a lookup from the combined '<park>_<trail>' slug to the display names.
# The slug column is appended and the two source slug columns are dropped on a
# new frame, so df_park_list itself is left untouched.
park_trail_slug = df_park_list['park'] + '_' + df_park_list['trail']
df_park_list_tmp = df_park_list.assign(park_trail=park_trail_slug).drop(columns=['park', 'trail'])

# One row per slug; each name column is reduced with Series.mode, which returns
# every most-frequent value (so a tie yields more than one value for that slug).
name_columns = df_park_list_tmp[['park_trail', 'park_name', 'trail_name']]
name_aggregation = {
    'park_name': pd.Series.mode,
    'trail_name': pd.Series.mode,
}
df_park_list_lookup = pd.DataFrame(name_columns.groupby(['park_trail']).agg(name_aggregation)).reset_index()
# str(df_park_list_lookup[df_park_list_lookup['park_trail']=='big-bend-national-park_basin-drive']['trail_name'].iloc[0])

In [5]:
# Group the trail rows by park display name and park slug, then count the
# non-null entries of every remaining column within each park.
park_groups = df_park_list.groupby(['park_name', 'park'])
df_park_list_no_trails = park_groups.count().reset_index()

In [6]:
# Per-park counts in descending park-name order with a fresh 0..n-1 index.
# Recomputed from df_park_list instead of reversing df_park_list_no_trails onto
# itself, so re-running this cell no longer flips the order back and forth.
df_park_list_no_trails = (
    df_park_list
    .groupby(['park_name', 'park'])
    .count()
    .reset_index()
    .iloc[::-1]
    .reset_index(drop=True)
)

In [7]:
df_park_list_no_trails

Unnamed: 0,park_name,park,trail_name,rating,n1,n2,n3,url,trail
0,Yellowstone National Park,yellowstone-national-park,409,409,409,409,409,409,409
1,White House,white-house,69,69,69,69,69,69,69
2,Sequoia National Park,sequoia-national-park,52,52,52,52,52,52,52
3,Petrified Forest National Park,petrified-forest-national-park,16,16,16,16,16,16,16
4,Natural Bridges National Monument,natural-bridges-national-monument,16,16,16,16,16,16,16
5,Mesa Verde National Park,mesa-verde-national-park,20,20,20,20,20,20,20
6,Mary McLeod Bethune Council House National His...,mary-mcleod-bethune-council-house-national-his...,3,3,3,3,3,3,3
7,Hot Springs National Park,hot-springs-national-park,46,46,46,46,46,46,46
8,Franklin D. Roosevelt Memorial,franklin-d-roosevelt-memorial,78,78,78,78,78,78,78
9,Fort Laramie National Historic Site,fort-laramie-national-historic-site,7,7,7,7,7,7,7


# See what df_beings and df_beings_trail look like

In [8]:
# df_beings = pd.DataFrame()
# df_beings_trail = pd.DataFrame()

# # First: iterate over all parks
# for park_id, park_row in df_park_list_no_trails.iterrows():
#     park = park_row['park']
#     park_name = park_row['park_name']
#     csv_list = glob.glob(path_alltrails+'reviews_'+park+'*.csv')
#     df_list = []
#     for csv_file in csv_list:
#         df_list.append(pd.read_csv(csv_file, index_col=0))
#     df_at = pd.concat(df_list, sort=False)
#     # df_at: reviews for just this park
#     df_at.dropna(inplace=True)
#     df_at['review_sent'] = df_at['review'].apply(lambda x: sent_tokenize(x.lower()))
#     df_at['month'] = pd.DatetimeIndex(df_at['date']).month
    
#     # Find total # of reviews for each month for this park
#     df_at_counts = df_at.groupby('month').size().reset_index(name='count')
#     # print(df_at_counts)
#     df_at_counts_trail = df_at.groupby(['trail', 'month']).size().reset_index(name='count')
#     # print(df_at_counts_trail)
    
#     # Next: iterate over beings, and then iterate over trails and add trail column
#     # df_np_fauna_uniq_pop: list of animals in just this park
#     df_np_fauna_uniq_pop = df_np_fauna_renum_nomissing_uniq_short[df_np_fauna_renum_nomissing_uniq_short['Park Name'] == park_name]
#     for being_id, being_row in df_np_fauna_uniq_pop.iterrows():
#         being_list = being_row['Short name']
#         being_name = being_row['Long name str']
#         np_occurrence = being_row['Occurrence']
#         np_abundance = being_row['Abundance']
#         np_occurrenceabundance = np_occurrence*np_abundance
#         print('Analyzing '+being_name)
#         being_score = 0
#         being_score_month = np.zeros(12)
#         # For each trail
#         df_at_trail_list = df_at.groupby('trail').size().reset_index(name='count')
#         for trail_index, trail_row in df_at_trail_list.iterrows():
#             print('-- Trail: '+trail_row['trail'])
#             being_score_trail = 0
#             being_score_trail_month = np.zeros(12)
#             # For each sub-name for this being
#             for being in being_list:
#                 print('---- Being(short): '+being)
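#                 for month in range(12):
#                     # FIXME: df_at['month'] holds calendar months 1-12 but this loop index runs 0-11, so the filter below never matches December and month k is accumulated in slot k (later written out as m{k+1}_s). Compare against month+1 instead.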
#                     # print('------ Month: '+str(month))
#                     # Model to match NPS name to comment contents uses NLP
#                     df_at_being_mentioned = df_at[(df_at['trail'] == trail_row['trail']) & (df_at['month'] == month)]['review_sent'].apply(lambda x: nlp_score_review_match_animal_singlename_sent(being, x))
#                     # Score of the current trails' reviews in matching the being
#                     score_current = df_at_being_mentioned.sum()
#                     being_score += score_current
#                     being_score_month[month] += score_current
#                     being_score_trail += score_current
#                     being_score_trail_month[month] += score_current
#             df_beings_trail = df_beings_trail.append({'being_id':being_id, 'park':park, 'trail':trail_row['trail'],
#                 'being_score':being_score_trail, 'being_score_norm':being_score_trail/df_at_counts['count'].sum(),
#                 'm1_s':being_score_trail_month[0], 'm1_t':being_score_trail_month[0]/df_at_counts[df_at_counts['month']==1]['count'].sum(),
#                 'm2_s':being_score_trail_month[1], 'm2_t':being_score_trail_month[1]/df_at_counts[df_at_counts['month']==2]['count'].sum(),
#                 'm3_s':being_score_trail_month[2], 'm3_t':being_score_trail_month[2]/df_at_counts[df_at_counts['month']==3]['count'].sum(),
#                 'm4_s':being_score_trail_month[3], 'm4_t':being_score_trail_month[3]/df_at_counts[df_at_counts['month']==4]['count'].sum(),
#                 'm5_s':being_score_trail_month[4], 'm5_t':being_score_trail_month[4]/df_at_counts[df_at_counts['month']==5]['count'].sum(),
#                 'm6_s':being_score_trail_month[5], 'm6_t':being_score_trail_month[5]/df_at_counts[df_at_counts['month']==6]['count'].sum(),
#                 'm7_s':being_score_trail_month[6], 'm7_t':being_score_trail_month[6]/df_at_counts[df_at_counts['month']==7]['count'].sum(),
#                 'm8_s':being_score_trail_month[7], 'm8_t':being_score_trail_month[7]/df_at_counts[df_at_counts['month']==8]['count'].sum(),
#                 'm9_s':being_score_trail_month[8], 'm9_t':being_score_trail_month[8]/df_at_counts[df_at_counts['month']==9]['count'].sum(),
#                 'm10_s':being_score_trail_month[9], 'm10_t':being_score_trail_month[9]/df_at_counts[df_at_counts['month']==10]['count'].sum(),
#                 'm11_s':being_score_trail_month[10], 'm11_t':being_score_trail_month[10]/df_at_counts[df_at_counts['month']==11]['count'].sum(),
#                 'm12_s':being_score_trail_month[11], 'm12_t':being_score_trail_month[11]/df_at_counts[df_at_counts['month']==12]['count'].sum(),
#                 'np_occurrence':np_occurrence, 'np_abundance':np_abundance,
#                 'np_occurrenceabundance':np_occurrenceabundance}, ignore_index=True)
#             df_beings_trail = df_beings_trail.astype({'being_id': int, 'being_score': float, 'being_score_norm':float,
#                 'm1_s': float, 'm2_s': float, 'm3_s': float, 'm4_s': float, 'm5_s': float, 'm6_s': float,
#                 'm7_s': float, 'm8_s': float, 'm9_s': float, 'm10_s': float, 'm11_s': float, 'm12_s': float,
#                 'm1_t': float, 'm2_t': float, 'm3_t': float, 'm4_t': float, 'm5_t': float, 'm6_t': float,
#                 'm7_t': float, 'm8_t': float, 'm9_t': float, 'm10_t': float, 'm11_t': float, 'm12_t': float,
#                 'np_occurrence':float, 'np_abundance':float, 'np_occurrenceabundance':float})
#         # being_score_norm # assigned inside append statement now... = float(being_score) / 1 # may fix later
#         print('-- Score: '+str(being_score))
#         # Indexing of month goes from 0-11 to 1-12 when encoding into df_beings:
#         df_beings = df_beings.append({'being_id':being_id, 'park':park,
#             'being_score':being_score, 'being_score_norm':being_score/df_at_counts['count'].sum(),
#             'm1_s':being_score_month[0], 'm1_t':being_score_month[0]/df_at_counts[df_at_counts['month']==1]['count'].sum(),
#             'm2_s':being_score_month[1], 'm2_t':being_score_month[1]/df_at_counts[df_at_counts['month']==2]['count'].sum(),
#             'm3_s':being_score_month[2], 'm3_t':being_score_month[2]/df_at_counts[df_at_counts['month']==3]['count'].sum(),
#             'm4_s':being_score_month[3], 'm4_t':being_score_month[3]/df_at_counts[df_at_counts['month']==4]['count'].sum(),
#             'm5_s':being_score_month[4], 'm5_t':being_score_month[4]/df_at_counts[df_at_counts['month']==5]['count'].sum(),
#             'm6_s':being_score_month[5], 'm6_t':being_score_month[5]/df_at_counts[df_at_counts['month']==6]['count'].sum(),
#             'm7_s':being_score_month[6], 'm7_t':being_score_month[6]/df_at_counts[df_at_counts['month']==7]['count'].sum(),
#             'm8_s':being_score_month[7], 'm8_t':being_score_month[7]/df_at_counts[df_at_counts['month']==8]['count'].sum(),
#             'm9_s':being_score_month[8], 'm9_t':being_score_month[8]/df_at_counts[df_at_counts['month']==9]['count'].sum(),
#             'm10_s':being_score_month[9], 'm10_t':being_score_month[9]/df_at_counts[df_at_counts['month']==10]['count'].sum(),
#             'm11_s':being_score_month[10], 'm11_t':being_score_month[10]/df_at_counts[df_at_counts['month']==11]['count'].sum(),
#             'm12_s':being_score_month[11], 'm12_t':being_score_month[11]/df_at_counts[df_at_counts['month']==12]['count'].sum(),
#             'np_occurrence':np_occurrence, 'np_abundance':np_abundance,
#             'np_occurrenceabundance':np_occurrenceabundance}, ignore_index=True)
#         df_beings = df_beings.astype({'being_id': int, 'being_score': float, 'being_score_norm':float,
#             'm1_s': float, 'm2_s': float, 'm3_s': float, 'm4_s': float, 'm5_s': float, 'm6_s': float,
#             'm7_s': float, 'm8_s': float, 'm9_s': float, 'm10_s': float, 'm11_s': float, 'm12_s': float,
#             'm1_t': float, 'm2_t': float, 'm3_t': float, 'm4_t': float, 'm5_t': float, 'm6_t': float,
#             'm7_t': float, 'm8_t': float, 'm9_t': float, 'm10_t': float, 'm11_t': float, 'm12_t': float,
#             'np_occurrence':float, 'np_abundance':float, 'np_occurrenceabundance':float})
#     break ### /// for debug: do 1 park only
# df_beings

# Inputs from previous Notebooks

In [9]:
import pickle

# Park-level being scores cached by a previous notebook.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so only
# load caches you produced yourself.
# The context manager closes the handle even if unpickling raises.
with open(path_alltrails+'old_df_beings_allparks.db', 'rb') as infile:
    df_beings = pickle.load(infile)
df_beings

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,m7_s,m7_t,m8_s,m8_t,m9_s,m9_t,np_abundance,np_occurrence,np_occurrenceabundance,park
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park
1,92386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park
2,92387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park
3,92388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park
4,92389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2707,7712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park
2708,7713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park
2709,7714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park
2710,7715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park


In [10]:
# Trail-level being scores cached by a previous notebook.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so only
# load caches you produced yourself.
# The context manager closes the handle even if unpickling raises.
with open(path_alltrails+'old_df_beings_trail_allparks.db', 'rb') as infile:
    df_beings_trail = pickle.load(infile)
df_beings_trail

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,m7_t,m8_s,m8_t,m9_s,m9_t,np_abundance,np_occurrence,np_occurrenceabundance,park,trail
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park,belcher-river-trail-lone-star-trailhead-to-bel...
1,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park,biscuit-basin-interpretive-trail
2,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park,biscuit-basin-to-goose-lake-via-little-firehole
3,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park,black-sand-basin-loop-yellowstone-np
4,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,yellowstone-national-park,delacy-creek-trail-to-shoshone-lake
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46400,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park,south-rim-trail-boot-springs-trail
46401,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park,the-window-trail
46402,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park,upper-burro-mesa-pouroff
46403,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,big-bend-national-park,ward-spring-trail


In [11]:
# NPSpecies fauna table (one row per park/species) cached by a previous notebook.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so only
# load caches you produced yourself.
# The context manager closes the handle even if unpickling raises.
with open(path_npspecies+'old_df_np_fauna_renum_nomissing_uniq_short.db', 'rb') as infile:
    df_np_fauna_renum_nomissing_uniq_short = pickle.load(infile)
df_np_fauna_renum_nomissing_uniq_short

Unnamed: 0,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
0,Abraham Lincoln Birthplace National Historical...,American toad,[toad],Amphibian,Bufo americanus,1.0,0.6
1,Abraham Lincoln Birthplace National Historical...,Blue Dasher,[dasher],Insect,Pachydiplax longipennis,1.0,
2,Abraham Lincoln Birthplace National Historical...,Blue-headed Vireo,[vireo],Bird,Vireo solitarius,1.0,0.6
3,Abraham Lincoln Birthplace National Historical...,Brown Creeper,[creeper],Bird,Certhia americana,1.0,0.6
4,Abraham Lincoln Birthplace National Historical...,Carolina Chickadee,[chickadee],Bird,Poecile carolinensis,1.0,0.8
...,...,...,...,...,...,...,...
94173,Zion National Park,western patch-nosed snake,[snake],Reptile,Salvadora hexalepis,1.0,0.5
94174,Zion National Park,western rattlesnake,[rattlesnake],Reptile,Crotalus oreganus,1.0,0.6
94175,Zion National Park,western skink,[skink],Reptile,Eumeces skiltonianus,1.0,0.6
94176,Zion National Park,western whiptail,[whiptail],Reptile,Cnemidophorus tigris,1.0,1.0


In [12]:
# Per-trail latitude/longitude and URL table, keyed by the park and trail slugs.
# NOTE(review): the rendered output shows an 'Unnamed: 0' column that mirrors the row
# index, which looks like an index saved into the CSV -- consider index_col=0 after
# confirming nothing downstream relies on that column.
df_trail_latlon = pd.read_csv(path_alltrails+'traillatlon.csv')
df_trail_latlon

Unnamed: 0.1,Unnamed: 0,park,trail,trail_lat,trail_lon,trail_url
0,0,yellowstone-national-park,upper-geyser-basin-and-old-faithful-observatio...,44.46011,-110.83002,https://www.alltrails.com/trail/us/wyoming/upp...
1,1,yellowstone-national-park,mystic-falls-fairy-creek-and-little-firehole-loop,44.48515,-110.85207,https://www.alltrails.com/trail/us/wyoming/mys...
2,2,yellowstone-national-park,observation-point-loop-trail,44.45952,-110.82641,https://www.alltrails.com/trail/us/wyoming/obs...
3,3,yellowstone-national-park,grand-prismatic-hot-spring,44.51531,-110.83260,https://www.alltrails.com/trail/us/wyoming/gra...
4,4,yellowstone-national-park,lone-star-geyser-trail,44.44448,-110.80460,https://www.alltrails.com/trail/us/wyoming/lon...
...,...,...,...,...,...,...
266,266,denali-national-park,jonesville-trail,63.73334,-148.89618,https://www.alltrails.com/trail/us/alaska/jone...
267,267,denali-national-park,roadside-trail--2,63.73034,-148.91945,https://www.alltrails.com/trail/us/alaska/road...
268,268,denali-national-park,mckinley-station-and-morino-loop-trail,63.73150,-148.91785,https://www.alltrails.com/trail/us/alaska/mcki...
269,269,denali-national-park,spruce-tree-trail,63.73136,-148.91762,https://www.alltrails.com/trail/us/alaska/spru...


# Join in species names

In [13]:
# Attach species details to the park-level scores.
# DataFrame.join(on=...) matches the 'being_id' values of df_beings against the
# *index* of the species table, and it is a left join: a being_id without a
# matching index label would get NaN in the species columns.
df_beings_s = df_beings.join(df_np_fauna_renum_nomissing_uniq_short, on='being_id')
df_beings_s

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrence,np_occurrenceabundance,park,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
1,92386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Hesperis' Atlantis Fritillary,[fritillary],Insect,Speyeria atlantis hesperis,1.0,0.5
2,92387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Acmon Blue,[blue],Insect,"[Plebejus acmon, Plebejus acmon lutzi]",1.0,0.5
3,92388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Adam peaclam,[peaclam],Other Non-vertebrates,Pisidium adamsi,1.0,0.5
4,92389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Afranius Duskywing,[duskywing],Insect,Erynnis afranius,1.0,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2707,7712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,big-bend-national-park,Big Bend National Park,threadfin shad,[shad],Fish,Dorosoma petenense,1.0,0.5
2708,7713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,big-bend-national-park,Big Bend National Park,warmouth,[warmouth],Fish,Lepomis gulosus,1.0,0.5
2709,7714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,big-bend-national-park,Big Bend National Park,western mosquitofish,[mosquitofish],Fish,Gambusia affinis,1.0,0.5
2710,7715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,big-bend-national-park,Big Bend National Park,white bass,[bass],Fish,Morone chrysops,1.0,0.5


In [14]:
# Attach species details to the trail-level scores.
# DataFrame.join(on=...) matches the 'being_id' values of df_beings_trail against
# the *index* of the species table, and it is a left join: a being_id without a
# matching index label would get NaN in the species columns.
df_beings_trail_s = df_beings_trail.join(df_np_fauna_renum_nomissing_uniq_short, on='being_id')
df_beings_trail_s

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrenceabundance,park,trail,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,belcher-river-trail-lone-star-trailhead-to-bel...,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
1,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,biscuit-basin-interpretive-trail,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
2,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,biscuit-basin-to-goose-lake-via-little-firehole,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
3,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,black-sand-basin-loop-yellowstone-np,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
4,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,delacy-creek-trail-to-shoshone-lake,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46400,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,big-bend-national-park,south-rim-trail-boot-springs-trail,Big Bend National Park,yellow bullhead,[bullhead],Fish,Ameiurus natalis,1.0,0.5
46401,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,big-bend-national-park,the-window-trail,Big Bend National Park,yellow bullhead,[bullhead],Fish,Ameiurus natalis,1.0,0.5
46402,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,big-bend-national-park,upper-burro-mesa-pouroff,Big Bend National Park,yellow bullhead,[bullhead],Fish,Ameiurus natalis,1.0,0.5
46403,7716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,big-bend-national-park,ward-spring-trail,Big Bend National Park,yellow bullhead,[bullhead],Fish,Ameiurus natalis,1.0,0.5


In [15]:
# Consider cleaning datasets (df_beings_clean) like you did in previous notebooks

# Make models by season/month

In [16]:
def doy_to_season(day_of_year):
    """Return the season code for a day of the year.

    Parameters
    ----------
    day_of_year : int or float
        Day number within the year (1 = 1 January). Fractional values are
        accepted and fall into the interval that contains them.

    Returns
    -------
    int
        1 for spring (days 80-171), 2 for summer (172-263),
        3 for autumn / fall (264-354) and 0 for winter (everything else,
        including values outside 1-366 and NaN).

    Notes
    -----
    Interval comparisons replace the earlier ``in range(...)`` membership
    tests, which only matched whole numbers (so 100.5 was reported as winter)
    and scanned the range element by element for non-``int`` numerics such as
    NumPy integers.
    """
    if 80 <= day_of_year < 172:
        return 1 # spring
    if 172 <= day_of_year < 264:
        return 2 # summer
    if 264 <= day_of_year < 355:
        return 3 # autumn / fall
    return 0 # winter

In [17]:
# Spot check: the ten rows with the largest 'm10_s' value for one Yellowstone trail.
df_beings_trail_s[(df_beings_trail_s['park'] == 'yellowstone-national-park') & (df_beings_trail_s['trail'] == 'fairy-falls--4')].nlargest(10, columns=['m10_s'])

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrenceabundance,park,trail,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
15295,92973,8.5,0.013449,1.5,0.0375,0.0,0.0,0.0,0.0,0.0,...,1.0,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,bison,"[bison, buffalo]",Mammal,Bison bison,1.0,1.0
241,92394,5.0,0.007911,1.0,0.025,0.0,0.0,0.0,0.0,0.0,...,0.8,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,American Black Bear,[bear],Mammal,Ursus americanus,1.0,0.8
6403,92631,5.0,0.007911,1.0,0.025,0.0,0.0,0.0,0.0,0.0,...,0.8,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,Grizzly Bear,[bear],Mammal,Ursus arctos horribilis,1.0,0.8
7,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
33,92386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,'Hesperis' Atlantis Fritillary,[fritillary],Insect,Speyeria atlantis hesperis,1.0,0.5
59,92387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,Acmon Blue,[blue],Insect,"[Plebejus acmon, Plebejus acmon lutzi]",1.0,0.5
85,92388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,Adam peaclam,[peaclam],Other Non-vertebrates,Pisidium adamsi,1.0,0.5
111,92389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,Afranius Duskywing,[duskywing],Insect,Erynnis afranius,1.0,0.5
137,92390,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.4,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,American Avocet,[avocet],Bird,Recurvirostra americana,1.0,0.4
163,92391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.8,yellowstone-national-park,fairy-falls--4,Yellowstone National Park,American Badger,[badger],Mammal,Taxidea taxus,1.0,0.8


In [18]:
# Spot check: the ten park-level rows with the largest normalised score across all parks.
df_beings_s.nlargest(10,columns=['being_score_norm']) #.merge(df_np_fauna_uniq_pop, left_index=True, right_index=True)

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrence,np_occurrenceabundance,park,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
2087,7092,51.5,0.066883,8.5,0.223684,6.0,0.117647,8.5,0.108974,0.0,...,1.0,0.5,big-bend-national-park,Big Bend National Park,American Black Bear,[bear],Mammal,Ursus americanus,1.0,0.5
588,92973,33.5,0.053006,3.5,0.0875,3.0,0.176471,1.5,0.115385,0.0,...,1.0,1.0,yellowstone-national-park,Yellowstone National Park,bison,"[bison, buffalo]",Mammal,Bison bison,1.0,1.0
1352,64956,3.5,0.033333,1.5,0.375,0.0,0.0,0.0,,0.0,...,1.0,0.4,mesa-verde-national-park,Mesa Verde National Park,American Badger,[badger],Mammal,Taxidea taxus,1.0,0.4
9,92394,19.0,0.030063,3.0,0.075,0.0,0.0,0.5,0.038462,0.0,...,1.0,0.8,yellowstone-national-park,Yellowstone National Park,American Black Bear,[bear],Mammal,Ursus americanus,1.0,0.8
246,92631,19.0,0.030063,3.0,0.075,0.0,0.0,0.5,0.038462,0.0,...,1.0,0.8,yellowstone-national-park,Yellowstone National Park,Grizzly Bear,[bear],Mammal,Ursus arctos horribilis,1.0,0.8
1692,65296,3.0,0.028571,1.5,0.375,0.0,0.0,0.0,,0.0,...,1.0,0.8,mesa-verde-national-park,Mesa Verde National Park,mule deer,[deer],Mammal,Odocoileus hemionus,1.0,0.8
191,92576,11.5,0.018196,1.5,0.0375,0.0,0.0,0.0,0.0,0.0,...,1.0,0.6,yellowstone-national-park,Yellowstone National Park,Eurasian Elk,"[elk, moose]",Mammal,Alces alces,1.0,0.6
1437,65041,1.5,0.014286,0.0,0.0,0.0,0.0,0.0,,0.0,...,1.0,0.8,mesa-verde-national-park,Mesa Verde National Park,Collared Lizard,[lizard],Reptile,Crotaphytus collaris,1.0,0.8
1439,65043,1.5,0.014286,0.0,0.0,0.0,0.0,0.0,,0.0,...,1.0,0.4,mesa-verde-national-park,Mesa Verde National Park,Common Earless Lizard,[lizard],Reptile,Holbrookia maculata,1.0,0.4
1447,65051,1.5,0.014286,0.0,0.0,0.0,0.0,0.0,,0.0,...,1.0,0.6,mesa-verde-national-park,Mesa Verde National Park,Common Side-blotched Lizard,[lizard],Reptile,Uta stansburiana,1.0,0.6


In [19]:
# Order both tables by descending occurrence*abundance so that, when entries with
# score=0.0 remain on a list, the most populous animals come first.
occurrence_abundance_col = 'np_occurrenceabundance'
df_beings_trail_s = df_beings_trail_s.sort_values(occurrence_abundance_col, ascending=False)
df_beings_s = df_beings_s.sort_values(occurrence_abundance_col, ascending=False)

In [20]:
# # Testing how to combine data
# month_no = 2
# prev_month_no = ((month_no - 2) % 12) + 1
# next_month_no = ((month_no) % 12) + 1
# df_trail_info_curr_n = df_beings_trail_s[(df_beings_trail_s['park'] == 'yellowstone-national-park') & (df_beings_trail_s['trail'] == 'fairy-falls--4')].nlargest(10, columns=['m'+str(month_no)+'_s'])
# df_trail_info_curr_z = df_trail_info_curr_n[df_trail_info_curr_n['m'+str(month_no)+'_s'] > 0.0]
# df_trail_info_prev_n = df_beings_trail_s[(df_beings_trail_s['park'] == 'yellowstone-national-park') & (df_beings_trail_s['trail'] == 'fairy-falls--4')].nlargest(10, columns=['m'+str(prev_month_no)+'_s'])
# df_trail_info_prev_z = df_trail_info_prev_n[df_trail_info_prev_n['m'+str(prev_month_no)+'_s'] > 0.0]
# df_trail_info_next_n = df_beings_trail_s[(df_beings_trail_s['park'] == 'yellowstone-national-park') & (df_beings_trail_s['trail'] == 'fairy-falls--4')].nlargest(10, columns=['m'+str(next_month_no)+'_s'])
# df_trail_info_next_z = df_trail_info_next_n[df_trail_info_next_n['m'+str(next_month_no)+'_s'] > 0.0]
# df_trail_info_full_n = df_beings_trail_s[(df_beings_trail_s['park'] == 'yellowstone-national-park') & (df_beings_trail_s['trail'] == 'fairy-falls--4')].nlargest(10, columns=['being_score'])
# df_trail_info_full_z = df_trail_info_full_n[df_trail_info_full_n['being_score'] > 0.0]

# df_trail_info_tmp = pd.concat([df_trail_info_curr_z, df_trail_info_prev_z, df_trail_info_next_z, df_trail_info_full_z]).drop_duplicates(
#     subset=['being_id'], keep='first'
# ).reset_index().truncate(after=3)

# # print(df_trail_info_tmp)

# df_park_info_curr_n = df_beings_s[(df_beings_s['park'] == 'yellowstone-national-park')].nlargest(10, columns=['m'+str(month_no)+'_s']).copy()
# df_park_info_curr_z = df_park_info_curr_n[df_park_info_curr_n['m'+str(month_no)+'_s'] > 0.0]
# df_park_info_prev_n = df_beings_s[(df_beings_s['park'] == 'yellowstone-national-park')].nlargest(10, columns=['m'+str(prev_month_no)+'_s']).copy()
# df_park_info_prev_z = df_park_info_prev_n[df_park_info_prev_n['m'+str(prev_month_no)+'_s'] > 0.0]
# df_park_info_next_n = df_beings_s[(df_beings_s['park'] == 'yellowstone-national-park')].nlargest(10, columns=['m'+str(next_month_no)+'_s']).copy()
# df_park_info_next_z = df_park_info_next_n[df_park_info_next_n['m'+str(next_month_no)+'_s'] > 0.0]
# df_park_info_full_n = df_beings_s[(df_beings_s['park'] == 'yellowstone-national-park')].nlargest(10, columns=['being_score']).copy()
# df_park_info_full_z = df_park_info_full_n[df_park_info_full_n['being_score'] > 0.0]

# df_park_info_tmp = pd.concat([df_park_info_curr_z, df_park_info_prev_z, df_park_info_next_z, df_park_info_full_z]).drop_duplicates(
#     subset=['being_id'], keep='first'
# ).reset_index().truncate(after=3)

# # print(df_park_info_tmp)

In [36]:
# Define df_trail_info: one row per (park, trail, month) whose 'animals' column is an HTML
# snippet ('<br>1. ...<br>2. ...') listing the top beings for that trail in that month.
# Reads df_beings_trail_s, df_trail_latlon and df_park_list_lookup from earlier cells.
number_of_top_beings_to_show = 4

# Label shown for each Occurrence*Abundance product; products that are not keys of this
# mapping are left untouched by Series.replace.
trail_abundance_labels = {
    0.0: 'rare',
    0.2: 'uncommon',
    0.4: 'uncommon',
    0.5: 'uncommon',
    0.6: 'common',
    0.8: 'common',
    1.0: 'abundant',
    np.nan: 'unknown',
}

df_trail_info_list = []
park_trail_list = df_beings_trail_s.groupby(['park','trail']).size()
# Iterate over the (park, trail) index directly: Series.iteritems() was removed in pandas 2.0.
for park, trail in park_trail_list.index:
    # Everything in this section depends only on the trail, not on the month, so it is
    # computed once per trail instead of once per month (or once per row).
    df_trail_beings = df_beings_trail_s[(df_beings_trail_s['park']==park) & (df_beings_trail_s['trail']==trail)]
    df_trail_info_full_n = df_trail_beings.nlargest(number_of_top_beings_to_show, columns=['being_score'])

    # df_trail_latlon has trail latitude, longitude data; .iloc[0] raises IndexError when
    # the trail has no entry there.
    trail_latlon_row = df_trail_latlon[(df_trail_latlon['park']==park) & (df_trail_latlon['trail']==trail)].iloc[0]
    trail_lat = float(trail_latlon_row['trail_lat'])
    trail_lon = float(trail_latlon_row['trail_lon'])

    # df_park_list_lookup is keyed by '<park>_<trail>' in its 'park_trail' column.
    park_trail_str = park + '_' + trail
    trail_name = str(df_park_list_lookup[df_park_list_lookup['park_trail']==park_trail_str]['trail_name'].iloc[0])

    for month in range(1,12+1):
        prev_month = ((month - 2) % 12) + 1
        next_month = (month % 12) + 1

        # Candidates in priority order: current month, previous month, next month (each
        # limited to beings with a positive score for that month), followed by the overall
        # top scorers, which are not filtered and therefore act as a fallback.
        candidate_frames = []
        for score_column in ('m'+str(month)+'_s', 'm'+str(prev_month)+'_s', 'm'+str(next_month)+'_s'):
            df_top_n = df_trail_beings.nlargest(number_of_top_beings_to_show, columns=[score_column])
            candidate_frames.append(df_top_n[df_top_n[score_column] > 0.0])
        candidate_frames.append(df_trail_info_full_n)

        # keep='first' preserves the priority order above when a being appears several times.
        df_trail_info_tmp = pd.concat(candidate_frames).drop_duplicates(
            subset=['being_id'], keep='first'
        ).reset_index(drop=True).truncate(after=number_of_top_beings_to_show-1).copy()

        # Number only the rows that are actually present: a trail with fewer beings than
        # number_of_top_beings_to_show would otherwise fail with a length-mismatch ValueError.
        df_trail_info_tmp['order'] = np.arange(1, len(df_trail_info_tmp)+1)

        # May add more info like 'Scientific Name', 'Occurrence', 'Abundance'
        df_trail_info_tmp['animals'] = '<br>' + df_trail_info_tmp['order'].apply(str) + '. ' + \
            df_trail_info_tmp['Long name str'] + ' (' + df_trail_info_tmp['Category'].str.lower() + '): ' + \
            (df_trail_info_tmp['Occurrence']*df_trail_info_tmp['Abundance']).replace(trail_abundance_labels)
        df_trail_info_tmp['month'] = month
        df_trail_info_tmp['trail_lat'] = trail_lat
        df_trail_info_tmp['trail_lon'] = trail_lon
        df_trail_info_tmp['trail_name'] = trail_name
        df_trail_info_tmp = df_trail_info_tmp.rename(columns={'Park Name':'park_name'})

        # Collapse the numbered rows into a single row; 'sum' on the string column
        # concatenates the per-being snippets in row order.
        df_trail_info_list.append(df_trail_info_tmp.groupby(['park','trail','park_name','trail_name','trail_lat','trail_lon','month']).agg({
            'animals': 'sum'
        }).reset_index())
if (df_trail_info_list):
    df_trail_info = pd.concat(df_trail_info_list)
# df_park_list has park, park_name, trail, trail_name data
df_trail_info

Unnamed: 0,park,trail,park_name,trail_name,trail_lat,trail_lon,month,animals
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,1,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,2,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,3,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,4,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,5,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
...,...,...,...,...,...,...,...,...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,8,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,9,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,10,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,11,<br>1. American Black Bear (mammal): common<br...


In [37]:
# Define df_park_info: one row per (park, month) whose 'animals' column is an HTML snippet
# ('<br>1. ...<br>2. ...') listing the top beings for that park in that month.
# Reads df_beings_s from an earlier cell.
number_of_top_beings_to_show = 5

# Label shown for each Occurrence*Abundance product; products that are not keys of this
# mapping are left untouched by Series.replace.
park_abundance_labels = {
    0.0: 'rare',
    0.2: 'uncommon',
    0.4: 'uncommon',
    0.5: 'uncommon',
    0.6: 'common',
    0.8: 'common',
    1.0: 'abundant',
    np.nan: 'unknown',
}

df_park_info_list = []
park_list = df_beings_s.groupby(['park']).size()
# Iterate over the park index directly: Series.iteritems() was removed in pandas 2.0.
for park in park_list.index:
    # The park subset and its overall top scorers do not depend on the month, so they are
    # computed once per park rather than four times per month.
    df_park_beings = df_beings_s[(df_beings_s['park']==park)]
    df_park_info_full_n = df_park_beings.nlargest(number_of_top_beings_to_show, columns=['being_score'])

    for month in range(1,12+1):
        prev_month = ((month - 2) % 12) + 1
        next_month = (month % 12) + 1

        # Candidates in priority order: current month, previous month, next month (each
        # limited to beings with a positive score for that month), followed by the overall
        # top scorers, which are not filtered and therefore act as a fallback.
        candidate_frames = []
        for score_column in ('m'+str(month)+'_s', 'm'+str(prev_month)+'_s', 'm'+str(next_month)+'_s'):
            df_top_n = df_park_beings.nlargest(number_of_top_beings_to_show, columns=[score_column])
            candidate_frames.append(df_top_n[df_top_n[score_column] > 0.0])
        candidate_frames.append(df_park_info_full_n)

        # keep='first' preserves the priority order above when a being appears several times.
        df_park_info_tmp = pd.concat(candidate_frames).drop_duplicates(
            subset=['being_id'], keep='first'
        ).reset_index(drop=True).truncate(after=number_of_top_beings_to_show-1).copy()

        # The index is 0..k-1 after reset_index, so index+1 numbers exactly the rows present.
        df_park_info_tmp['order'] = df_park_info_tmp.index+1

        # May add more info like 'Scientific Name', 'Occurrence', 'Abundance'
        df_park_info_tmp['animals'] = '<br>' + df_park_info_tmp['order'].apply(str) + '. ' + \
            df_park_info_tmp['Long name str'] + ' (' + df_park_info_tmp['Category'].str.lower() + '): ' + \
            (df_park_info_tmp['Occurrence']*df_park_info_tmp['Abundance']).replace(park_abundance_labels)
        df_park_info_tmp['month'] = month
        df_park_info_tmp = df_park_info_tmp.rename(columns={'Park Name':'park_name'})

        # Collapse the numbered rows into a single row; 'sum' on the string column
        # concatenates the per-being snippets in row order.
        df_park_info_list.append(df_park_info_tmp.groupby(['park','park_name','month']).agg({
            'animals': 'sum'
        }).reset_index())
if (df_park_info_list):
    df_park_info = pd.concat(df_park_info_list)
# df_park_list has park, park_name, trail, trail_name data

In [47]:
# Rename two parks in both summary tables, in place. DataFrame.replace compares whole cell
# values across every column, so only cells equal to the old name are rewritten.
# NOTE(review): presumably the new names are the ones the web app expects -- confirm.
for info_frame in (df_park_info, df_trail_info):
    for old_name, new_name in (
        ('Fort Laramie National Historic Site', 'Fort Lauderdale State Parks'),
        ('Natural Bridges National Monument', 'Natural Bridge State Park'),
    ):
        info_frame.replace(old_name, new_name, inplace=True)

In [38]:
# Spot check: the monthly summary rows for a single park.
df_park_info.loc[df_park_info['park_name'].eq('Petrified Forest National Park')]

Unnamed: 0,park,park_name,month,animals
0,petrified-forest-national-park,Petrified Forest National Park,1,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,2,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,3,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,4,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,5,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,6,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,7,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,8,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,9,<br>1. Gunnison's Prairie Dog (mammal): uncomm...
0,petrified-forest-national-park,Petrified Forest National Park,10,<br>1. Gunnison's Prairie Dog (mammal): uncomm...


In [39]:
# Spot check: the 50 highest-scoring beings (by overall being_score) for a single park.
df_beings_s.loc[df_beings_s['Park Name'].eq('Petrified Forest National Park')].nlargest(n=50, columns='being_score')

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrence,np_occurrenceabundance,park,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
859,74298,0.5,0.005376,0.0,0.0,0.0,0.0,0.5,0.5,0.0,...,1.0,0.5,petrified-forest-national-park,Petrified Forest National Park,Gunnison's Prairie Dog,[dog],Mammal,Cynomys gunnisoni,1.0,0.5
831,74270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Eastern Fence Lizard,[lizard],Reptile,Sceloporus undulatus,1.0,1.0
965,74404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Sagebrush Lizard,[lizard],Reptile,Sceloporus graciosus,1.0,1.0
937,74376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Plateau Striped Whiptail,[whiptail],Reptile,Cnemidophorus velox,1.0,1.0
866,74305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Horned Lark,[lark],Bird,Eremophila alpestris,1.0,1.0
769,74208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Black-throated Sparrow,[sparrow],Bird,Amphispiza bilineata,1.0,1.0
900,74339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Mexican Spadefoot,[spadefoot],Amphibian,Spea multiplicata,1.0,1.0
890,74329,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Loggerhead Shrike,[shrike],Bird,Lanius ludovicianus,1.0,1.0
850,74289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,petrified-forest-national-park,Petrified Forest National Park,Great Plains Toad,[toad],Amphibian,Bufo cognatus,1.0,1.0
816,74255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.8,petrified-forest-national-park,Petrified Forest National Park,Common Side-blotched Lizard,[lizard],Reptile,Uta stansburiana,1.0,0.8


In [48]:
# pickle is not among the imports in the notebook's first cell, so import it here to keep
# this cell runnable on a fresh kernel.
import pickle

# Persist both summary tables into one file as two consecutive pickles: a reader has to
# call pickle.load twice, in this order (trail info first, then park info).
# The with-block closes the file even when one of the dumps raises.
with open(path_webapp_db_file, 'wb') as outfile:
    pickle.dump(df_trail_info, outfile)
    pickle.dump(df_park_info, outfile)

In [41]:
# Show the full (untruncated) HTML snippet of the last row.
df_trail_info.iloc[-1]['animals']

'<br>1. American Black Bear (mammal): common<br>2. Grizzly Bear (mammal): common<br>3. Red-tailed Hawk (bird): abundant<br>4. brown trout (fish): abundant'

In [42]:
# Display the per-trail summary table (one row per park, trail and month).
df_trail_info

Unnamed: 0,park,trail,park_name,trail_name,trail_lat,trail_lon,month,animals
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,1,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,2,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,3,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,4,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
0,big-bend-national-park,basin-drive,Big Bend National Park,Basin Drive,29.33452,-103.25668,5,<br>1. Say's Phoebe (bird): abundant<br>2. Whi...
...,...,...,...,...,...,...,...,...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,8,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,9,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,10,<br>1. American Black Bear (mammal): common<br...
0,yellowstone-national-park,yellowstone-observation-point,Yellowstone National Park,Yellowstone Observation Point,44.45925,-110.82629,11,<br>1. American Black Bear (mammal): common<br...


In [43]:
# Display the per-park summary table (one row per park and month).
df_park_info

Unnamed: 0,park,park_name,month,animals
0,big-bend-national-park,Big Bend National Park,1,<br>1. American Black Bear (mammal): uncommon<...
0,big-bend-national-park,Big Bend National Park,2,<br>1. American Black Bear (mammal): uncommon<...
0,big-bend-national-park,Big Bend National Park,3,<br>1. American Black Bear (mammal): uncommon<...
0,big-bend-national-park,Big Bend National Park,4,<br>1. Western Diamond-backed Rattlesnake (rep...
0,big-bend-national-park,Big Bend National Park,5,<br>1. American Black Bear (mammal): uncommon<...
...,...,...,...,...
0,yellowstone-national-park,Yellowstone National Park,8,<br>1. bison (mammal): abundant<br>2. Rocky Mo...
0,yellowstone-national-park,Yellowstone National Park,9,<br>1. bison (mammal): abundant<br>2. Grizzly ...
0,yellowstone-national-park,Yellowstone National Park,10,<br>1. bison (mammal): abundant<br>2. Grizzly ...
0,yellowstone-national-park,Yellowstone National Park,11,<br>1. bison (mammal): abundant<br>2. Common R...


In [343]:
# Spot check: the 50 beings with the highest score in the 'm6_s' column, across all parks.
df_beings_s.nlargest(n=50, columns='m6_s')

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrence,np_occurrenceabundance,park,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
9,92394,19.0,0.030063,3.0,0.075,0.0,0.0,0.5,0.038462,0.0,...,1.0,0.8,yellowstone-national-park,Yellowstone National Park,American Black Bear,[bear],Mammal,Ursus americanus,1.0,0.8
246,92631,19.0,0.030063,3.0,0.075,0.0,0.0,0.5,0.038462,0.0,...,1.0,0.8,yellowstone-national-park,Yellowstone National Park,Grizzly Bear,[bear],Mammal,Ursus arctos horribilis,1.0,0.8
588,92973,33.5,0.053006,3.5,0.0875,3.0,0.176471,1.5,0.115385,0.0,...,1.0,1.0,yellowstone-national-park,Yellowstone National Park,bison,"[bison, buffalo]",Mammal,Bison bison,1.0,1.0
7,92392,1.5,0.002373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.6,yellowstone-national-park,Yellowstone National Park,American Beaver,[beaver],Mammal,Castor canadensis,1.0,0.6
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
1,92386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Hesperis' Atlantis Fritillary,[fritillary],Insect,Speyeria atlantis hesperis,1.0,0.5
2,92387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Acmon Blue,[blue],Insect,"[Plebejus acmon, Plebejus acmon lutzi]",1.0,0.5
3,92388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Adam peaclam,[peaclam],Other Non-vertebrates,Pisidium adamsi,1.0,0.5
4,92389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Afranius Duskywing,[duskywing],Insect,Erynnis afranius,1.0,0.5
5,92390,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.4,yellowstone-national-park,Yellowstone National Park,American Avocet,[avocet],Bird,Recurvirostra americana,1.0,0.4


In [344]:
# Spot check: the 50 beings with the highest score in the 'm3_s' column, across all parks.
df_beings_s.nlargest(n=50, columns='m3_s')

Unnamed: 0,being_id,being_score,being_score_norm,m10_s,m10_t,m11_s,m11_t,m12_s,m12_t,m1_s,...,np_occurrence,np_occurrenceabundance,park,Park Name,Long name str,Short name,Category,Scientific Name,Occurrence,Abundance
191,92576,11.5,0.018196,1.5,0.0375,0.0,0.0,0.0,0.0,0.0,...,1.0,0.6,yellowstone-national-park,Yellowstone National Park,Eurasian Elk,"[elk, moose]",Mammal,Alces alces,1.0,0.6
422,92807,7.0,0.011076,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,yellowstone-national-park,Yellowstone National Park,Rocky Mountain Elk,[elk],Mammal,Cervus elaphus,1.0,1.0
0,92385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Anicia' Variable Checkerspot,[checkerspot],Insect,Euphydryas chalcedona anicia,1.0,0.5
1,92386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,'Hesperis' Atlantis Fritillary,[fritillary],Insect,Speyeria atlantis hesperis,1.0,0.5
2,92387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Acmon Blue,[blue],Insect,"[Plebejus acmon, Plebejus acmon lutzi]",1.0,0.5
3,92388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Adam peaclam,[peaclam],Other Non-vertebrates,Pisidium adamsi,1.0,0.5
4,92389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.5,yellowstone-national-park,Yellowstone National Park,Afranius Duskywing,[duskywing],Insect,Erynnis afranius,1.0,0.5
5,92390,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.4,yellowstone-national-park,Yellowstone National Park,American Avocet,[avocet],Bird,Recurvirostra americana,1.0,0.4
6,92391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.8,yellowstone-national-park,Yellowstone National Park,American Badger,[badger],Mammal,Taxidea taxus,1.0,0.8
7,92392,1.5,0.002373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.6,yellowstone-national-park,Yellowstone National Park,American Beaver,[beaver],Mammal,Castor canadensis,1.0,0.6


In [206]:
# Spot check: the 'animals' snippets of every trail/month row for a single park.
df_trail_info.loc[df_trail_info['park'] == 'yellowstone-national-park', 'animals']

0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
                           ...                        
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
0    <br>1. 'Anicia' Variable Checkerspot (insect):...
Name: animals, Length: 312, dtype: object