In [449]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [450]:
US_trails = pd.read_csv('../data/US_trails_half_step.csv')
US_trails.shape

(26754, 24)

In [451]:
US_trails.columns

Index(['ascent', 'conditionDate', 'conditionDetails', 'conditionStatus',
       'descent', 'difficulty', 'high', 'id', 'imgMedium', 'imgSmall',
       'imgSmallMed', 'imgSqSmall', 'latitude', 'length', 'location',
       'longitude', 'low', 'name', 'starVotes', 'stars', 'summary', 'type',
       'url', 'State'],
      dtype='object')

In [452]:
US_trails_one_step = pd.read_csv('../data/US_trails_1step.csv')
US_trails_one_step.shape
#this goes to show that half_step brought in more trails than one step

(24754, 25)

In [453]:
US_trails_one_step['type'].value_counts()

Trail            17393
Connector         4719
Featured Ride     2642
Name: type, dtype: int64

In [454]:
US_trails['type'].value_counts()
#half step picked up a lot more Connectors, but 9 Featured Rides

Trail            18297
Connector         5804
Featured Ride     2653
Name: type, dtype: int64

In [455]:
US_trails[US_trails['type'] == 'Connector']['name']

33                                          Northern Trail
44                                            Kitchen Sink
45                                             Gumbo Limbo
47                                              Cabin Loop
48                                         Half Pipe Trail
57                                           Dragon's Gate
58                                               El Diablo
59                                           Dragon's Tail
60                                      Stairway To Heaven
61                                          Christmas Tree
62                                                  Wicked
63                                            Kitchen Sink
64                                                Gilligan
65                                              Rocky Mile
66                                          Buzzard Bridge
67                                       Gates of Delirium
68                                            Warm-Up Lo

In [456]:
US_trails[US_trails['type'] == 'Connector']['stars'].value_counts()

0.0    5079
3.0     351
4.0     133
2.0     115
5.0      90
1.0      19
2.5       7
3.5       6
4.5       3
1.5       1
Name: stars, dtype: int64

In [457]:
US_trails[US_trails['type'] == 'Connector']['stars'].count()

5804

In [458]:
#85 percent of Connectors are rated as 0, which means they probably weren't rated so you might need to delete them
4031/4709

0.8560203864939477

In [459]:
Connectors = US_trails[US_trails['type']=='Connector']
Connectors.shape

(5804, 24)

In [460]:
Connectors['difficulty'].value_counts()

green        2371
blue         1329
greenBlue    1251
black         469
blueBlack     323
dblack         51
missing        10
Name: difficulty, dtype: int64

In [461]:
types = ['Trail', 'Featured Ride']
US_trails = US_trails[US_trails['type'].isin(types)].reset_index(drop=True)

In [462]:
US_trails.shape

(20950, 24)

In [463]:
US_trails.groupby('State')['name', 'id'].nunique()
#this goes to show that every state except 9 have trails that have the same name

Unnamed: 0_level_0,name,id
State,Unnamed: 1_level_1,Unnamed: 2_level_1
alabama,253,262
alaska,145,145
arizona,762,781
arkansas,300,314
california,2559,2681
colorado,1901,1953
connecticut,184,211
dc,5,5
delaware,53,53
florida,354,369


In [464]:
US_trails.columns

Index(['ascent', 'conditionDate', 'conditionDetails', 'conditionStatus',
       'descent', 'difficulty', 'high', 'id', 'imgMedium', 'imgSmall',
       'imgSmallMed', 'imgSqSmall', 'latitude', 'length', 'location',
       'longitude', 'low', 'name', 'starVotes', 'stars', 'summary', 'type',
       'url', 'State'],
      dtype='object')

In [465]:
#dropping trails that are unnecessary because they are image links and condition statuses
#I'm dropping low and high because I don't need the elevation points
#I'm dropping starVotes, summary, and url, though I'd like to include those as outputs on the website
US_trails.drop(['conditionDate','conditionDetails', 'conditionStatus', 
                'high', 'id', 'imgMedium', 'imgSmall', 
                'imgSmallMed', 'imgSqSmall', 'low', 
               'starVotes'],
               axis = 1, inplace = True) 

In [466]:
US_trails.isnull().any()

ascent        False
descent       False
difficulty    False
latitude      False
length        False
location      False
longitude     False
name          False
stars         False
summary        True
type          False
url           False
State         False
dtype: bool

In [467]:
US_trails['summary'].isnull().sum()

9

In [468]:
ordered_columns = ['name','location','State','type',
                   'difficulty','length','ascent',
                   'descent','stars','latitude','longitude']
US_trails = US_trails[ordered_columns]

In [469]:
burro = US_trails[US_trails['name'] == 'Burro Trail']
burro.shape

(2, 11)

In [470]:
names = list(US_trails['name'].unique())
names

['Peacock Flats',
 'Manana Out and Back',
 'Waimanalo MTB',
 'Kealia Trail',
 'Ohana Trail',
 'Manana Trail',
 'Kuaokala Ridge Trail',
 'Maunawili Ditch Trail',
 'Mokuleia Firebreak Road',
 'Kuaokala Firebreak Road',
 'Mokuleia Access Road',
 'Skyline Downhill',
 'Haleakala Shuttle',
 'Polipoli Springs Loop',
 'Skyline',
 'Mamane',
 'Waipoli doubletrack',
 'Kahakapao Loop',
 'Kaloko Loop',
 'Pineapple Express',
 'Kahakapao West',
 'Ravine',
 'Kahakapao East',
 'Renegade',
 'Tweener',
 'Tree Stump Trail',
 'Jurassic Park Trail',
 'Flat Line Trail',
 'Pig Skull Trail',
 'Old Hao Road',
 'Rowdy Bend Trail',
 'Snake Bight Trail',
 'Long Pine Key Trail',
 'Main Trail',
 'Southern Trail',
 'Tram Road',
 'North Point Trails',
 'Amelia Earhart MTB Trails',
 "Gilligan's Island Trail",
 'Oleta State Park',
 'Goldstick Trail',
 'Purple Haze',
 'Strangler Fig',
 'Gator Bank Trail',
 'Little Razor Back',
 'Chupacabra',
 'Start Me Up',
 'Crossing the Swamp',
 'East Bound and Down',
 'Paintball Loop'

In [471]:
#I need to find out the trails that have two names; are they in the same location within a state?
multiple_names = []
for name in names:
    df = US_trails[US_trails['name'] == name]
    if df.shape[0] > 1:
        multiple_names.append(df)

In [472]:
many_names = pd.concat(multiple_names, ignore_index = True)
many_names.shape

(3733, 11)

In [473]:
#This shows that there are a lot of trails with the same name, but they are often in different states, and if there are
#repeated names within a state, they are often in different locations
#many_names.head(20)

In [474]:
many_names.shape

(3733, 11)

In [475]:
#from many_names I need to find out if there are names that are in the same state and location
#for each unique name in many_names, get the state of the name, create dataframes, if their shape[0] is more than 1,
#append them to a list of dataframes, then concat them so I can get a better look
unique_names = list(many_names['name'].unique())
len(unique_names) #there are 1148 names that are repeated in my set of trails

1148

In [476]:
many_names[many_names['name']=='Skyline']['State'].value_counts().to_dict()
    

{'hawaii': 1, 'nevada': 1, 'west virginia': 1}

In [477]:
many_names[many_names['name']=='Burro Trail']['State'].value_counts()

colorado    2
Name: State, dtype: int64

In [478]:
dataframes = []
for name in unique_names:
    dictionary = many_names[many_names['name'] == name]['State'].value_counts().to_dict()
    for state, number in dictionary.items():
        if number > 2:
            df = many_names[(many_names['name'] == name) & (many_names['State']==state)]
            dataframes.append(df)

In [479]:
multiple_names_state = pd.concat(dataframes, ignore_index = True)
multiple_names_state.shape
#there are 410 names that are multiples in one state 

(410, 11)

In [480]:
names = multiple_names_state['name'].unique()
len(list(names))
#there are 70 unique names amongst 410 that are repeated

70

In [481]:
multiple_names_state[multiple_names_state['name']=='Green Trail']['location'].value_counts()

Dawsonville, Georgia               1
Baden, Maryland                    1
Oneonta, New York                  1
Easton, Connecticut                1
Croom, Maryland                    1
Medicine Park, Oklahoma            1
Fair Hill, Maryland                1
Mount Zion, Georgia                1
Lakewood, New York                 1
Fort Bragg, North Carolina         1
, Michigan                         1
Berrien Springs, Michigan          1
Mineral Springs, North Carolina    1
Adrian, Michigan                   1
Glens Falls North, New York        1
Bethel, Connecticut                1
Webster, New York                  1
Savage, Maryland                   1
Naples, New York                   1
Fairlawn, Ohio                     1
Brighton, New York                 1
Devola, Ohio                       1
Pumpkin Center, North Carolina     1
Athens, Georgia                    1
Stillwater, Oklahoma               1
Clinton, Maryland                  1
New Concord, Ohio                  1
C

In [482]:
frames = []
for name in names:
    dictionary = multiple_names_state[multiple_names_state['name'] == name]['location'].value_counts().to_dict()
    for location, number in dictionary.items():
        if number > 2:
            df = multiple_names_state[(multiple_names_state['name'] == name) & (multiple_names_state['location']==location)]
            frames.append(df)

In [483]:
multiple_names_location = pd.concat(frames, ignore_index = True)
multiple_names_location.shape

(13, 11)

In [484]:
multiple_names_location.head(13)
#this goes to show that there are only 4 locations in the entire country that have trails with the same names

Unnamed: 0,name,location,State,type,difficulty,length,ascent,descent,stars,latitude,longitude
0,Northeast Blue,"Lake Lindsey, Florida",florida,Trail,greenBlue,1.0,21,-21,4.0,28.6218,-82.2641
1,Northeast Blue,"Lake Lindsey, Florida",florida,Trail,greenBlue,0.5,6,-8,4.0,28.6201,-82.2595
2,Northeast Blue,"Lake Lindsey, Florida",florida,Trail,green,1.2,46,-42,3.0,28.6169,-82.2483
3,Wildwood Canyon Trail,"Casa Conejo, California",california,Trail,blueBlack,0.2,53,0,4.0,34.2149,-118.9111
4,Wildwood Canyon Trail,"Casa Conejo, California",california,Trail,blue,0.7,46,-33,3.0,34.2114,-118.9243
5,Wildwood Canyon Trail,"Casa Conejo, California",california,Trail,greenBlue,0.9,85,0,2.0,34.2131,-118.9093
6,Wildwood Canyon Trail,"Casa Conejo, California",california,Trail,greenBlue,0.4,46,-1,2.0,34.2149,-118.9159
7,Sedgwick County Park,"Wichita, Kansas",kansas,Trail,green,0.8,4,-3,4.0,37.7095,-97.4175
8,Sedgwick County Park,"Wichita, Kansas",kansas,Trail,green,1.6,10,-14,4.0,37.723,-97.4123
9,Sedgwick County Park,"Wichita, Kansas",kansas,Trail,green,1.0,16,-1,4.0,37.71,-97.4216


In [485]:
US_trails['stars'].value_counts()

3.0    4963
4.0    4527
5.0    1957
2.0    1483
3.5    1046
4.5     924
4.3     705
0.0     594
3.7     495
3.3     441
3.8     407
2.5     402
4.7     371
4.4     297
4.2     294
4.8     260
4.6     227
4.1     212
3.6     176
3.9     172
3.4     147
2.7     137
1.0     129
2.8     101
2.3     101
3.2     101
4.9      71
1.5      56
3.1      43
2.6      26
2.9      20
1.7      18
2.2      16
2.4      15
1.8      10
1.3       4
2.1       1
1.2       1
Name: stars, dtype: int64

In [486]:
US_trails['type'].value_counts()

Trail            18297
Featured Ride     2653
Name: type, dtype: int64

In [487]:
US_trails['trail_type'] = US_trails['type']

In [488]:
US_trails = pd.get_dummies(US_trails, columns=['type'])

In [489]:
US_trails.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail
0,Peacock Flats,"Mokulēia, Hawaii",hawaii,dblack,22.0,3168,-3168,5.0,21.5816,-158.1929,Featured Ride,1,0
1,Manana Out and Back,"Waimalu, Hawaii",hawaii,black,3.5,362,-361,4.0,21.4301,-157.9385,Featured Ride,1,0
2,Waimanalo MTB,"Waimanalo, Hawaii",hawaii,greenBlue,3.5,279,-285,3.0,21.3426,-157.7429,Featured Ride,1,0
3,Kealia Trail,"Mokulēia, Hawaii",hawaii,dblack,2.8,2,-1681,5.0,21.554,-158.2139,Trail,0,1
4,Ohana Trail,"Maunawili, Hawaii",hawaii,blue,4.2,639,-773,4.7,21.3721,-157.7452,Trail,0,1


In [490]:
US_trails['difficulty'].value_counts()

blue         6810
greenBlue    4624
blueBlack    3497
green        3231
black        2323
dblack        438
missing        27
Name: difficulty, dtype: int64

In [491]:
difficulty = ['dblack', 'black', 'greenBlue', 'blue', 'blueBlack', 'green']
US_trails = US_trails[US_trails['difficulty'].isin(difficulty)].reset_index(drop=True)

In [492]:
def difficulty_encode(string):
    if string == 'green':
        return 1
    if string == 'greenBlue':
        return 2
    if string == 'blue':
        return 3
    if string == 'blueBlack':
        return 4
    if string == 'black':
        return 5
    if string == 'dblack':
        return 6

In [493]:
US_trails['difficulty_encoded'] = US_trails['difficulty'].apply(difficulty_encode)

In [494]:
US_trails.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded
0,Peacock Flats,"Mokulēia, Hawaii",hawaii,dblack,22.0,3168,-3168,5.0,21.5816,-158.1929,Featured Ride,1,0,6
1,Manana Out and Back,"Waimalu, Hawaii",hawaii,black,3.5,362,-361,4.0,21.4301,-157.9385,Featured Ride,1,0,5
2,Waimanalo MTB,"Waimanalo, Hawaii",hawaii,greenBlue,3.5,279,-285,3.0,21.3426,-157.7429,Featured Ride,1,0,2
3,Kealia Trail,"Mokulēia, Hawaii",hawaii,dblack,2.8,2,-1681,5.0,21.554,-158.2139,Trail,0,1,6
4,Ohana Trail,"Maunawili, Hawaii",hawaii,blue,4.2,639,-773,4.7,21.3721,-157.7452,Trail,0,1,3


In [495]:
US_trails['difficulty_encoded'].value_counts()

3    6810
2    4624
4    3497
1    3231
5    2323
6     438
Name: difficulty_encoded, dtype: int64

In [496]:
US_trails['difficulty'].value_counts()

blue         6810
greenBlue    4624
blueBlack    3497
green        3231
black        2323
dblack        438
Name: difficulty, dtype: int64

In [497]:
def get_difficulty(string):
    if string == 'greenBlue':
        return 'Green/Blue'
    if string == 'blueBlack':
        return 'Blue/Black'
    if string == 'dblack':
        return 'Double Black'
    else:
        return string.title()

In [498]:
US_trails['difficulty'] = US_trails['difficulty'].apply(get_difficulty)

In [499]:
US_trails['difficulty'].value_counts()

Blue            6810
Green/Blue      4624
Blue/Black      3497
Green           3231
Black           2323
Double Black     438
Name: difficulty, dtype: int64

In [500]:
def get_location(string):
    location_state = string.split(',')
    return location_state[0]

In [501]:
location = 'Grand Junction, Colorado'
get_location(location)

'Grand Junction'

In [502]:
US_trails['location'] = US_trails['location'].apply(get_location)

In [503]:
US_trails.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded
0,Peacock Flats,Mokulēia,hawaii,Double Black,22.0,3168,-3168,5.0,21.5816,-158.1929,Featured Ride,1,0,6
1,Manana Out and Back,Waimalu,hawaii,Black,3.5,362,-361,4.0,21.4301,-157.9385,Featured Ride,1,0,5
2,Waimanalo MTB,Waimanalo,hawaii,Green/Blue,3.5,279,-285,3.0,21.3426,-157.7429,Featured Ride,1,0,2
3,Kealia Trail,Mokulēia,hawaii,Double Black,2.8,2,-1681,5.0,21.554,-158.2139,Trail,0,1,6
4,Ohana Trail,Maunawili,hawaii,Blue,4.2,639,-773,4.7,21.3721,-157.7452,Trail,0,1,3


In [504]:
def capitalize_state(string):
    if string == 'dc':
        return string.upper()
    else:
        return string.title()

In [505]:
US_trails['State'] = US_trails['State'].apply(capitalize_state)

In [506]:
US_trails['State'].value_counts()

California        2679
Colorado          1953
Idaho             1020
Washington         837
Virginia           796
Utah               782
Arizona            781
Pennsylvania       778
Texas              747
New Mexico         675
Oregon             668
New York           607
North Carolina     548
Massachusetts      478
Wyoming            407
Tennessee          396
Michigan           384
Florida            368
Georgia            367
Montana            356
Wisconsin          312
West Virginia      309
Arkansas           304
New Hampshire      282
Vermont            272
Minnesota          271
Alabama            262
Missouri           261
Ohio               260
Kentucky           243
Maryland           237
Nevada             222
Connecticut        211
Illinois           207
Kansas             190
Indiana            189
Iowa               169
South Carolina     167
New Jersey         160
Alaska             145
Maine              125
Oklahoma            93
Mississippi         92
South Dakot

In [507]:
US_trails['length'].max()

294.30000000000001

In [508]:
US_trails.sort_values('length', ascending = False)
#this shows that there are 23 trails above 100 miles

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded
7705,Bikepacking: Moab to Boulder Town,Moab,Utah,Green/Blue,294.3,16628,-14007,4.0,38.5753,-109.5508,Featured Ride,1,0,2
7125,RockStar TRAIL Route,Harrisonburg,Virginia,Black,283.3,29256,-29640,5.0,38.4475,-78.8722,Featured Ride,1,0,5
6843,Segment 4: Boulder Town to Monticello,Escalante,Utah,Blue,242.5,22367,-21841,5.0,37.9010,-111.4127,Trail,0,1,3
7714,Segment 5: Monticello to Nucla,Monticello,Utah,Blue,237.5,22372,-23564,5.0,37.8698,-109.3432,Trail,0,1,3
6829,Segment 3: Navajo Lake to Boulder Town,Parowan,Utah,Blue,220.6,23021,-25941,5.0,37.5165,-112.8128,Trail,0,1,3
5126,Segment 1: Las Vegas to St. George,Paradise,Nevada,Blue,211.6,15628,-15048,5.0,36.0868,-115.1369,Trail,0,1,3
13904,Cowboy Trail,Norfolk,Nebraska,Green,195.2,1582,-513,5.0,42.0042,-97.4250,Trail,0,1,1
6860,Segment 6: Nucla to Durango,Montrose,Colorado,Blue,194.4,21880,-21208,5.0,38.2706,-108.5456,Trail,0,1,3
10017,Chesapeake & Ohio Canal Trail,Cumberland,Maryland,Green,191.9,1723,-2312,3.7,39.6493,-78.7630,Trail,0,1,1
6230,Segment 2: St George to Navajo Lake,Saint George,Utah,Blue,154.2,16422,-9581,5.0,37.0953,-113.5571,Trail,0,1,3


In [509]:
trails_greater_than_30 = US_trails[US_trails['length']>30]
trails_greater_than_30.shape

(229, 14)

In [510]:
trails_greater_than_30

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded
235,Lake Apopka Loop Trail,Paradise Heights,Florida,Green,36.1,170,-170,4.7,28.6346,-81.5504,Featured Ride,1,0,1
270,Fresno-Sauceda Loop - IMBA EPIC,Lajitas,Texas,Black,59.0,3914,-3916,4.8,29.2730,-103.7567,Featured Ride,1,0,5
271,Sauceda to Solitario Loop,Lajitas,Texas,Blue/Black,49.6,3437,-3434,5.0,29.4705,-103.9579,Featured Ride,1,0,4
307,Glenn Springs to Hot Springs,Big Bend National Park,Texas,Green/Blue,43.8,1358,-2625,3.0,29.2706,-103.1507,Featured Ride,1,0,2
824,AZT: Segment 1 - Canelo Hills,Sierra Vista Southeast,Arizona,Blue/Black,43.2,4825,-5985,3.0,31.3336,-110.2975,Trail,0,1,4
825,Geronimo Trail (FS Road 63),Lordsburg,New Mexico,Blue,41.5,1388,-2647,2.5,31.5258,-108.9760,Trail,0,1,3
868,Wild Azalea Trail (WAT),Woodworth,Louisiana,Blue,49.8,2176,-2176,4.2,31.1473,-92.5103,Featured Ride,1,0,3
909,Elephant Head Loop,Tubac,Arizona,Blue/Black,30.1,2708,-2705,2.0,31.6744,-110.9497,Featured Ride,1,0,4
934,"AZT: Segment 2 - Santa Rita Mountains, Kentuck...",Rio Rico,Arizona,Green,40.4,3233,-2236,0.0,31.5281,-110.7114,Trail,0,1,1
946,The Huachuca Mountain Ride,Sierra Vista,Arizona,Blue/Black,41.4,4577,-4576,5.0,31.5095,-110.2592,Featured Ride,1,0,4


In [511]:
def bin_column(df, column, new_column, cut_points, label_names):
    df[new_column] = pd.cut(df[column],cut_points,labels=label_names)
    return df

In [512]:
length_cuts = [-1.0,5.0,10.0,15.0,20.0,25.0,30.0,50.0,100,500]
length_ranges = ['0-5','5-10','10-15','15-20','20-25','25-30','30-50','50-100','100+']
US_trails = bin_column(US_trails,'length','length_range',length_cuts,length_ranges)

In [513]:
US_trails.shape

(20923, 15)

In [514]:
US_trails['length_range'].value_counts()

0-5       16487
5-10       2392
10-15       999
15-20       453
20-25       251
30-50       150
25-30       112
50-100       56
100+         23
Name: length_range, dtype: int64

In [515]:
Colorado = US_trails[US_trails['State'] == 'Colorado']
Colorado.shape

(1953, 15)

In [516]:
Colorado.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
6294,Grandview Ridge & Horse Gulch Loop,Durango,Colorado,Blue/Black,16.1,1822,-1830,4.0,37.224,-107.8564,Featured Ride,1,0,4,15-20
6295,Sale Barn - South Rim - Carbon Junction,Durango,Colorado,Blue,7.2,752,-766,3.5,37.2238,-107.8563,Featured Ride,1,0,3,5-10
6298,Big Canyon,Durango,Colorado,Blue,2.0,474,-5,4.2,37.2309,-107.8607,Trail,0,1,3,0-5
6299,Cowboy,Durango,Colorado,Green/Blue,2.7,115,-492,4.0,37.2528,-107.8396,Trail,0,1,2,0-5
6306,Sale Barn,Durango,Colorado,Blue,1.6,353,-60,3.5,37.2238,-107.8563,Trail,0,1,3,0-5


In [517]:
Colorado.sort_values('location')

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
16837,Lake Loop,,Colorado,Green,1.3,8,-8,0.0,39.7557,-105.1027,Trail,0,1,1,0-5
9686,East Fork Sand Canyon Trail,,Colorado,Blue,0.7,1,-302,0.0,38.8360,-104.8957,Trail,0,1,3,0-5
17396,FS Road 306,,Colorado,Green/Blue,1.3,363,-11,0.0,40.5275,-106.6979,Trail,0,1,2,0-5
9662,Trail 721,Air Force Academy,Colorado,Blue,1.8,50,-461,2.0,39.0071,-104.9432,Trail,0,1,3,0-5
9663,Hay Creek East Road (313),Air Force Academy,Colorado,Blue,2.1,121,-598,2.0,39.0086,-104.9407,Trail,0,1,3,0-5
9664,Hell Creek Road (311),Air Force Academy,Colorado,Blue,4.0,111,-912,2.0,39.0072,-104.9558,Trail,0,1,3,0-5
9665,Hell Creek Spur (311.A),Air Force Academy,Colorado,Blue,1.4,79,-267,2.0,39.0319,-104.9391,Trail,0,1,3,0-5
9666,St. Francis Road,Air Force Academy,Colorado,Green/Blue,0.9,319,-6,2.0,38.9490,-104.8863,Trail,0,1,2,0-5
9580,USAFA Southern Perimeter Rd,Air Force Academy,Colorado,Blue,4.1,1169,-207,3.0,38.9551,-104.8354,Trail,0,1,3,0-5
9603,Foothills,Air Force Academy,Colorado,Green/Blue,5.4,292,-816,3.0,38.9353,-104.8747,Trail,0,1,2,5-10


In [518]:
less_than_one = [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
zero = US_trails[US_trails['length'].isin(less_than_one)]

In [519]:
zero.shape

(6138, 15)

In [520]:
US_trails['ascent'].max()

29256

In [521]:
crazy_ascent = US_trails[US_trails['ascent']==US_trails['ascent'].max()]
crazy_ascent

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
7125,RockStar TRAIL Route,Harrisonburg,Virginia,Black,283.3,29256,-29640,5.0,38.4475,-78.8722,Featured Ride,1,0,5,100+


In [522]:
US_trails.sort_values('ascent').head(-5)

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
10461,Creek Side,El Jebel,Colorado,Blue,1.0,0,-439,4.0,39.3241,-107.1395,Trail,0,1,3,0-5
8797,Upper Hare Ball,Marlinton,West Virginia,Black,0.3,0,-342,4.5,38.4182,-80.0012,Trail,0,1,5,0-5
8809,Upper Beaver Dam,Marlinton,West Virginia,Blue/Black,0.4,0,-197,4.5,38.4438,-80.0164,Trail,0,1,4,0-5
8815,6000 Steps,Marlinton,West Virginia,Blue,1.5,0,-504,4.3,38.4095,-79.9688,Trail,0,1,3,0-5
8817,Dowell's Draft Trail,Bridgewater,Virginia,Blue,3.5,0,-1186,4.3,38.3127,-79.2645,Trail,0,1,3,0-5
8829,Easy Street,Marlinton,West Virginia,Green,2.3,0,-615,4.0,38.4139,-79.9961,Trail,0,1,1,0-5
8833,Sweet Dream,Marlinton,West Virginia,Black,0.4,0,-296,4.0,38.4256,-80.0084,Trail,0,1,5,0-5
8847,Red Run Trail #439,Marlinton,West Virginia,Blue/Black,1.8,0,-447,4.0,38.3625,-80.1528,Trail,0,1,4,0-5
8786,Slate Springs,Dayton,Virginia,Black,2.3,0,-1911,4.5,38.5121,-79.1773,Trail,0,1,5,0-5
8848,Gravel DoubleTrack,Massanutten,Virginia,Green,1.3,0,-293,4.0,38.4347,-78.7529,Trail,0,1,1,0-5


In [523]:
US_trails['difficulty'].value_counts()

Blue            6810
Green/Blue      4624
Blue/Black      3497
Green           3231
Black           2323
Double Black     438
Name: difficulty, dtype: int64

In [559]:
US_trails.sort_values(by='State', inplace = True)

In [525]:
US_trails.shape

(20923, 15)

In [534]:
US_trails[US_trails['State']=='Wyoming']['location'].value_counts()

Laramie                      41
Cody                         40
Sundance                     30
Lander                       27
Buffalo                      25
Cheyenne                     25
Moose Wilson Road            21
Centennial                   21
Sheridan                     19
Glendo                       19
Jackson                      14
Wilson                       13
Green River                  12
Newcastle                    11
Pinedale                     10
Afton                        10
Alta                         10
Thermopolis                   9
Casper                        8
Rafter J Ranch                7
Evanston                      6
Ranchettes                    5
Hoback                        3
Madison Junction              2
Tower Junction                2
Old Faithful Village          2
Lake Village                  2
Mills                         2
Mammoth Hot Springs           2
Dubois                        2
Greybull                      1
Gillette

In [535]:
US_trails[US_trails['State']=='New Hampshire']['location'].value_counts()

Lebanon                         28
North Conway                    26
Littleton                       26
Allenstown Elementary School    24
Hanover                         22
Stratham Station                13
Franklin                        12
Auburn                          11
Hollis                          11
Wilton                          10
Conway                          10
Londonderry                      9
Hampstead                        9
Raymond                          8
Litchfield                       8
                                 6
Merrimack                        6
Nottingham                       5
East Merrimack                   5
Exeter                           5
Candia                           4
Nashua                           4
Hill                             4
Claremont                        3
Madbury                          3
Goffstown                        2
Bartlett                         2
Keene                            1
Wolfeboro           

In [538]:
US_trails[US_trails['location']=='']['location'].value_counts()

    232
Name: location, dtype: int64

In [539]:
US_trails[(US_trails['State']=='New Hampshire') & (US_trails['location']=='')]

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
15565,Inner Loop Trail,,New Hampshire,Green,0.7,30,-27,0.0,43.0325,-70.8915,Trail,0,1,1,0-5
15566,Upper Field Trail,,New Hampshire,Green,0.2,1,-2,0.0,43.0345,-70.89,Trail,0,1,1,0-5
15567,Mid Lake Trail,,New Hampshire,Green,0.5,26,-26,0.0,43.0413,-70.8929,Trail,0,1,1,0-5
16090,Center Connector Trail,,New Hampshire,Green,0.3,28,0,0.0,43.1768,-70.9314,Trail,0,1,1,0-5
16091,Middle Madbury Trail,,New Hampshire,Green,0.2,22,-2,0.0,43.172,-70.9403,Trail,0,1,1,0-5
16092,East Access Trail,,New Hampshire,Green,0.4,4,-7,0.0,43.1746,-70.9463,Trail,0,1,1,0-5


In [560]:
blank_location = US_trails[US_trails['location']==''].reset_index(drop=True)

In [561]:
blank_location['length_range'].value_counts()

0-5       208
5-10       11
50-100      7
10-15       4
30-50       1
15-20       1
100+        0
25-30       0
20-25       0
Name: length_range, dtype: int64

In [562]:
blank_location['difficulty'].value_counts()

Green         131
Green/Blue     64
Blue           28
Blue/Black      6
Black           3
Name: difficulty, dtype: int64

In [563]:
blank_location.sort_values(by = ['State'], inplace = True)

In [564]:
blank_location.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
0,Entry Path,,Alabama,Green,0.5,68,-34,2.0,32.3697,-86.18,Trail,0,1,1,0-5
1,Course C,,Alabama,Green,1.9,134,-137,0.0,32.3684,-86.181,Trail,0,1,1,0-5
2,Course A,,Alabama,Green,1.1,47,-45,0.0,32.367,-86.1814,Trail,0,1,1,0-5
3,Course B,,Alabama,Green,1.0,42,-51,0.0,32.3673,-86.1886,Trail,0,1,1,0-5
4,Homestead Loop Trail,,Alaska,Green/Blue,3.3,396,-384,0.0,59.6605,-151.6285,Trail,0,1,2,0-5


In [569]:
blank_location.groupby('State').agg('count')['location']
#This goes to show my dropdown menu for locations doesn't 
#work for states that have blank locations so I'll remove
#trails that have blank locations

State
Alabama            4
Alaska             3
Arizona            8
Arkansas           9
California        21
Colorado           3
Connecticut        9
Florida            8
Georgia            6
Hawaii             5
Idaho              6
Illinois           3
Maine              4
Massachusetts     19
Michigan           9
Minnesota          1
Mississippi        2
Montana            4
Nevada             1
New Hampshire      6
New Jersey        15
New Mexico         4
New York          10
North Carolina     7
North Dakota       7
Ohio               4
Oregon            21
Pennsylvania       5
South Dakota       7
Texas              6
Utah               7
Vermont            2
Virginia           4
Washington         1
West Virginia      1
Name: location, dtype: int64

In [574]:
US_trails = US_trails[US_trails['location'] != ''].reset_index(drop=True)

In [575]:
US_trails.shape

(20691, 15)

In [576]:
US_trails.head()

Unnamed: 0,name,location,State,difficulty,length,ascent,descent,stars,latitude,longitude,trail_type,type_Featured Ride,type_Trail,difficulty_encoded,length_range
0,Red Trail,Heflin,Alabama,Blue/Black,5.6,565,-555,2.0,33.7824,-85.5824,Trail,0,1,4,5-10
1,Main Trail,Northport,Alabama,Blue,2.3,152,-150,4.5,33.265,-87.5351,Trail,0,1,3,0-5
2,Tashka Trail,Northport,Alabama,Blue/Black,8.6,779,-727,4.5,33.2866,-87.6848,Trail,0,1,4,5-10
3,Della Raye Loop,Northport,Alabama,Blue,0.9,57,-53,4.5,33.2696,-87.534,Trail,0,1,3,0-5
4,South Loop,Holt,Alabama,Green/Blue,1.1,60,-52,4.5,33.2139,-87.4571,Trail,0,1,2,0-5
