## Start Elasticsearch

From a terminal, change into the Elasticsearch `bin` directory and launch the server:

    cd elasticsearch-6.1.1/bin

    ./elasticsearch

## Make sure ES is running:

In [1]:
import requests
# Ping the local Elasticsearch node; the reply body is printed as raw bytes.
# The timeout stops this cell from blocking forever if the server accepts the
# connection but never answers (requests waits indefinitely by default).
res = requests.get('http://localhost:9200', timeout=5)
print(res.content)

b'{\n  "name" : "P6we63q",\n  "cluster_name" : "elasticsearch",\n  "cluster_uuid" : "GMQoOO63T_yJwEJhf6J-0w",\n  "version" : {\n    "number" : "6.1.1",\n    "build_hash" : "bd92e7f",\n    "build_date" : "2017-12-17T20:23:25.338Z",\n    "build_snapshot" : false,\n    "lucene_version" : "7.1.0",\n    "minimum_wire_compatibility_version" : "5.6.0",\n    "minimum_index_compatibility_version" : "5.0.0"\n  },\n  "tagline" : "You Know, for Search"\n}\n'


 ## Connect to ES cluster:

In [2]:
from elasticsearch import Elasticsearch
# Client handle used by the indexing and search cells; it points at the local
# node on localhost:9200.
es = Elasticsearch(
    hosts=[{'host': 'localhost', 'port': 9200}],
)

## Look for my data

In [3]:
ls

[0m[01;34mdata[0m/  ES_testing.ipynb  ghostdriver.log  [01;34mnotebooks[0m/  README.md  [01;34msrc[0m/  [01;34mweb_app[0m/


In [4]:
cd data

/home/karen/Documents/ski-recommender/data


In [5]:
ls

Alpine_Meadows.txt  Crested_Butte.txt  Eldora.txt    [0m[01;34mnew[0m/              Vail.txt
Arapahoe_Basin.txt  df2.pkl            Loveland.txt  resort_dict2.pkl  WP.csv
Beaver_Creek.txt    df.pkl             Monarch.txt   resort_dict.pkl
Copper.txt          DP.txt             mtn_df.pkl    Taos.txt


## Import dataframe with Trail data:

In [6]:
import pickle

In [7]:
# Load the pickled trail DataFrame from the current working directory.
# The `with` block closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('df.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

In [10]:
# Preview the first ten trails instead of rendering the entire frame.
df.head(10)

Unnamed: 0,trail_name,top_elev_(ft),bottom_elev_(ft),vert_rise_(ft),slope_length_(ft),avg_width_(ft),slope_area_(acres),avg_grade_(%),max_grade_(%),ability_level,resort,location,groomed,colors,ability_nums,color_nums
0,Over the Rainbow,11924.0,10858.0,1066.0,2404.0,319.0,17.6,50.0,69.0,Expert,Loveland,CO,0,bb,6,4
1,Zoom,11405.0,11005.0,400.0,839.0,195.0,3.7,55.0,66.0,Expert,Loveland,CO,0,bb,6,4
2,Avalanche Bowl,11840.0,11405.0,435.0,1141.0,255.0,6.7,42.0,76.0,Expert,Loveland,CO,0,bb,6,4
3,Tiger's Tail,11856.0,11469.0,386.0,1183.0,136.0,3.7,35.0,61.0,Expert,Loveland,CO,0,black,6,3
4,Spillway,11849.0,11631.0,218.0,702.0,198.0,3.2,33.0,41.0,Intermediate,Loveland,CO,1,blue,4,2
5,Waterfall,11638.0,11515.0,124.0,370.0,172.0,1.5,36.0,45.0,Advanced,Loveland,CO,0,blue,5,2
6,Upper Richard's,11816.0,11702.0,113.0,348.0,159.0,1.3,35.0,38.0,Intermediate,Loveland,CO,0,blue,4,2
7,Cat Walk,11856.0,11644.0,212.0,1830.0,63.0,2.7,12.0,31.0,Low Intermediate,Loveland,CO,1,green,3,1
8,Holy Cat,11141.0,10917.0,224.0,505.0,90.0,1.0,50.0,67.0,Expert,Loveland,CO,0,black,6,3
9,Busy Gully,11421.0,10903.0,518.0,1394.0,115.0,3.7,41.0,65.0,Expert,Loveland,CO,0,black,6,3


## Get data in dictionary form to be indexed

In [11]:
# One dict per trail (column name -> value); each dict is later passed to
# es.index() as a document body.
dict_df = df.to_dict(orient='records')

In [12]:
# Evaluate the records; the trailing semicolon suppresses the cell output.
dict_df;

## Index (looping over trails)

In [22]:
# Index every trail record into the 'ski' index, one document per trail.
# Document ids are sequential and start at 1, so id N holds the N-th record
# (DataFrame row N-1).
for doc_id, trail in enumerate(dict_df, start=1):
    es.index(index='ski', doc_type='runs', id=doc_id, body=trail)

## Check that a given document id returns the record I expect

In [23]:
# Fetch a single document by id to confirm the indexing round-trip.
# Ids were assigned starting at 1, so id=5 is the fifth trail (DataFrame row 4).
es.get(
    index='ski',
    doc_type='runs',
    id=5,
)

{'_id': '5',
 '_index': 'ski',
 '_source': {'ability_level': 'Intermediate',
  'ability_nums': 4,
  'avg_grade_(%)': 33.0,
  'avg_width_(ft)': 198.0,
  'bottom_elev_(ft)': 11631.0,
  'color_nums': 2,
  'colors': 'blue',
  'groomed': 1,
  'location': 'CO',
  'max_grade_(%)': 41.0,
  'resort': 'Loveland',
  'slope_area_(acres)': 3.2,
  'slope_length_(ft)': 702.0,
  'top_elev_(ft)': 11849.0,
  'trail_name': 'Spillway',
  'vert_rise_(ft)': 218.0},
 '_type': 'runs',
 '_version': 1,
 'found': True}

## Try different searches!

In [24]:
# `match` query on the trail name.
es.search(
    index="ski",
    body={"query": {"match": {'trail_name': 'Over the Rainbow'}}},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '1',
    '_index': 'ski',
    '_score': 10.562768,
    '_source': {'ability_level': 'Expert',
     'ability_nums': 6,
     'avg_grade_(%)': 50.0,
     'avg_width_(ft)': 319.0,
     'bottom_elev_(ft)': 10858.0,
     'color_nums': 4,
     'colors': 'bb',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 69.0,
     'resort': 'Loveland',
     'slope_area_(acres)': 17.6,
     'slope_length_(ft)': 2404.0,
     'top_elev_(ft)': 11924.0,
     'trail_name': 'Over the Rainbow',
     'vert_rise_(ft)': 1066.0},
    '_type': 'runs'},
   {'_id': '925',
    '_index': 'ski',
    '_score': 4.405436,
    '_source': {'ability_level': 'Advanced',
     'ability_nums': 5,
     'avg_grade_(%)': 33.0,
     'avg_width_(ft)': 171.0,
     'bottom_elev_(ft)': 9460.0,
     'color_nums': 3,
     'colors': 'black',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 45.0,
     'resort': 'Winter 

`match` returns every document that contains any of the terms in the query (here "over", "the", or "rainbow"), ranked by relevance score.

In [28]:
# `match_phrase` query on the trail name.
es.search(
    index="ski",
    body={"query": {"match_phrase": {'trail_name': 'Over the Rainbow'}}},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '1',
    '_index': 'ski',
    '_score': 10.562768,
    '_source': {'ability_level': 'Expert',
     'ability_nums': 6,
     'avg_grade_(%)': 50.0,
     'avg_width_(ft)': 319.0,
     'bottom_elev_(ft)': 10858.0,
     'color_nums': 4,
     'colors': 'bb',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 69.0,
     'resort': 'Loveland',
     'slope_area_(acres)': 17.6,
     'slope_length_(ft)': 2404.0,
     'top_elev_(ft)': 11924.0,
     'trail_name': 'Over the Rainbow',
     'vert_rise_(ft)': 1066.0},
    '_type': 'runs'}],
  'max_score': 10.562768,
  'total': 1},
 'timed_out': False,
 'took': 44}

Match phrase will match the exact phrase in the query

In [30]:
# `match_phrase_prefix` query: trail names containing a word that starts with 'Over'.
es.search(
    index="ski",
    body={"query": {"match_phrase_prefix": {'trail_name': 'Over'}}},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '189',
    '_index': 'ski',
    '_score': 6.210478,
    '_source': {'ability_level': 'Advanced',
     'ability_nums': 5,
     'avg_grade_(%)': 34.0,
     'avg_width_(ft)': 165.0,
     'bottom_elev_(ft)': 9925.0,
     'color_nums': 3,
     'colors': 'black',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 48.0,
     'resort': 'Copper',
     'slope_area_(acres)': 10.5,
     'slope_length_(ft)': 2778.0,
     'top_elev_(ft)': 10828.0,
     'trail_name': 'Overlode',
     'vert_rise_(ft)': 903.0},
    '_type': 'runs'},
   {'_id': '436',
    '_index': 'ski',
    '_score': 6.1848407,
    '_source': {'ability_level': 'Low Intermediate',
     'ability_nums': 3,
     'avg_grade_(%)': 18.0,
     'avg_width_(ft)': 238.0,
     'bottom_elev_(ft)': 10237.0,
     'color_nums': 1,
     'colors': 'green',
     'groomed': 1,
     'location': 'CO',
     'max_grade_(%)': 31.0,
     'resort': 'Vai

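`match_phrase_prefix` works like `match_phrase`, except that the last term in the query is treated as a prefix, so it matches any word that starts with it (e.g. "Over" matches "Overlode").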

In [110]:
# `range` query: trails whose vertical rise lies between 2000 and 2300 ft (inclusive).
es.search(
    index="ski",
    body={
        "query": {
            "range": {'vert_rise_(ft)': {'gte': '2000', 'lte': '2300'}}
        }
    },
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '200',
    '_index': 'ski',
    '_score': 1.0,
    '_source': {'ability_level': 'Intermediate',
     'ability_nums': 4,
     'avg_grade_(%)': 25.0,
     'avg_width_(ft)': 140.0,
     'bottom_elev_(ft)': 9785.0,
     'color_nums': 2,
     'colors': 'blue',
     'groomed': 1,
     'location': 'CO',
     'max_grade_(%)': 46.0,
     'resort': 'Copper',
     'slope_area_(acres)': 28.8,
     'slope_length_(ft)': 8946.0,
     'top_elev_(ft)': 11921.0,
     'trail_name': 'Collage',
     'vert_rise_(ft)': 2136.0},
    '_type': 'runs'},
   {'_id': '1053',
    '_index': 'ski',
    '_score': 1.0,
    '_source': {'ability_level': 'Expert',
     'ability_nums': 6,
     'avg_grade_(%)': 34.0,
     'avg_width_(ft)': 246.0,
     'bottom_elev_(ft)': 8938.0,
     'color_nums': 4,
     'colors': 'bb',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 70.0,
     'resort': 'Beaver Creek',
     'slo

Range will give anything in the range between gte (greater than or equal to) and lte (less than or equal to)

In [39]:
# `bool` query: a scored name-prefix match combined with an unscored
# lower bound on the vertical rise.
es.search(
    index="ski",
    body={
        "query": {
            "bool": {
                'must': {"match_phrase_prefix": {'trail_name': 'Over'}},
                'filter': {"range": {'vert_rise_(ft)': {"gte": '900'}}},
            }
        }
    },
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '189',
    '_index': 'ski',
    '_score': 6.210478,
    '_source': {'ability_level': 'Advanced',
     'ability_nums': 5,
     'avg_grade_(%)': 34.0,
     'avg_width_(ft)': 165.0,
     'bottom_elev_(ft)': 9925.0,
     'color_nums': 3,
     'colors': 'black',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 48.0,
     'resort': 'Copper',
     'slope_area_(acres)': 10.5,
     'slope_length_(ft)': 2778.0,
     'top_elev_(ft)': 10828.0,
     'trail_name': 'Overlode',
     'vert_rise_(ft)': 903.0},
    '_type': 'runs'},
   {'_id': '1',
    '_index': 'ski',
    '_score': 4.0558224,
    '_source': {'ability_level': 'Expert',
     'ability_nums': 6,
     'avg_grade_(%)': 50.0,
     'avg_width_(ft)': 319.0,
     'bottom_elev_(ft)': 10858.0,
     'color_nums': 4,
     'colors': 'bb',
     'groomed': 0,
     'location': 'CO',
     'max_grade_(%)': 69.0,
     'resort': 'Loveland',
     's

`bool` combines several query clauses into one search. Clauses under `must` have to match and contribute to the relevance score. Clauses under `filter` also have to match, but they do not affect the score.

In [67]:
# `match` on location with a lowercase query; the hits returned carry 'CA'.
es.search(
    index="ski",
    body={"query": {"match": {'location': 'ca'}}},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '370',
    '_index': 'ski',
    '_score': 3.1954455,
    '_source': {'ability_level': 'Intermediate',
     'ability_nums': 4,
     'avg_grade_(%)': 40.0,
     'avg_width_(ft)': 276.0,
     'bottom_elev_(ft)': 7583.0,
     'color_nums': 2,
     'colors': 'blue',
     'groomed': 1,
     'location': 'CA',
     'max_grade_(%)': 44.0,
     'resort': 'Alpine Meadows',
     'slope_area_(acres)': 4.9,
     'slope_length_(ft)': 768.0,
     'top_elev_(ft)': 7867.0,
     'trail_name': 'Werner’s Schuss',
     'vert_rise_(ft)': 284.0},
    '_type': 'runs'},
   {'_id': '371',
    '_index': 'ski',
    '_score': 3.1954455,
    '_source': {'ability_level': 'Intermediate',
     'ability_nums': 4,
     'avg_grade_(%)': 32.0,
     'avg_width_(ft)': 240.0,
     'bottom_elev_(ft)': 7294.0,
     'color_nums': 2,
     'colors': 'blue',
     'groomed': 0,
     'location': 'CA',
     'max_grade_(%)': 45.0,
     'res

In [80]:
# Prefix search on a trail name that contains an apostrophe.
es.search(
    index="ski",
    body={"query": {"match_phrase_prefix": {'trail_name': 'Bobby'}}},
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '403',
    '_index': 'ski',
    '_score': 4.777178,
    '_source': {'ability_level': 'Low Intermediate',
     'ability_nums': 3,
     'avg_grade_(%)': 16.0,
     'avg_width_(ft)': 217.0,
     'bottom_elev_(ft)': 7535.0,
     'color_nums': 2,
     'colors': 'blue',
     'groomed': 1,
     'location': 'CA',
     'max_grade_(%)': 27.0,
     'resort': 'Alpine Meadows',
     'slope_area_(acres)': 4.4,
     'slope_length_(ft)': 875.0,
     'top_elev_(ft)': 7673.0,
     'trail_name': 'Bobby’s Run',
     'vert_rise_(ft)': 138.0},
    '_type': 'runs'}],
  'max_score': 4.777178,
  'total': 1},
 'timed_out': False,
 'took': 3}

## Get searchable fields
and decide what to do with each

In [42]:
# List the DataFrame's columns; each one is a candidate search field.
df.columns

Index(['trail_name', 'top_elev_(ft)', 'bottom_elev_(ft)', 'vert_rise_(ft)',
       'slope_length_(ft)', 'avg_width_(ft)', 'slope_area_(acres)',
       'avg_grade_(%)', 'max_grade_(%)', 'ability_level', 'resort', 'location',
       'groomed', 'colors', 'ability_nums', 'color_nums'],
      dtype='object')

'trail_name' - match?                   

'top_elev_(ft)' - range

'bottom_elev_(ft)' - range

'vert_rise_(ft)' - range

'slope_length_(ft)' - range

'avg_width_(ft)' - range

'slope_area_(acres)' - range

'avg_grade_(%)' - range

'max_grade_(%)' - range

'ability_level' - ignore

'resort' - match

'location' - match

'groomed' - term

'colors' - term

'ability_nums' - ignore 

'color_nums' - ignore

## Working on having users enter search terms

In [224]:
# DEFAULTS
# Reference values for each searchable field and the variable name used for it.

# trail name
tn = "Over"

# numeric bounds, given as (min, max) pairs of strings
te_min, te_max = '6700', '13000'   # top elevation (ft)
be_min, be_max = '6700', '13000'   # bottom elevation (ft)
vr_min, vr_max = '4', '3300'       # vertical rise (ft)
sl_min, sl_max = '70', '31000'     # slope length (ft)
aw_min, aw_max = '2', '2600'       # average width (ft)
sa_min, sa_max = '0', '240'        # slope area (acres)
ag_min, ag_max = '0', '80'         # average grade (%)
mg_min, mg_max = '0', '240'        # max grade (%)

# resort
r = [
    'Loveland', 'Arapahoe Basin', 'Copper', 'Eldora', 'Alpine Meadows', 'Vail',
    'Monarch', 'Crested Butte', 'Taos', 'Diamond Peak', 'Winter Park',
    'Beaver Creek',
]
# location
l = ['CO', 'CA', 'NM', 'NV']
# groomed
g = ['0', '1']
# color
c = ['bb', 'black', 'blue', 'green']

In [252]:
def _as_list(value):
    """Return ``value`` as a new list, wrapping a bare scalar such as a single string."""
    if isinstance(value, (list, tuple, set, frozenset)):
        return list(value)
    return [value]


def search_results(
#trail name
tn = None,
#top elevation min and max
te_min = '6700',
te_max = '13000',
#bottom elevation min and max
be_min = '6700',
be_max = '13000',
#vertical rise min and max
vr_min = '4',
vr_max = '3300',
#slope length min and max
sl_min = '70',
sl_max = '31000',
#average width min and max
aw_min = '2',
aw_max = '2600',
#slope area min and max
sa_min = '0',
sa_max = '240',
#average grade min and max
ag_min = '0',
ag_max = '80',
#max grade min and max
mg_min = '0',
mg_max = '240',
#resort
r = ['Loveland', 'Arapahoe Basin', 'Copper', 'Eldora', 'Alpine Meadows', 'Vail', 'Monarch', 'Crested Butte', 'Taos', 
     'Diamond Peak','Winter Park', 'Beaver Creek'],
#location
l = ['CO', 'CA', 'NM', 'NV'],   
#groomed
g = ['0','1'],
#color
c = ['bb', 'black', 'blue', 'green']):
    """Search the 'ski' index for trails that satisfy every given criterion.

    Any argument that is not supplied uses the default shown in the signature.
    All criteria are sent as one ``bool`` query: the numeric bounds become
    inclusive ``range`` filters, the resort/location/groomed/color values
    become ``terms`` filters, and ``tn`` (when given) is added as a ``must``
    ``match`` clause on ``trail_name``.

    Parameters
    ----------
    tn : str or None
        Trail name to match. ``None`` means no name clause is sent.
    te_min, te_max, be_min, be_max, vr_min, vr_max, sl_min, sl_max,
    aw_min, aw_max, sa_min, sa_max, ag_min, ag_max, mg_min, mg_max
        Inclusive lower/upper bounds for top elevation, bottom elevation,
        vertical rise, slope length, average width, slope area, average grade
        and max grade respectively.
    r, l, g, c : list (or a single value)
        Accepted resorts, locations, groomed flags and colors. A bare value
        such as ``r='Vail'`` is treated as a one-element list.

    Returns
    -------
    The response of ``es.search`` for the assembled query, unchanged.

    Notes
    -----
    Uses the module-level ``es`` client.
    """
    # Each resort is reduced to its first word in lowercase ('Winter Park' ->
    # 'winter'); presumably this matches how the indexed field is tokenised -
    # verify against the index mapping. Two resorts sharing a first word cannot
    # be told apart by this filter. Blank names are skipped rather than raising.
    r = [words[0].lower() for words in (name.split() for name in _as_list(r)) if words]
    l = [state.lower() for state in _as_list(l)]
    g = _as_list(g)
    # Colors are lowercased too, consistent with resort and location.
    c = [color.lower() for color in _as_list(c)]

    filters = [
        {"range": {'top_elev_(ft)': {"gte": te_min, "lte": te_max}}},
        {"range": {'bottom_elev_(ft)': {"gte": be_min, "lte": be_max}}},
        {"range": {'vert_rise_(ft)': {"gte": vr_min, "lte": vr_max}}},
        {"range": {'slope_length_(ft)': {"gte": sl_min, "lte": sl_max}}},
        {"range": {'avg_width_(ft)': {"gte": aw_min, "lte": aw_max}}},
        {"range": {'slope_area_(acres)': {"gte": sa_min, "lte": sa_max}}},
        {"range": {'avg_grade_(%)': {"gte": ag_min, "lte": ag_max}}},
        {"range": {'max_grade_(%)': {"gte": mg_min, "lte": mg_max}}},
        {"terms": {'resort': r}},
        {"terms": {'location': l}},
        {"terms": {'groomed': g}},
        {"terms": {'colors': c}},
    ]

    bool_query = {'filter': filters}
    if tn is not None:
        # The name clause is the only part of the query that is scored.
        bool_query['must'] = {"match": {'trail_name': tn}}

    return es.search(index="ski", body={"query": {"bool": bool_query}})

In [260]:
# Green or blue Winter Park trails whose max grade is between 10% and 20%.
search_results(
    r=['Winter Park'],
    c=['green', 'blue'],
    mg_min='10',
    mg_max='20',
)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'hits': {'hits': [{'_id': '852',
    '_index': 'ski',
    '_score': 0.0,
    '_source': {'ability_level': 'Novice',
     'ability_nums': 2,
     'avg_grade_(%)': 10.0,
     'avg_width_(ft)': 367.0,
     'bottom_elev_(ft)': 9390.0,
     'color_nums': 1,
     'colors': 'green',
     'groomed': 1,
     'location': 'CO',
     'max_grade_(%)': 14.0,
     'resort': 'Winter Park',
     'slope_area_(acres)': 8.47,
     'slope_length_(ft)': 1680.76,
     'top_elev_(ft)': 9565.0,
     'trail_name': 'Porcupine',
     'vert_rise_(ft)': 175.0},
    '_type': 'runs'},
   {'_id': '933',
    '_index': 'ski',
    '_score': 0.0,
    '_source': {'ability_level': 'Beginner',
     'ability_nums': 1,
     'avg_grade_(%)': 10.0,
     'avg_width_(ft)': 112.0,
     'bottom_elev_(ft)': 9450.0,
     'color_nums': 1,
     'colors': 'green',
     'groomed': 1,
     'location': 'CO',
     'max_grade_(%)': 14.0,
     'resort': 'Winter Park',
     

## Use search_results function to query!
Any search term that is not specified falls back to its default value.

Resort, location, groomed, and color must each be passed as a list, even when there is only one value (e.g. `r=['Vail']`).

See the DEFAULTS cell for the values that can be searched and the variable name used for each field.