In [3]:
import requests
import re
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
import pandas as pd
import seaborn as sns
import numpy as np
from pandas.io import sql
import sqlite3
from time import sleep
from multiprocessing import Pool

import psycopg2

In [4]:
def get_crag_ids(i, timeout=30):
    """Return scraped crag info for crag id ``i`` if its crag page exists.

    The crag page is requested once purely to look at the HTTP status code.
    Only a 200 response is handed on to ``scrape_crag_info``, which requests
    and parses the page itself.

    Parameters
    ----------
    i : int
        Crag id substituted into the crag URL.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout ``requests`` can
        wait indefinitely, which stalls the pool worker running this call.

    Returns
    -------
    dict or None
        The result of ``scrape_crag_info(i)`` for a 200 response, otherwise
        ``None``.
    """
    url_base = 'https://www.ukclimbing.com/logbook/crag.php?id={}'
    headers = {'User-Agent': 
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
    url = url_base.format(i)
    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code == 200:
        return scrape_crag_info(i)
    return None
def get_user_ids(i, timeout=30):
    """Return scraped user info for user id ``i`` unless the logbook is gone.

    The user's logbook page is requested once to read the HTTP status code.
    A 410 response is skipped; every other status is passed on to
    ``scrape_user_info``.

    Parameters
    ----------
    i : int or str
        User id substituted into the logbook URL.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout ``requests`` can
        wait indefinitely, which stalls the pool worker running this call.

    Returns
    -------
    dict or None
        The result of ``scrape_user_info(i)``, or ``None`` for a 410 response.
    """
    url_base = 'https://www.ukclimbing.com/logbook/showlog.php?id={}'
    headers = {'User-Agent': 
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
    url = url_base.format(i)
    r = requests.get(url, headers=headers, timeout=timeout)
    # NOTE(review): only 410 is treated as "missing"; other error statuses
    # (404, 5xx) still go on to the profile scrape. Confirm this is intended.
    if r.status_code == 410:
        return None
    return scrape_user_info(i)
def scrape_crag_info(i, timeout=30):
    """Scrape one crag page into a flat dict.

    Parameters
    ----------
    i : int or str
        Crag id substituted into the crag URL; stored unchanged under ``'id'``.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up.

    Returns
    -------
    dict or None
        On success a dict with the keys ``'url'``, ``'cragname'``,
        ``'climb_list'`` (a list of ``(data-id, star_count)`` tuples),
        ``'Climbs'``, ``'lat'``, ``'long'``, ``'id'`` and one key per bold
        label found in the crag info paragraph. If that paragraph cannot be
        read, ``'Rocktype'`` is set to ``'UNKNOWN'`` instead. ``None`` is
        returned, after printing a message, when the page lacks the crag name
        or the coordinates.
    """
    url_base_crag = 'https://www.ukclimbing.com/logbook/crag.php?id={}'
    headers = {'User-Agent': 
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}

    crag = {}
    url = url_base_crag.format(i)
    crag['url'] = url
    r = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
    # propagate out of a pool worker instead of being reported as bad pages.
    try:
        crag['cragname'] = soup.find('div', attrs={"id": "breadcrumb-container"}).find('h1').text
        climbs = []
        for climb in soup.find_all('tr', {'class': 'climb'}):
            # Star rating = number of literal '*' characters in the row's HTML.
            num_stars = len(re.findall(r'\*', str(climb)))
            climbs.append((climb.attrs['data-id'], num_stars))

        crag['climb_list'] = climbs
        try:
            # Pair each bold label in the info paragraph with the text that
            # follows its closing </b> tag.
            info = soup.find('div', attrs={"id": "crag_thumb"}).find('p')
            labels = info.find_all('b')
            values = re.findall('</b> ([^<]*)<', str(info))
            for attr, value in zip(labels, values):
                crag[attr.text] = value.strip()
            # Overwrite any scraped 'Climbs' label with the counted rows.
            crag['Climbs'] = len(climbs)
        except Exception:
            crag['Climbs'] = len(climbs)
            crag['Rocktype'] = 'UNKNOWN'
        # NOTE(review): the pattern is applied to the string form of the
        # og:title <meta> tag only; confirm the place:location tags really
        # appear inside that string for this parser.
        latlong = re.findall('<meta content="([^>]*)" property="place:location:', str(soup.find('meta', attrs={"property": "og:title"})))
        crag['lat'] = latlong[0]
        crag['long'] = latlong[1]
        crag['id'] = i
        return crag
    except Exception:
        print('not enough info for id ' + str(i))
        return None
    
def scrape_climb_info(climb_id, timeout=30):
    """Scrape one climb page into a dict.

    Parameters
    ----------
    climb_id : tuple
        ``(climb id, crag id)``. Element 0 is substituted into the climb URL
        and element 1 is stored unchanged under ``'crag_id'``.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up.

    Returns
    -------
    dict or None
        A dict with the keys ``'id'``, ``'url'``, ``'climbname'``,
        ``'rating'``, ``'grade'``, ``'crag_id'``, ``'desc'``, ``'logs'`` and
        ``'num_comments'``, or ``None`` if the request fails or the title
        block cannot be parsed.

    Notes
    -----
    ``'logs'`` maps user id to comment text. A user who appears in more than
    one logbook row keeps only the last row, so ``len(climb['logs'])`` can be
    smaller than ``'num_comments'``, which counts every parsed row (including
    rows with an empty comment).
    """
    url_base_climb = 'https://www.ukclimbing.com/logbook/c.php?i={}'
    headers = {'User-Agent': 
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}

    climb = {}
    # `except Exception` throughout (not a bare except) so Ctrl-C / SystemExit
    # still propagate out of a pool worker.
    try:
        url = url_base_climb.format(climb_id[0])
        climb['id'] = climb_id[0]
        climb['url'] = url
        r = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        info_title = soup.find('div', attrs={'class': 'nav-tabs-header'}).find('h1')
        # First text node of the heading, with tab characters removed.
        climb['climbname'] = re.findall('>([^<]+)\t?<', str(info_title))[0].replace('\t', '')
        # Rating = number of 'fa fa-star' occurrences in the heading's HTML.
        climb['rating'] = len(re.findall('fa fa-star', str(info_title)))
        climb['grade'] = info_title.find('small').text.strip()
        climb['crag_id'] = climb_id[1]
        #climb['rocktype'] = df_crags.loc[crag].rocktype
        try:
            desc = soup.find('div', attrs={'id': 'overview'})
            if "No description has been" not in desc.text:
                # Keep only the text before the first trailing section marker.
                if "Ticklist" in desc.text:
                    climb['desc'] = desc.text.split('Ticklist')[0]
                else:
                    climb['desc'] = desc.text.split('Feedback')[0]
            else:
                climb['desc'] = ''

        except Exception:
            climb['desc'] = ''
        try:
            comments = []
            users = []

            log_rows = soup.find('table', attrs={'class': 'table table-sm mb-0'}).find_all(
                'tr', attrs={'class': re.compile('entry_[0-9]+$')})
            for comment in log_rows:
                # Skip rows whose HTML carries the 'd-table-row d-sm-none' marker.
                if 'd-table-row d-sm-none' not in str(comment):
                    users.append(re.search('id=([0-9]+)$', comment.find('a').attrs['href'])[1])
                    if 'no_content_entry' not in str(comment):
                        comments.append(comment.find('span', attrs={'class': 'comment_desc'}).text)
                    else:
                        comments.append('')
        except Exception:
            # Any parsing problem discards the whole log table for this climb.
            comments = []
            users = []
        climb['logs'] = {user: comment for (user, comment) in zip(users, comments)}
        climb['num_comments'] = len(comments)
        return climb
    except Exception:
        return None

def scrape_user_info(i, timeout=30):
    """Scrape a user's profile page for their username.

    Parameters
    ----------
    i : int or str
        User id substituted into both the logbook and the profile URL.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up.

    Returns
    -------
    dict or None
        A dict with the keys ``'id'``, ``'logbook_url'``, ``'profile_url'``
        and ``'username'``, or ``None`` when the profile heading cannot be
        found. Only the profile page is requested; the logbook URL is recorded
        but not fetched.
    """
    url_base_logbook = 'https://www.ukclimbing.com/logbook/showlog.php?id={}&sort=x&country=0&crag=0&gradetype=0&partner=0&year=0&season=0&nresults=100&pg=1'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
    user = {}
    user['id'] = i
    user['logbook_url'] = url_base_logbook.format(i)
    url_base_profile = 'https://www.ukclimbing.com/user/profile.php?id={}'
    url = url_base_profile.format(i)
    user['profile_url'] = url
    r = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
    # propagate out of a pool worker.
    try:
        user['username'] = soup.find('div', attrs={'class': 'col-lg-8 col-xl-9', 'id': 'has_sidebar'}).find('h1').text
        return user
    except Exception:
        return None
            


In [5]:
scrape_climb_info((192935,142))

{'id': 192935,
 'url': 'https://www.ukclimbing.com/logbook/c.php?i=192935',
 'climbname': 'Patience Direct - Beauty',
 'rating': 0,
 'grade': 'E2 5c',
 'crag_id': 142,
 'desc': '',
 'logs': {'13422': 'Both lead (thought this was just called Patience direct or Patience left hand)'},
 'num_comments': 4}

In [6]:
# Number of consecutive crag ids (0 .. total_crags-1) to probe.
total_crags = 30000

# Fan the per-id scrape out over a process pool. The try/finally guarantees
# the workers are terminated and reaped even if the scrape raises or is
# interrupted, instead of leaving orphaned worker processes behind.
p = Pool()
try:
    crag_list = list(tqdm(p.imap(get_crag_ids, range(0, total_crags)), total=total_crags))
finally:
    p.terminate()
    p.join()

# Index the successful scrapes by integer crag id; ids that returned nothing
# (missing page or unparsable page) are dropped.
crag_dic = {int(crag['id']): crag for crag in crag_list if crag}
        

HBox(children=(FloatProgress(value=0.0, max=30000.0), HTML(value='')))

Process ForkPoolWorker-1:
Process ForkPoolWorker-4:


not enough info for id 56


Process ForkPoolWorker-3:
Process ForkPoolWorker-2:
Process ForkPoolWorker-8:
Process ForkPoolWorker-6:


not enough info for id 59


Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "<ipython-input-4-1258cea42550>", line 8, in get_crag_ids
    return scrape_crag_info(i)
  File "<ipython-input-4-1258cea42550>", line 8, in get_crag

  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
KeyboardInterrupt
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "/Users/max/opt/ana

KeyboardInterrupt: 

Process ForkPoolWorker-14:
Process ForkPoolWorker-5:
Process ForkPoolWorker-7:
Process ForkPoolWorker-13:
Process ForkPoolWorker-12:
Process ForkPoolWorker-10:
Process ForkPoolWorker-11:
Process ForkPoolWorker-9:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/max/opt/anaconda3/lib/python3.7/multiprocessing/process.

  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 376, in _make_request
    self._validate_conn(conn)
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/adapters.py", line 449, in send
    timeout=timeout
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn
    conn.connect()
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn
    conn.connect()
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib

  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/adapters.py", line 449, in send
    timeout=timeout
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/sessions.py", line 668, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/sessions.py", line 668, in <listcomp>
    history = [resp for resp in gen] if allow_redirects else []
  File "<string>", line 3, in raise_from
  File "/Users/max/opt/anaconda3/lib/python3.7/site-packages/requests/session

In [None]:
# One row per crag: crag_dic maps crag id -> scraped dict, so transpose.
df_crags = pd.DataFrame(crag_dic).T
df_crags.columns = [label.lower() for label in df_crags.columns]

# A '?' climb count is treated as zero before casting to int.
df_crags['climbs'] = df_crags['climbs'].replace('?', 0).astype(int)

# Remaining numeric columns and their target dtypes.
for _column, _dtype in (('id', int), ('lat', float), ('long', float)):
    df_crags[_column] = df_crags[_column].astype(_dtype)

def strip_numbers(alt):
    """Return the leading run of digits of ``alt`` as an int.

    Parameters
    ----------
    alt : object
        Usually a string such as ``'120m'``. Non-string values (for example
        NaN or an already numeric value) are returned unchanged.

    Returns
    -------
    int, float or object
        The integer formed by the digits at the very start of the string,
        ``np.nan`` when the string does not start with a digit, or ``alt``
        itself when it is not a string.
    """
    if not isinstance(alt, str):
        return alt
    # '[0-9]*' always matches at position 0 (possibly the empty string), so
    # only digits at the start of the text are picked up.
    leading_digits = re.match(r'[0-9]*', alt).group(0)
    try:
        return int(leading_digits)
    except ValueError:
        # Empty match: the text does not begin with a digit.
        return np.nan
df_crags['altitude'] = df_crags['altitude'].map(strip_numbers)

In [None]:
df_crags[df_crags['climbs']==0]

In [10]:
# Flatten every crag's climb_list into (climb_id, crag_id) pairs, in crag
# order. The star rating stored alongside each climb id is not used here
# (a former `rating >= 2` filter is disabled).
climb_id_list = [
    (climb_id, crag)
    for crag, crag_climbs in df_crags['climb_list'].items()
    for climb_id, _rating in crag_climbs
]
len(climb_id_list)

495939

In [11]:
def sectors(num_finish, num_sec=10):
    """Split ``range(num_finish)`` into consecutive half-open index pairs.

    Each pair ``(start, stop)`` is meant to be used as a slice
    ``seq[start:stop]``; taken together the pairs cover every index from 0 up
    to and including ``num_finish - 1`` exactly once.

    Parameters
    ----------
    num_finish : int
        Total number of items to cover.
    num_sec : int, optional
        Target number of sectors. The chunk size is
        ``num_finish // num_sec`` (at least 1), so one extra, shorter sector
        is produced when ``num_finish`` is not a multiple of the chunk size.

    Returns
    -------
    list of tuple
        ``(start, stop)`` pairs in ascending order.
    """
    # Guard against a zero step when there are fewer items than sectors.
    chunk_size = max(1, int(num_finish // num_sec))
    bounds = []
    previous = 0
    for stop in range(chunk_size, num_finish, chunk_size):
        bounds.append((previous, stop))
        previous = stop
    # The final stop is num_finish (not num_finish - 1): the pairs are slice
    # bounds, so an exclusive stop is needed to keep the last item.
    bounds.append((previous, num_finish))
    return bounds

In [12]:
scan_sec = sectors(len(climb_id_list),100)

In [13]:
# Cut the flat climb list into one sub-list per (start, stop) sector.
climb_id_list_iter = [climb_id_list[start:stop] for start, stop in scan_sec]


In [14]:
climb_dic = {}

In [15]:
from IPython.display import clear_output

In [16]:
scrape_climb_info((3167,4))

{'id': 3167,
 'url': 'https://www.ukclimbing.com/logbook/c.php?i=3167',
 'climbname': 'Vanishing Point',
 'rating': 0,
 'grade': 'E1 5b',
 'crag_id': 4,
 'desc': '',
 'logs': {'187067': '', '63530': ''},
 'num_comments': 2}

In [17]:
climb_id_list_iter[15]

[('291234', 1017),
 ('291235', 1017),
 ('291873', 1017),
 ('291874', 1017),
 ('291237', 1017),
 ('291238', 1017),
 ('291875', 1017),
 ('291236', 1017),
 ('435022', 1017),
 ('519497', 1017),
 ('291239', 1017),
 ('435023', 1017),
 ('492610', 1017),
 ('492612', 1017),
 ('492613', 1017),
 ('492614', 1017),
 ('380305', 1018),
 ('380306', 1018),
 ('44901', 1018),
 ('245297', 1018),
 ('44902', 1018),
 ('44903', 1018),
 ('44904', 1018),
 ('375435', 1018),
 ('44905', 1018),
 ('44906', 1018),
 ('513541', 1018),
 ('44907', 1018),
 ('28347', 1018),
 ('44908', 1018),
 ('44909', 1018),
 ('437883', 1018),
 ('44910', 1018),
 ('44927', 1018),
 ('437884', 1018),
 ('44911', 1018),
 ('44912', 1018),
 ('44928', 1018),
 ('44929', 1018),
 ('345847', 1018),
 ('28348', 1018),
 ('44930', 1018),
 ('28349', 1018),
 ('28350', 1018),
 ('44931', 1018),
 ('44932', 1018),
 ('44933', 1018),
 ('44934', 1018),
 ('44935', 1018),
 ('44936', 1018),
 ('44937', 1018),
 ('44938', 1018),
 ('44939', 1018),
 ('44940', 1018),
 ('5

In [18]:
climb_dic[64394]

KeyError: 64394

In [20]:
#81/100
# Scrape the selected chunks one at a time, merging each chunk's results into
# climb_dic before moving on, so progress survives a later failure.
itterations = climb_id_list_iter[50:52]
for j,i in enumerate(itterations):
    print(str(j)+"/"+str(len(itterations)))
    p = Pool(5)
    try:
        t = tqdm(p.imap(scrape_climb_info, i), total=len(i))
        climb_list = list(t)
    finally:
        # Always terminate and reap the workers, even if the chunk fails or
        # is interrupted, so no worker processes are left behind.
        p.terminate()
        p.join()
    t.refresh()
    t.reset()
    # Failed scrapes come back as None and are skipped.
    for climb in climb_list:
        if climb:
            climb_dic[int(climb['id'])] = climb
    clear_output()

In [21]:
len(climb_dic)

9916

In [22]:
def return_climb_type(grade):
    """Classify a grade string as ``'boulder'``, ``'lead'``, ``'trad'`` or ``'other'``.

    The checks run in this order and the first hit wins:

    1. ``'boulder'`` - the string contains a lowercase ``f`` anywhere, or
       starts with ``V`` followed by digits.
    2. ``'lead'`` - the whole string is one digit plus ``a``/``b``/``c`` with
       an optional ``+`` (e.g. ``6a+``), or ``5.`` plus digits with an
       optional ``a``-``d`` suffix (e.g. ``5.10a``).
    3. ``'trad'`` - the string starts with ``M``, ``D``, ``VD``, ``HVD``,
       ``S``, ``HS``, ``VS``, ``HVS`` or ``E`` followed by digits.
    4. ``'other'`` - anything else.

    Parameters
    ----------
    grade : str
        Grade text as scraped from the climb page.

    Returns
    -------
    str
        One of the four labels above.
    """
    # Raw strings keep '\+' and '\.' as regex escapes instead of invalid
    # Python string escapes.
    if 'f' in grade or re.match(r'V[0-9]+', grade):
        return 'boulder'
    if re.match(r'^[0-9][abc]\+?$', grade) or re.match(r'^5\.[0-9]+[abcd]?$', grade):
        return 'lead'
    # NOTE(review): the alternation is not grouped, and re.match anchors each
    # alternative at the start, so this is a pure prefix test - any grade
    # beginning with 'M', 'D' or 'S' counts as trad. Confirm that is intended.
    if re.match(r'^M|D|VD|HVD|S|HS|VS|HVS|E[0-9]+', grade):
        return 'trad'
    return 'other'


In [23]:
def comments_flat(com_dic, desc):
    """Join a description and all logged comments into one string.

    Parameters
    ----------
    com_dic : dict
        Mapping whose values are comment strings; iterated in its own order.
    desc : str
        Description text placed first.

    Returns
    -------
    str
        ``desc`` followed by every comment, each piece (including the last)
        followed by a single space.
    """
    pieces = [desc]
    pieces.extend(com_dic[key] for key in com_dic)
    # Every piece is followed by one space, hence the trailing ' '.
    return ' '.join(pieces) + ' '

In [24]:
# One row per climb: climb_dic maps climb id -> scraped dict, so transpose.
df_climbs=pd.DataFrame(climb_dic).T
# Keep only the first row for any repeated index label.
df_climbs = df_climbs[~df_climbs.index.duplicated()]
# Coarse discipline label derived from the grade string.
df_climbs['type'] = df_climbs['grade'].map(return_climb_type)
# Description plus every logged comment, as a single text blob per climb.
df_climbs['comments_flat'] = df_climbs.apply(lambda x: comments_flat(x.logs,x.desc), axis=1)
# Count a non-empty description as one extra comment.
# NOTE: this line is not idempotent - re-running the cell on an existing
# df_climbs would add 1 again; here df_climbs is rebuilt at the top of the cell.
df_climbs['num_comments'] = df_climbs.apply(lambda x: x.num_comments+1 if x.desc else x.num_comments,axis=1)
df_climbs.type.value_counts()

lead       4768
boulder    2630
other      1545
trad        973
Name: type, dtype: int64

In [62]:
len(df_climbs)

492853

In [63]:
df_climbs[(df_climbs['type']=='other') & (df_climbs['num_comments']>0)]['grade'].value_counts()

summit       4150
VB           2334
18            905
17            863
19            809
             ... 
XI+             1
III 5.6/7       1
none 5×         1
IV s            1
7b s2/3         1
Name: grade, Length: 711, dtype: int64

In [23]:
user_list = [user for comment in df_climbs['logs'] for user in comment]

In [24]:
len(user_list)

4544978

In [25]:
user_id_list = np.unique(user_list)

In [67]:
len(user_id_list)

33064

In [57]:

# Scrape every distinct user id in parallel. The try/finally guarantees the
# workers are terminated and reaped even if the scrape raises or is
# interrupted.
p = Pool()
try:
    # NOTE: this rebinds user_list from the flat list of ids to the list of
    # scraped user dicts (None for users that could not be scraped).
    user_list = list(tqdm(p.imap(get_user_ids, user_id_list), total=len(user_id_list)))
finally:
    p.terminate()
    p.join()
# Index the successful scrapes by integer user id.
user_dic = {int(user['id']): user for user in user_list if user}
df_users = pd.DataFrame(user_dic).T

HBox(children=(FloatProgress(value=0.0, max=33065.0), HTML(value='')))




In [59]:
len(df_users)

33062

In [111]:
# Invert climb -> {user: comment} into user id -> list of climb index labels.
users_climbs_dic = {}
rows = df_climbs.iterrows()
for climb in tqdm(rows):
    # climb is an (index label, row) pair; the keys of row.logs are user ids.
    for log in climb[1].logs:
        # setdefault creates the list on first sight of a user. The previous
        # bare `except:` fallback would have replaced an existing list if
        # anything other than a missing key went wrong.
        users_climbs_dic.setdefault(int(log), []).append(climb[0])

len(users_climbs_dic)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




33065

In [112]:
df_users['climbs'] = pd.Series(users_climbs_dic)

In [114]:
df_users['num_climbs']=df_users.climbs.map(lambda x:len(x))

In [115]:
df_users

Unnamed: 0,id,logbook_url,profile_url,username,climbs,num_climbs
100002,100002,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,Stephanelericolais,"[42012, 18853, 147314]",3
10002,10002,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,Piers Harley,"[8359, 10726, 10731, 10732, 10735, 10744, 1077...",325
100025,100025,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,george5699,"[31435, 33329, 40976, 42231, 86139, 35261, 352...",15
100031,100031,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,Tak,"[10917, 10941, 10969, 10971, 10328, 10342, 103...",16
100045,100045,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,smeaton,"[10729, 10731, 10735, 10814, 10816, 10837, 108...",46
...,...,...,...,...,...,...
99962,99962,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,spilla,"[10370, 14060, 14061, 51492, 14062, 14063, 140...",19
99967,99967,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,Andrewbirkett,"[33152, 33154, 33286, 35960, 33289, 104598, 37...",68
99978,99978,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,Jamie Nobbs,"[8349, 8351, 8354, 4662, 4684, 34668, 4694, 47...",74
99997,99997,https://www.ukclimbing.com/logbook/showlog.php...,https://www.ukclimbing.com/user/profile.php?id...,nevets,"[11041, 10893, 10894, 10241, 10245, 10252, 102...",28


# Write SQL

In [116]:
# Connection settings for the local PostgreSQL database used to persist the
# scraped tables.
from sqlalchemy import create_engine
db_user = 'postgres'
# if you need a password to access a database, put it here
# (NOTE(review): prefer reading it from the environment over committing it)
db_password = ''
# on your computer, use localhost
db_host = 'localhost'
# the default port for postgres is 5432
db_port = 5432
# name of the database to connect to
database = 'climbing'

conn_str = f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{database}'
# `connection` holds the object returned by create_engine; it is passed to
# pandas' to_sql / read_sql in the cells that follow.
connection = create_engine(conn_str)

In [25]:
# df_climbs['logs_flat'] = [repr(comment) for comment in df_climbs['logs']]
# df_climbs.drop('logs',axis=1).to_sql('climbs',connection,if_exists='replace')

In [117]:
df_users['climbs_flat'] = [repr(climb) for climb in df_users['climbs']]
df_users.drop('climbs',axis=1).to_sql('users',connection,if_exists='replace')

In [None]:
# df_crags['climbs_flat'] = [repr(climb) for climb in df_crags['climb_list']]
# df_crags.drop('climb_list',axis=1).to_sql('crags',connection,if_exists='replace')

In [None]:
df_crags.to_csv('crags.csv')

# Read from SQL Here

In [8]:
# Same connection settings as the export section, repeated so the notebook
# can be resumed from here without re-running the scrape.
from sqlalchemy import create_engine
db_user = 'postgres'
# if you need a password to access a database, put it here
# (NOTE(review): prefer reading it from the environment over committing it)
db_password = ''
# on your computer, use localhost
db_host = 'localhost'
# the default port for postgres is 5432
db_port = 5432
# name of the database to connect to
database = 'climbing'

conn_str = f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{database}'
# `connection` holds the object returned by create_engine; it is passed to
# pandas' read_sql in the cells that follow.
connection = create_engine(conn_str)

In [None]:
# Load the climbs table and rebuild the per-climb logs dict from its text form.
df_climbs_sql = pd.read_sql('select * from climbs',connection,index_col='index')
# SECURITY NOTE(review): eval() executes whatever text is stored in
# 'logs_flat'. The export cell writes repr() of plain dicts, so
# ast.literal_eval would parse the same data without running arbitrary code -
# consider switching if the database can be written by anyone else.
df_climbs_sql['logs'] = [eval(flat) for flat in df_climbs_sql['logs_flat']]
df_climbs_sql = df_climbs_sql.drop('logs_flat',axis=1)

In [None]:
df_climbs=df_climbs_sql

In [None]:
len(df_climbs_sql)

In [9]:
# Load the crags table and rebuild each crag's climb_list from its text form.
df_crags_sql = pd.read_sql('select * from crags',connection,index_col='index')
# SECURITY NOTE(review): eval() executes whatever text is stored in
# 'climbs_flat'. The export cell writes repr() of plain lists of tuples, so
# ast.literal_eval would parse the same data without running arbitrary code -
# consider switching if the database can be written by anyone else.
df_crags_sql['climb_list'] = [eval(flat) for flat in df_crags_sql['climbs_flat']]
df_crags_sql = df_crags_sql.drop('climbs_flat',axis=1)
# Not the last expression of the cell, so this line displays nothing.
df_crags_sql

# Make the reloaded table the working crag frame for the rest of the notebook.
df_crags = df_crags_sql

In [None]:
# Load the users table and rebuild each user's list of climbs from its text form.
df_users_sql = pd.read_sql('select * from users',connection,index_col='index')
# SECURITY NOTE(review): eval() executes whatever text is stored in
# 'climbs_flat'. The export cell writes repr() of plain lists, so
# ast.literal_eval would parse the same data without running arbitrary code -
# consider switching if the database can be written by anyone else.
df_users_sql['climbs'] = [eval(flat) for flat in df_users_sql['climbs_flat']]
df_users_sql = df_users_sql.drop('climbs_flat',axis=1)
df_users_sql

In [None]:
df_crags_sql.head()

In [None]:
df_crags_sql.describe()

In [None]:
df_crags_sql[df_crags_sql['climbs']==0]

In [None]:
ninetyfive = scrape_crag_info('1652')

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Bag-of-words over the comments logged on a single example climb.
climb = scrape_climb_info(('30358','1652'))
# scrape_climb_info stores the comments under 'logs' (user id -> comment
# text); it never sets a 'comments' key.
comments = list(climb['logs'].values())

# The vectorizer has to exist before it can be fitted; the cells that follow
# reuse `cv` for the feature names.
cv = CountVectorizer()
c_vec = cv.fit_transform(comments)

In [None]:
c_vec_df = pd.DataFrame(c_vec.asformat('array'),columns=cv.get_feature_names())

In [None]:
# Score every vocabulary term on its own with the sentiment analyzer.
SIA = SentimentIntensityAnalyzer()
sentiment = [SIA.polarity_scores(term) for term in cv.get_feature_names()]

# One row per term: total occurrences plus the four polarity scores.
df = pd.DataFrame(c_vec_df.sum(),columns=['count'])
for _score_name in ('neg', 'neu', 'pos', 'compound'):
    df[_score_name] = [scores[_score_name] for scores in sentiment]

In [None]:
df_crags_sql.mean()

In [None]:
from sklearn.cluster import KMeans

km = KMeans(n_clusters=6)



In [None]:
km.labels_

# Climbing dictionary (glossary of climbing terms)

In [None]:
# Pull the paragraphs of Wikipedia's climbing glossary as a rough word list.
wiki = "https://en.wikipedia.org/wiki/Glossary_of_climbing_terms"

# A timeout stops the cell from hanging on a stalled connection, and
# raise_for_status surfaces an HTTP error directly instead of a confusing
# parse failure further down.
r = requests.get(wiki, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text,'html.parser')
body = soup.find('div',attrs={'class':'mw-parser-output'})
# NOTE(review): the first three <p> elements are skipped - presumably intro
# text; confirm against the current page layout.
list_of_words = [word.text.replace('\n','') for word in body.find_all('p')[3:]]

In [None]:
list_of_words
