Skip to content

Commit

Permalink
🔥 Now I have all the data and it already inserts into Mongo — here I come, Jenkins!
Browse files · Browse the repository at this point in the history
  • Loading branch information
jvidalv committed May 9, 2020
1 parent c86b1cb commit 85287f5
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 14 deletions.
10 changes: 2 additions & 8 deletions main.py
Expand Up @@ -58,12 +58,6 @@
if random_sign not in data['contents']['you_hate'] and random_sign not in data['contents']['you_love']:
data['contents']['you_hate'].append(random_sign)

# YOU LOVE
pprint(daily_data)

# YOU HATE
exit()

# SPANISH
# Get the latest blog entry for this blog ( 1 each day )
spanish_base_url = 'https://www.semana.es/horoscopo/'
Expand Down Expand Up @@ -106,6 +100,6 @@
'Check love percentage using love calculator.', '').strip()
data['contents']['work']['en'] = panels[3].text.replace('\n', '').strip()

pprint(daily_data)
mongo.db.horoscope_daily.insert_many(daily_data)

# @todo
exit(1)
50 changes: 44 additions & 6 deletions main_stale.py
Expand Up @@ -7,6 +7,7 @@
from datetime import datetime
from src.constants.signs import *
from src.utils.headless import *
from src.utils.unicode import delete_accents

# CONNECTION
mongo = Mongo(mongo_connection)
Expand All @@ -16,8 +17,10 @@
parser = "lxml"

# STALE DATA BASE
for sign1 in signs_en:
for sign2 in signs_en:
list1 = signs_en
list2 = signs_en.copy()
for sign1 in list1:
for sign2 in list2:
stale_data.append({
'type': 'compatibility',
'sign1': sign1,
Expand All @@ -43,15 +46,50 @@
# ENGLISH
for data in stale_data:
sign1 = data['sign1'].lower()
sign2 = data['sign2'].lower()
url_base = "https://askastrology.com/zodiac-compatibility/" + sign1 + "-compatibility/" + sign1
for data2 in stale_data:
url_with_sign = url_base + '-' + data2['sign2'].lower()
url_with_sign = url_base + '-' + sign2
page = requests.get(url_with_sign, headers={'User-Agent': random_user_agent()})
soup = BeautifulSoup(page.content, parser)
compatibility_ps = soup.select('div.entry-content > p')
try:
data['resume']['en'] = compatibility_ps[0].text
data['relationship']['en'] = compatibility_ps[1].text
except IndexError:
pprint(url_with_sign)
page = requests.get(url_with_sign, headers={'User-Agent': random_user_agent()})
soup = BeautifulSoup(page.content, parser)
compatibility_ps = soup.select('div.entry-content > p')
data['resume']['en'] = compatibility_ps[0].text
data['relationship']['en'] = compatibility_ps[1].text
pprint(data)

# SPANISH
# todo
for data in stale_data:
sign1 = delete_accents(signs_en_to_es[data['sign1']].lower())
sign2 = delete_accents(signs_en_to_es[data['sign2']].lower())
url_base = "https://www.euroresidentes.com/horoscopos/compatibilidad/" + sign1 + "/" + sign1
url_with_sign = url_base + '-' + sign2 + '.htm'
page = requests.get(url_with_sign, headers={'User-Agent': random_user_agent()})

if page.ok == 0:
url_base = "https://www.euroresidentes.com/horoscopos/compatibilidad/" + sign2 + "/" + sign2
url_with_sign = url_base + '-' + sign1 + '.htm'
page = requests.get(url_with_sign, headers={'User-Agent': random_user_agent()})

soup = BeautifulSoup(page.content, parser)
compatibility_ps = soup.select('article.center-block > p')

try:
data['resume']['es'] = compatibility_ps[0].text
data['relationship']['es'] = compatibility_ps[2].text
except IndexError:
pprint(url_with_sign)
page = requests.get(url_with_sign, headers={'User-Agent': random_user_agent()})
soup = BeautifulSoup(page.content, parser)
compatibility_ps = soup.select('article.center-block > p')
data['resume']['es'] = compatibility_ps[0].text
data['relationship']['es'] = compatibility_ps[2].text

mongo.db.horoscope_stale.insert_many(stale_data)

exit(1)
6 changes: 6 additions & 0 deletions src/utils/unicode.py
@@ -0,0 +1,6 @@
import unicodedata


def delete_accents(string):
    """Return *string* with diacritical marks removed.

    Normalizes the text to NFD so every accented character decomposes into
    a base character followed by combining marks (Unicode category 'Mn'),
    then drops the combining marks.  E.g. 'escorpión' -> 'escorpion'.
    """
    decomposed = unicodedata.normalize('NFD', string)
    # Keep everything except the combining (nonspacing mark) code points.
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

0 comments on commit 85287f5

Please sign in to comment.