# In [2]:
import asyncio
import aiohttp
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
from datetime import datetime, timedelta
from tqdm import tqdm
import nest_asyncio
nest_asyncio.apply()

# In [None]:
async def fetch(session, url):
    """Issue a GET for *url* through *session* and return the body as text.

    *session* must provide ``get(url)`` returning an async context manager
    whose value exposes an awaitable ``text()``.
    """
    request = session.get(url)
    async with request as resp:
        body = await resp.text()
    return body
    

async def process_horoscope(session, home_url, date, sign):
    """Fetch one sign's horoscope page for one date and extract its text.

    The page URL is built as ``{home_url}{sign}/{date}/`` and the horoscope
    is read from the first ``<p class="mtZOt">`` element of the returned HTML.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``text()``.
        home_url: Base URL; it is concatenated directly with ``sign``, so it
            must already end with a slash.
        date: Date string used as a URL path segment.
        sign: Zodiac sign used as a URL path segment.

    Returns:
        ``{'date': date, 'sign': sign, 'text': <paragraph text>}`` when the
        paragraph is found; ``None`` when it is missing or when the request
        or parsing failed.
    """
    url = f'{home_url}{sign}/{date}/'
    try:
        # The request itself is inside the try so that a single failed page
        # is skipped instead of aborting the caller's whole batch.
        async with session.get(url) as response:
            html = await response.text()
        soup = BeautifulSoup(html, 'html.parser')
        paragraph = soup.find('p', class_='mtZOt')
    except Exception as err:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # task cancellation still propagate. The failure is reported
        # instead of being silently dropped; tqdm.write keeps the progress
        # bar intact.
        tqdm.write(f'skipped {url}: {err!r}')
        return None

    if paragraph is None:
        return None
    return {'date': date, 'sign': sign, 'text': paragraph.get_text()}
    

async def main():
    """Scrape every sign's horoscope for every day in the range and save a CSV.

    For each date from 2004-01-01 through 2023-08-31 (inclusive) the twelve
    sign pages are requested concurrently via ``process_horoscope``. Rows
    that were found are collected into a DataFrame with the columns
    ``date``, ``sign`` and ``text``, which is written to
    ``horoscopes_<start>-<end>.csv`` and printed.
    """
    columns = ['date', 'sign', 'text']
    home_url = "https://horoscopes.rambler.ru/"

    start_date = datetime(2004, 1, 1)
    end_date = datetime(2023, 8, 31)
    day_count = (end_date - start_date).days + 1  # +1: end date is inclusive
    date_list = [
        (start_date + timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(day_count)
    ]

    signs = ['aries', 'taurus', 'gemini', 'cancer', 'leo', 'virgo', 'libra',
             'scorpio', 'sagittarius', 'capricorn', 'aquarius', 'pisces']

    # Rows are accumulated in a plain list and turned into a DataFrame once;
    # concatenating a growing DataFrame on every date re-copies all earlier
    # rows each time.
    rows = []
    async with aiohttp.ClientSession() as session:
        for date in tqdm(date_list):
            tasks = [process_horoscope(session, home_url, date, sign) for sign in signs]
            # return_exceptions=True: one failed request must not discard
            # everything scraped so far.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            for result in results:
                if isinstance(result, BaseException):
                    tqdm.write(f'request failed for {date}: {result!r}')
                elif result is not None:
                    # None means no horoscope text was found for that page;
                    # only real rows may reach the DataFrame.
                    rows.append(result)

    DF = pd.DataFrame(rows, columns=columns)

    DF.to_csv(f"horoscopes_{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}.csv")
    print(DF)

# Entry point: obtain the current event loop and block until main() finishes.
# NOTE(review): nest_asyncio.apply() is called right after the imports,
# presumably so run_until_complete() can be used while a notebook's own loop
# is already running — confirm before running this outside a notebook.
loop = asyncio.get_event_loop()
loop.run_until_complete(main())