In [83]:
from bs4 import BeautifulSoup as bs
from prefect import task, flow, get_run_logger
from prefect.tasks import task_input_hash
from datetime import timedelta
import requests
import sqlite3
import os
import pandas as pd
import numpy as np
import re
import datetime

In [84]:
# base_url = "https://hotels.ng/places/nigeria-1/{pages}"
base_url = "https://hotels.ng"
def get_page(url, page):
    response = requests.get(f"{url}{page}")
    response.raise_for_status()
    soup = bs(response.text, "html.parser")
    return soup

In [85]:
names = []
locations = []
categories = []
descriptions = []
likes = []
urls = []
def parse_names(soup, name_tag: str):
    for name in soup.select(name_tag):
        names.append(name.text.strip())
    return names

In [86]:
def parse_locations(soup, location_tag: str):
    tag_name, tag_attrs = location_tag
    for loc in soup.find_all(tag_name, **tag_attrs):
        locations.append(loc.find_all("p")[1].text.strip())
    return locations

In [87]:
def parse_categories(soup, category_tag:str):
    tag_name, tag_attrs = category_tag
    for cat in soup.find_all(tag_name, **tag_attrs):
        categories.append(cat.find_all("p")[0].text.strip())
    return categories

In [88]:
def parse_descriptions(soup, description_tag:str):
    tag_name, tag_attrs = description_tag
    for desc in soup.find_all(tag_name, **tag_attrs):
        descriptions.append(desc.text.strip().replace("\n", "").replace("\r", ""))
    return descriptions

In [89]:
def parse_likes(soup, like_tag:str):
    tag_name, tag_attrs = like_tag
    for like in soup.find_all(tag_name, **tag_attrs):
        likes.append(like.find_all("span")[1].text.strip())
    return likes

In [90]:
def parse_urls(soup, url_tag:str):
    tag_name, tag_attrs = url_tag
    for url in soup.find_all(tag_name, **tag_attrs):
        urls.append(base_url+url.find_all("a")[0]["href"])
    return urls
    # urls = [base_url+url.find_all("a")[0]["href"] for url in soup.find_all(tag_name, **tag_attrs)]
    # return urls

In [91]:
def aggregate_data(name, location, category, description, like, url):
    data = {
        "name": name,
        "location": location,
        "category": category,
        "description": description,
        "like": like,
        "url": url
    }
    return data

In [92]:
def store_data(data):
    dataset = pd.DataFrame(data)
    return dataset

In [93]:
def main(url = "https://hotels.ng/places/nigeria-1/", pages = 3):
    name_tag = "a > h2"
    location_tag = ("div", {"class":"category-de01"})
    category_tag = "div", {"class":"category-de01"}
    description_tag = "p", {"class":"sub-details"}
    like_tag = "div", {"class":"head_right"}
    url_tag = "div", {"class":"head_left"}
    print(url)
    print(pages)
    for page in range(1, pages+1):
        print(page)
        print(f"url >> {url}{page}")
        soup = get_page(url, str(page))
        name = parse_names(soup, name_tag)
        location = parse_locations(soup, location_tag)
        categories = parse_categories(soup, category_tag)
        description = parse_descriptions(soup, description_tag)
        likes = parse_likes(soup, like_tag)
        urls = parse_urls(soup, url_tag)
    mydata = aggregate_data(name, location, categories, description, likes, urls)
    dataset = store_data(mydata)
    return dataset

    # get_page(url="https://hotels.ng/places/nigeria-1/1")


In [94]:
if __name__ == "__main__":
    main()


https://hotels.ng/places/nigeria-1/
3
1
url >> https://hotels.ng/places/nigeria-1/1


2
url >> https://hotels.ng/places/nigeria-1/2
3
url >> https://hotels.ng/places/nigeria-1/3


In [95]:
main()

https://hotels.ng/places/nigeria-1/
3
1
url >> https://hotels.ng/places/nigeria-1/1
2
url >> https://hotels.ng/places/nigeria-1/2
3
url >> https://hotels.ng/places/nigeria-1/3


Unnamed: 0,name,location,category,description,like,url
0,Megan Fowler Bridge,Abuja,Building,,38,https://hotels.ng/places/building/10-megan-fow...
1,The Chad Basin National Park,Abuja,Resort,About Chad Basin National Park The Chad Basi...,133,https://hotels.ng/places/resort/11-the-chad-ba...
2,Yankari National Park,Bauchi,Game reserve,ABOUT Yankari National Park is a large wildl...,46,https://hotels.ng/places/game-reserve/16-yanka...
3,Aso Rock,China,Monument,ABOUT Aso Rock is a large outcrop of granit...,0,https://hotels.ng/places/monument/18-aso-rock
4,Ikogosi Warm Springs,Ekiti,Resort,"About Ikogosi Warm Springs 'You are hot, and ...",2,https://hotels.ng/places/resort/19-ikogosi-war...
...,...,...,...,...,...,...
67,"National War Museum, Umuahia",Abia,Museum,"History of National War Museum, Umuahia Natio...",29,https://hotels.ng/places/museum/47-national-wa...
68,International Institute of Tropical Agriculture,Oyo,College,About International Institute of Tropical Agri...,0,https://hotels.ng/places/college/48-internatio...
69,"Millennium Park, Abuja",Abuja,Park,"History of Millennium Park, Abuja Millennium ...",221,https://hotels.ng/places/park/49-millennium-pa...
70,Wonderland Amusement Park and Resort,Abuja,Park,History of Wonderland Amusement Park and Resor...,253,https://hotels.ng/places/park/50-wonderland-am...
