## Baly Search Analysis

This notebook uses oxylabs to query on google and compares results with our own search.

In [1]:
%%capture
pip install beautifulsoup4 requests pandas lxml

In [2]:
import requests
from bs4 import BeautifulSoup
import lxml.html
import re

In [136]:
## Co-ordinates for Cities

origins = {
    'Najaf': '32.0250337,44.324328',
    'Karbala': '32.607318,44.026082',
    'Baghdad': '33.303421,44.457053'
}

## Oxylabs

You will need username and password, ask me for it.

In [4]:
from getpass import getpass
username = getpass("username")
password = getpass("password")

username ········
password ········


In [108]:
def process_html(html_files):
    name_selector = '[role="heading"]'
    rating_selector = 'span[aria-hidden="true"]'
    rating_count_selector = '[class*="RDApEe"]'
    details_selector = '.rllt__details div:nth-of-type(5)'
    lat_selector = '[data-lat]'
    lng_selector = '[data-lng]'
    type_selector = '//div[@class="rllt__details"]/div[2]/text()'
    address_selector = '.rllt__details div:nth-of-type(3)'
    data = []
    for html in html_files:
        soup = BeautifulSoup(html, 'html.parser')
        lxml_obj = lxml.html.fromstring(str(soup))
        index = -1
    
        for listing in soup.select('[class="VkpGBb"]'):
            index += 1
            place = listing.parent
            name_el = place.select_one(name_selector)
            name = name_el.text.strip() if name_el else ''
            
            rating_el = place.select_one(rating_selector)
            rating = rating_el.text.strip() if rating_el else ''
            
            rating_count_el = place.select_one(rating_count_selector)
            rating_count = ''
            if rating_count_el:
                count_match = re.search(r'\((.+)\)', rating_count_el.text)
                rating_count = count_match.group(1) if count_match else ''
            
            
            details_el = place.select_one(details_selector)
            details = details_el.text.strip() if details_el else ''
            
            lat_el = soup.select_one(lat_selector)
            lat = lat_el.get('data-lat') if lat_el else ''
            
            lng_el = soup.select_one(lng_selector)
            lng = lng_el.get('data-lng') if lng_el else ''
            
            type_el = lxml_obj.xpath(type_selector)
            place_types = []
            for item in type_el:
                parts = item.strip().split('·')
                non_empty_parts = [part.strip() for part in parts if part.strip()]
                if non_empty_parts:
                    place_types.append(non_empty_parts[-1])
            
            address_el = place.select_one(address_selector)
            address = address_el.text.strip() if address_el else ''
            
            place = {
                'name': name,
                'place_type': place_types[index],
                'address': address,
                'rating': rating,
                'rating_count': rating_count,
                'latitude': lat,
                'longitude': lng,
                'details': details,
            }
            data.append(place)
    return data

In [109]:
def query_google(query,origin):
    location = "lat: {0}, lng: {1}, rad: 15000".format(*origin.split(','))
    payload = {
        'source': 'google_maps',
        'query': query,
        'user_agent_type': 'desktop',
        'domain': 'iq',
    #   'geo_location': 'Najaf, Iraq',
        'geo_location': location,
    #   'geo_location': '2368',
        'start_page': '1',
        'google_suggest': 'IQ',
        'limit':'5',
        'pages': '3'
    }
    response = requests.request(
    	'POST',
    	'https://realtime.oxylabs.io/v1/queries',
    	auth=(username, password),
    	json=payload,
    	timeout=180
    )
    print(response.status_code)
    if response.status_code == 200:
        #print(str(response.content))
        results = response.json()['results']
        print(len(results))
        html_files = [result['content'] for result in results]
        return process_html(html_files)
    return []

This is where we search for a particular PoI - in this case parking lots in Najaf.

In [120]:
import pandas as np

def google_places_to_df(data):
    place_names = [ x['name'] for x in data ]
    types = [ x['place_type'] for x in data ]
    lats = [ x['latitude'] for x in data ]
    longs = [ x['longitude'] for x in data ]
    address = [x['address'] for x in data ]
    df = np.DataFrame({'Name': place_names, 'Type': types, 'Latitutde': lats, 'Longitude': longs, 'Address': address})
    return df


Let's search for garages in Najaf - this is where a lot of rides originate and terminate

In [126]:
query = 'كراج'
origin=origins['Najaf']
data = query_google(query,origin)
df = google_places_to_df(data)
df.head(15)

200
3


Unnamed: 0,Name,Type,Latitutde,Longitude,Address
0,ALBASRWI CARS SERVICE CENTER,شارع كراج بغداد النجف مقابل كراج بغداد,32.0325487,44.3240938,مفتوح ⋅ يغلق ٥:٣٠ م
1,كراج الطوابق,٣٫٢ كم,32.0325487,44.3240938,
2,كراج بغداد,٧٠٠٫٠ م,32.0325487,44.3240938,
3,كراج ارض الشوفر / فرع النجف,الجزيرة وفاء,32.0325487,44.3240938,مفتوح ⋅ يغلق ٦:٣٠ م
4,كراج الشهداء بوابة الرحمة,١٫٦ كم,32.0325487,44.3240938,
5,كراج عقيل الخفاجي,٥٫٢ كم,32.0325487,44.3240938,مغلق ⋅ يفتح في الساعة ٧ م
6,كراج صباغة محمد تركي,٥٫٥ كم,32.0325487,44.3240938,مفتوح ⋅ يغلق ٧ م
7,كراج,١٫٦ كم,32.0325487,44.3240938,
8,كراج النجف الشمالي,٦٥٠٫٠ م,32.0325487,44.3240938,
9,كراج رضوان الحمامي,٤٫١ كم,32.0325487,44.3240938,مفتوح ⋅ يغلق ٤ م


## Comparison with Smapp


In [145]:
import json


def search_goescapo(query,origin):
    base_url = 'http://gosecapo-staging.apps.private.apps.private.okd4.teh-2.snappcloud.io/maps/api/place/autocomplete/json'
    return get_search_results(query,origin,base_url)

def search_thor(query,origin):
    base_url = 'https://thor-staging.apps.private.okd4.teh-1.snappcloud.io/attempt/map'
    return get_search_results(query,origin,base_url)


def get_search_results(query,origin,base_url):
    params = {
        'origin': origin,
        'input': query,
        'language': 'ar'
    }

    response = requests.request(
    	'GET',
    	base_url,
        params = params,
    	timeout=300
    )
    if response.status_code != 200:
        print(response.status_code,response.content)
    return response

def get_data_frame(response_str):
    decoded_str = response_str.decode('utf-8')
    smappdata = json.loads(decoded_str)
    predicts = smappdata['predictions']
    place_names = [ x['description'] for x in predicts ]
    types = [ x['types'][0] for x in predicts ]
    lats = [ x['latitude'] for x in predicts]
    longs = [ x['longitude'] for x in predicts]
    df = np.DataFrame({'Name': place_names, 'Type': types, 'Latitutde': lats, 'Longitude': longs})
    return df

In [140]:
import json

origin = origins['Najaf']
query = 'كراج'
resp =  search_goescapo(query,origin)
df = get_data_frame(resp.content)
df.head(20)

Unnamed: 0,Name,Type,Latitutde,Longitude
0,جسر كراج بغداد,highway:trunk,32.032554,44.323044
1,كراج النجف الجنوبي,building:garage,31.981235,44.351372
2,كراج مسجد الكوفة,amenity:bus_station,32.025798,44.399097
3,كراج,amenity:car_wash,31.992382,44.320963
4,كراج ميسان,amenity:bus_station,32.02073,44.380629
5,كراج الرابطة,tourism:caravan_site,31.993346,44.320444
6,كراج سيارتي,shop:ticket,31.989473,44.31813
7,كراج الطوابق,amenity:car_rental,31.99855,44.311104
8,كراج الصلاحية,amenity:bus_station,31.963016,44.601279
9,كراج النجف الداخلي,amenity:bus_station,32.002836,44.329221


In [146]:
from urllib.parse import unquote

res = search_thor(query,origin)
print(res.content)

#df = get_data_frame(res.content)
#print(query)
#df.head(20)

504 b'upstream request timeout'
b'upstream request timeout'


In [117]:
query = 'شارع الربيعي كلاس كدز'
origin='33.3034217,44.4570539'

res = get_search_results(query,origin)
df = get_data_frame(res.content)
print(query)
df.head(20)

شارع الربيعي كلاس كدز


Unnamed: 0,Name,Type,Latitutde,Longitude
0,العبيدي,place:neighbourhood,33.373545,44.531364
1,Al-Rubaie Street,junction:roundabout,33.054631,44.358087
2,شارع الربيع,highway:primary,33.334136,44.324744
3,شارع الربيع,highway:primary,33.298363,44.323487
4,شارع الربيعي,highway:tertiary,33.32402,44.450668
5,شارع الربيع,highway:primary,33.358307,44.334356
6,شارع الربيع,highway:primary,33.30803,44.323798
7,Al-Rubaie Street,highway:tertiary,33.054484,44.360821
8,مجسر شارع الربيعي,highway:secondary,33.318821,44.443624
9,شارع الربيعي,highway:tertiary,33.052687,44.358051


In [118]:
data = query_google(query,origin)
df = google_places_to_df(data)
df.head(15)

200
3


Unnamed: 0,Name,Type,Latitutde,Longitude,Address
0,مركز منتضر كلاس,شركة هواتف,33.3197937,44.4453408,٢٫١ كم · شارع الربيعي بغداد الربيعي · 0771 789...
1,الفهد تويز سنتر,متجر ألعاب,33.3197937,44.4453408,٢٫٧ كم · 8FG3+C9W، شارع الربيعي
2,Frawila-Kids ملابس اطفال فراولة,متجر ملابس,33.3197937,44.4453408,٢٫٦ كم · 8FG3+QJG
3,First Step Kids Club,حضانة,33.3197937,44.4453408,٢٫٥ كم · 8FG2+36X، Unnamed Road · 0771 382 9440
4,Lailak,متجر ملابس,33.3197937,44.4453408,٢٫٦ كم · 8FG2+FX5، شارع الربيعي
5,مجمع الكوخ / فرع شارع الربيعي,متجر ملابس حريمي,33.3197937,44.4453408,"٢٫٢ كم · 8CCX+XJC, Rubaie St · 0771 882 2564"
6,زيونه_ شارع الربيعي _ الانيوم سنتر,متجر ملابس,33.3197937,44.4453408,"٢٫٦ كم · 8FG3+JHP, Rubaie St"
7,مجمع سنتر زيونة,متجر ملابس,33.3197937,44.4453408,٢٫٤ كم · 8FF2+RJ2، شارع الربيعي
8,مول جوهرة بغداد,مركز تسوق,33.3197937,44.4453408,٢٫٧ كم · 8FG3+VHR، شارع الربيعي · 0771 440 7702
9,مركز الغدير للعناية بالاسنان وزراعتها,مركز تسوق,33.3197937,44.4453408,"٢٫٦ كم · 8FG3+H42, شارع الربيعي، بغداد،،"


In [116]:
urlq = unquote('"http://gosecapo.baly-map-address.svc.cluster.local:8080/maps/api/place/autocomplete/json?input=%D8%B4%D8%A7%D8%B1%D8%B9%20%D8%A7%D9%84%D8%B1%D8%A8%D9%8A%D8%B9%D9%8A%20%D9%83%D9%84%D8%A7%D8%B3%20%D9%83%D8%AF%D8%B2&key=AIzaSyBKIB5clOPulZYnjvtYots18L7vRO5Snd8&language=ar-IQ&origin=33.257583704861275,44.401131645233875&radius=60000&components=country:iq"')
print(urlq)

"http://gosecapo.baly-map-address.svc.cluster.local:8080/maps/api/place/autocomplete/json?input=شارع الربيعي كلاس كدز&key=AIzaSyBKIB5clOPulZYnjvtYots18L7vRO5Snd8&language=ar-IQ&origin=33.257583704861275,44.401131645233875&radius=60000&components=country:iq"
