In [None]:
"""
List of columns:
_01_FULL_NAME
_02_ADDRESS
_03_ADDRESS_NUMBER
_04_PHONE
_05_CELLPHONE
_06_PROFILE_URL

# Additional columns
_07_SEARCH_KEYWORD
_08_DATA_FOUND
_09_SEARCH_URL
_10_FULL_ADDRESS
_11_MOBILE_2
_12_MOBILE_3
_13_MOBILE_4
_14_CELLPHONE_2
_15_CELLPHONE_3
_16_CELLPHONE_4
_17_CITY


https://www.imenik.hr/imenik/15109296/detalji/Igor%20Apn%20Zagreb%20%C5%BDerjav.html   (Telephone)
https://www.imenik.hr/imenik/17093932/detalji/Cedepe%20Zagreb.html  (Mobile)
https://www.imenik.hr/imenik/17107674/detalji/Martina%20Podravec%20Zagreb%20Habitatio.html (2 Mobile)
https://www.imenik.hr/imenik/16423805/detalji/Josip%20Repnjak.html (3 Telephones)

"""

In [2]:
import re
import math
from urllib.parse import quote
from urllib.parse import urlsplit, urlunsplit
from parsel import Selector



def split_address(raw_address):
    """Split a raw address string into zip code, city, street name and number.

    The expected layout is ``"<zip> <city>, <street> [<number>]"``, for
    example ``"10000 Zagreb, Ilica 10"``.

    Args:
        raw_address: Address text, or None when nothing was scraped.

    Returns:
        A 4-tuple ``(zip_code, city, street_name, street_number)``.
        ``street_number`` is None when the street part contains no digits.
        All four values are None when the input is missing or does not
        contain both a "<zip> <city>" part and a street part.
    """
    try:
        components = raw_address.split(', ')
        # maxsplit=1 keeps multi-word city names ("Velika Gorica") in one
        # piece instead of failing the two-name unpacking.
        zip_code, city = components[0].strip().split(None, 1)
        street = components[1]
    except (AttributeError, TypeError, ValueError, IndexError):
        # None / non-string input, no comma-separated street part, or no
        # city after the zip code.
        return None, None, None, None

    # Non-digit prefix = street name; first digit run plus trailing letters
    # (e.g. "2b") = house number.
    match = re.search(r'(\D+)(\d+\w*)', street)
    if match:
        street_name, street_number = match.groups()
    else:
        street_name, street_number = street, None

    # Drop the separator space left between the name and the number.
    return zip_code, city, street_name.strip(), street_number


def encode_url(url):
    """Percent-encode ``url``, leaving only ':' and '/' (plus unreserved characters) untouched.

    Every other character, including '%', '?', '=' and non-ASCII letters,
    is escaped, so the input is expected to be a raw, not yet encoded URL.
    """
    untouched_characters = ':/'
    encoded = quote(url, safe=untouched_characters)
    return encoded




def calculate_total_pages(data_found, items_per_page=10):
    """Return how many result pages are needed to show ``data_found`` items.

    Args:
        data_found: Number of results, either as a number or as the numeric
            text scraped from a page (surrounding whitespace is ignored).
        items_per_page: Results shown per page; defaults to 10.

    Returns:
        ``ceil(data_found / items_per_page)`` as an int, or 1 when the count
        is missing or cannot be interpreted as a number.
    """
    try:
        if isinstance(data_found, str):
            # Scraped counts arrive as text; dividing a str would raise and
            # silently collapse every search to a single page.
            data_found = int(data_found.strip())
        return math.ceil(data_found / items_per_page)
    except (TypeError, ValueError, ArithmeticError):
        # None, non-numeric text, NaN/inf or a zero page size: fall back to
        # a single page, as before.
        return 1

In [None]:
# Read HTML text from the input prompt and wrap it in a parsel Selector so the
# XPath queries in the following cell can run against it.
# NOTE(review): input() returns a single line; confirm that multi-line HTML
# pasted here is not truncated.
html = input()
response = Selector(text=html)

### **Profile Xpath**

In [None]:
def _nth_number(selector, header_label, position):
    """Return the first number listed after the ``position``-th matching header.

    Finds the ``position``-th ``<th>`` (document order) whose text contains
    ``header_label``, moves to its parent, and returns the first
    ``td/strong`` text among the following sibling ``<tr>`` elements, or
    None when nothing matches.
    """
    query = (
        f"(//th[contains(text(), '{header_label}')])[{position}]"
        "/../following-sibling::tr/td/strong/text()"
    )
    return selector.xpath(query).get()


# Name and address block of the profile page.
_01_FULL_NAME = response.xpath("//h2/strong/text()").get()
_10_FULL_ADDRESS = response.xpath("//h4/a/text()").get()
_18_ZIP_CODE, _17_CITY, _02_ADDRESS, _03_ADDRESS_NUMBER = split_address(_10_FULL_ADDRESS)

# First entry under each header.
_04_PHONE = _nth_number(response, 'TELEFON', 1)
_05_CELLPHONE = _nth_number(response, 'MOB', 1)
# _06_PROFILE_URLs

# Additional entries under the 'MOB' headers.
_11_MOBILE_2 = _nth_number(response, 'MOB', 2)
_12_MOBILE_3 = _nth_number(response, 'MOB', 3)

# Additional entries under the 'TELEFON' headers.
# NOTE(review): these are named CELLPHONE_n but read from 'TELEFON' headers,
# while _05_CELLPHONE reads from 'MOB' — confirm the intended column mapping.
_13_CELLPHONE_2 = _nth_number(response, 'TELEFON', 2)
_14_CELLPHONE_3 = _nth_number(response, 'TELEFON', 3)

### **Search page xpath**

In [None]:
# Read HTML text from the input prompt and rebind `response` to a parsel
# Selector over it; the search-page XPath cells below query this object.
# NOTE(review): input() returns a single line; confirm that multi-line HTML
# pasted here is not truncated.
html = input()
response = Selector(text=html)

In [None]:
# _07_SEARCH_KEYWORD
# Text of the <strong> inside the <td class='c_32'> cell; .get() returns it as
# a string, or None when the cell is absent.
# presumably this is the number of results reported by the search page — TODO confirm
_08_DATA_FOUND = response.xpath("//td[@class='c_32']/strong/text()").get()
# _09_SEARCH_URL

In [None]:
# Collect the encoded profile URL of every result row on the search page.
# NOTE(review): the predicate matches only rows whose class attribute is
# exactly 'rez_item rez_item_0' — confirm no other row variants exist.
rez_item = response.xpath("//div[@class='rez_item rez_item_0']")

# All profile URLs found on this page, in document order. Previously each
# iteration overwrote the single variable, so only the last URL survived.
profile_urls = []
for item in rez_item:
    href = item.xpath('.//h4/a/@href').get()
    if href is None:
        # A row without a link would otherwise yield "https://www.imenik.hrNone".
        continue
    profile_url_raw = f"https://www.imenik.hr{href}"
    # Kept for backward compatibility: holds the most recent URL, as before.
    _06_PROFILE_URLs = encode_url(profile_url_raw)
    profile_urls.append(_06_PROFILE_URLs)

In [3]:


number_of_page = 5
base_url = "https://www.imenik.hr/imenik/trazi/{}"
current_url = "https://www.imenik.hr/imenik/trazi/1/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html"


def build_page_urls(search_url, page_count):
    """Return the URLs of pages 1..page_count for a paginated search URL.

    Only the numeric segment directly after ``/trazi/`` in the path is
    rewritten; scheme, host, the rest of the path, query and fragment are
    passed through unchanged.

    Args:
        search_url: URL of any page of the search (its page number is replaced).
        page_count: Number of pages to generate; values below 1 give an empty list.

    Returns:
        List of URL strings, one per page, in ascending page order.
    """
    parts = urlsplit(search_url)
    page_urls = []
    for page_number in range(1, page_count + 1):
        # Replace the page segment once, on the path only. Mixing the query
        # into the path would duplicate it when the URL is reassembled, and
        # the lookahead stops '/trazi/1' from matching inside '/trazi/10'.
        new_path = re.sub(
            r'/trazi/\d+(?=/|$)', f'/trazi/{page_number}', parts.path, count=1
        )
        page_urls.append(urlunsplit(parts._replace(path=new_path)))
    return page_urls


url_components = urlsplit(current_url)

for new_url in build_page_urls(current_url, number_of_page):
    print(new_url)




https://www.imenik.hr/imenik/trazi/1/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html
https://www.imenik.hr/imenik/trazi/2/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html
https://www.imenik.hr/imenik/trazi/3/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html
https://www.imenik.hr/imenik/trazi/4/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html
https://www.imenik.hr/imenik/trazi/5/Osobe/sve/sve/vaznost/zagreb,%20karlova%C4%8Dka%20cesta%202b.html


### **Keyword Building Code**

In [None]:
import pandas as pd

# Load the source rows, normalise the address parts, and derive the search
# keyword "<city>, <street> <number>" for every row.
# NOTE(review): astype(str) renders missing numbers as 'nan' and float-typed
# numbers as e.g. '2.0' — confirm the 'number' column never contains blanks.
df = (
    pd.read_csv('1st.csv')
    .assign(
        # Trim leading/trailing whitespace from the text columns.
        city=lambda frame: frame['city'].str.strip(),
        street=lambda frame: frame['street'].str.strip(),
        # House numbers are concatenated below, so they must be strings.
        number=lambda frame: frame['number'].astype(str),
    )
    .assign(
        keyword=lambda frame: frame['city'] + ', ' + frame['street'] + ' ' + frame['number']
    )
)

# Persist the enriched table without the index column.
df.to_csv("data.csv", index=False)