# Imports

In [71]:
import pandas as pd
import xml.etree.ElementTree as ET
import re


# Functions

In [72]:
def get_tag_value(element, tag_key):
    """Return the ``v`` attribute of the first descendant ``<tag>`` whose ``k`` equals *tag_key*.

    Args:
        element: An ``xml.etree.ElementTree.Element`` to search beneath.
        tag_key: The value the ``k`` attribute must match. It is interpolated
            into the XPath predicate between double quotes.

    Returns:
        The ``v`` attribute of the matching ``<tag>``, or ``None`` when no
        such ``<tag>`` exists (or the match has no ``v`` attribute).
    """
    match = element.find(f'.//tag[@k="{tag_key}"]')
    if match is None:
        return None
    return match.get('v')

# Compiled once at import time instead of on every call.
# Covers the monotonic Greek alphabet including accented/dieresis letters
# (U+0386..U+03CE, skipping the non-letter and unassigned code points in that
# span) plus the polytonic "Greek Extended" block (U+1F00..U+1FFF).
_GREEK_LETTER_PATTERN = re.compile(
    '[\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u1F00-\u1FFF]'
)


def has_greek_letters(s):
    """Return True if *s* contains at least one Greek letter.

    Unlike a plain ``[Α-Ωα-ω]`` class, accented letters such as "ά" or "Ό"
    and polytonic letters are recognised as Greek too.

    Args:
        s: The string to inspect. Must be a ``str``; ``None`` is not accepted.

    Returns:
        ``True`` when any character of *s* is a Greek letter, else ``False``
        (including for the empty string).
    """
    return _GREEK_LETTER_PATTERN.search(s) is not None

# Extract the output columns (id, coordinates, name, category) from one OSM element
def get_columns(get_tag_value, element):
    """Build the ``(id, lat, lon, name, category)`` tuple for a single element.

    Args:
        get_tag_value: Callable ``(element, tag_key) -> value or None`` used
            to read the category-related tags.
        element: The XML element to read attributes and ``<tag>`` children from.

    Returns:
        A 5-tuple ``(element_id, latitude, longitude, name_value, category_value)``.
        ``element_id``, ``latitude`` and ``longitude`` are the raw ``id``,
        ``lat`` and ``lon`` attributes (``None`` when absent). ``name_value``
        is ``None`` when there is no ``name`` tag or when the name contains
        Greek letters. ``category_value`` is chosen by priority: ``amenity``
        (only when a ``building`` value is also present), then ``building``,
        then ``historic``, otherwise ``artwork_type`` (which may be ``None``).
    """
    # The name is looked up directly on the element rather than via get_tag_value.
    name_node = element.find('.//tag[@k="name"]')
    name_value = None if name_node is None else name_node.get('v')
    if name_value is not None and has_greek_letters(name_value):
        # Names written in Greek script are discarded.
        name_value = None

    amenity = get_tag_value(element, 'amenity')
    building = get_tag_value(element, 'building')
    historic = get_tag_value(element, 'historic')
    artwork = get_tag_value(element, "artwork_type")

    if building:
        # An amenity only wins when the element is also tagged as a building.
        category_value = amenity if amenity else building
    elif historic:
        category_value = historic
    else:
        category_value = artwork

    return (
        element.get('id'),
        element.get('lat'),
        element.get('lon'),
        name_value,
        category_value,
    )



In [73]:

# Load the OSM export, keep every node/way that has both a usable name and a
# category, and write the two resulting tables to CSV.

# A forward slash avoids the invalid "\e" escape sequence of the previous
# literal and works on both Windows and POSIX systems.
xml_file_path = 'Data/exportXML.osm'
with open(xml_file_path, 'r', encoding='utf-8') as file:
    xml_data = file.read()
root = ET.fromstring(xml_data)

datasetNodes = []
datasetWays = []

for element in root.findall('.//node'):
    element_id, latitude, longitude, name_value, category_value = get_columns(get_tag_value, element)
    # Skip elements without a name (or with a discarded name) or a category.
    if name_value and category_value:
        datasetNodes.append({
            'id': element_id,
            'latitude': latitude,
            'longitude': longitude,
            'name': name_value,
            'category': category_value,
        })

for element in root.findall('.//way'):
    element_id, latitude, longitude, name_value, category_value = get_columns(get_tag_value, element)
    if name_value and category_value:
        # Coordinates are not stored for ways.
        datasetWays.append({
            'id': element_id,
            'name': name_value,
            'category': category_value,
        })

# Columns are given explicitly so that an empty result still produces a frame
# (and a CSV header) with the expected columns instead of a column-less frame.
dfNodes = pd.DataFrame(datasetNodes, columns=['id', 'latitude', 'longitude', 'name', 'category'])
dfWays = pd.DataFrame(datasetWays, columns=['id', 'name', 'category'])


dfNodes.to_csv("Output/nodes.csv", index=False)
dfWays.to_csv("Output/ways.csv", index=False)

In [74]:
def create_csv(df, isNode, *, min_rows=5, output_dir='Output'):
    """Write one CSV file per category that has enough rows.

    Each written file is named ``{output_dir}/{category}__node_data.csv`` or
    ``{output_dir}/{category}__way_data.csv`` and contains only the rows of
    that category.

    The threshold applies to the number of rows in the category, not to the
    length of the category name.

    Args:
        df: DataFrame with a ``category`` column. A frame without that column
            (e.g. a completely empty frame) produces no files.
        isNode: ``True`` to use the ``__node_data`` file suffix, ``False`` for
            ``__way_data``.
        min_rows: Minimum number of rows a category needs to be exported.
        output_dir: Directory the files are written into; it must already exist.

    Returns:
        None.
    """
    if 'category' not in df.columns:
        # Nothing to split, and indexing df['category'] would raise KeyError.
        return

    suffix = 'node_data' if isNode else 'way_data'

    for category in df['category'].unique():
        category_df = df[df['category'] == category]
        if len(category_df) < min_rows:
            continue
        category_df.to_csv(f'{output_dir}/{category}__{suffix}.csv', index=False)
    

In [75]:
# Export the per-category files: first for nodes, then for ways.
create_csv(dfNodes, isNode=True)
create_csv(dfWays, isNode=False)