In [1]:
import re

def extract_integers_from_string(s):
    # Use regex to find all sequences of digits in the string
    integers = re.findall(r'\d+', str(s))
    # Convert the found sequences to integers
    return [int(num) for num in integers]

# Example usage
string = "50a and 2b, some_text 123"
integers = extract_integers_from_string(string)
print

<function print(*args, sep=' ', end='\n', file=None, flush=False)>

In [2]:
import unicodedata

def normalize_string(s):
    # Normalize the string to NFKD form and encode it to ASCII bytes, ignoring errors
    normalized = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')
    return normalized

# Example usage
string = "è, é, ê, and other accents"
normalized_string = normalize_string(string)
print(normalized_string)  # Output: e, e, e, and other accents

e, e, e, and other accents


In [3]:
import os
import csv
import networkx as nx
import pandas as pd

# Function to create graph from a single CSV file
def create_graph_from_csv(file_path, G):
    data = pd.read_csv(file_path)
    data['sp-tekst-r'] = data['sp-tekst-r'].str.strip().str.lower()
    for i in range(len(data)):
        node = data.loc[i, 'sp-tekst-r']
        line_number = data.loc[i, 'line_number']
        if G.has_node(node):
            if 'line_numbers' in G.nodes[node]:
                if line_number not in G.nodes[node]['line_numbers']:
                    G.nodes[node]['line_numbers'].append(line_number)
            else:
                G.nodes[node]['line_numbers'] = [line_number]
        else:
            G.add_node(node, pos=(i, data.loc[i, 'sp-km']), line_numbers=[line_number])
    for i in range(len(data) - 1):
        node1 = data.loc[i, 'sp-tekst-r']
        node2 = data.loc[i + 1, 'sp-tekst-r']
        dist = data.loc[i + 1, 'sp-km'] - data.loc[i, 'sp-km']
        if dist > 0:
            G.add_edge(node1, node2, weight=dist)
    return G

# Initialize the combined graph
G_combined = nx.Graph()

# Directory containing CSV files
csv_directory = 'csv_filtered'

# Read each CSV file and update the combined graph
for file_name in os.listdir(csv_directory):
    if file_name.endswith('.csv'):
        file_path = os.path.join(csv_directory, file_name)
        try:
            G_combined = create_graph_from_csv(file_path, G_combined)
        except:
            G_combined = G_combined

# Export the graph to ASP format
def export_to_asp(graph, file_path):
    with open(file_path, 'w') as file:
        for node, data in graph.nodes(data=True):
            line_numbers = data.get('line_numbers', [])
            for line in line_numbers:
                file.write(f"node({normalize_string(node).replace('-', '_').replace(' ', '_')}, {int(extract_integers_from_string(line)[0])}).\n")
                #print(f"node({normalize_string(node).replace('-', '_').replace(' ', '_')}, {int(extract_integers_from_string(line)[0])}).\n")
        for node1, node2, data in graph.edges(data=True):
            weight = data.get('weight', 1)
            file.write(f"edge({normalize_string(node1).replace('-', '_').replace(' ', '_')}, {normalize_string(node2).replace('-', '_').replace(' ', '_')}, {int(weight)}).\n")

# Export the combined graph to a .lp file
export_to_asp(G_combined, 'graph_all_filtered.lp')

print("Graph exported to graph.lp.")

Graph exported to graph.lp.


In [4]:
G_combined

<networkx.classes.graph.Graph at 0x21111c79e90>