In [1]:
from pathlib import Path
from src.processing.taxation_list import _open_file_taxation_list
file_path = Path().absolute().parent / "data/example_1_taxation_list.xlsx"
table_data_df = _open_file_taxation_list(file_path)
table_data_df

Unnamed: 0,origin_number,name,quantity,height,diameter,quality
0,1,береза,14 м2,1,-,Хорошее
1,2,ольха,14 м2,25,008,Хорошее
2,34,Ольха серая,2,55,45,Хорошее
3,5,береза,6 стволов,444444,555555,Хорошее
4,16,Ива ломкая,54 ствола,"5х14, 4х30, 3х8,2,1","7х3,4х11, 2х40",Хорошее
5,7,береза,1,25,-,Хорошее
6,8,береза,72 м2,15,-,Хорошее
7,9,ольха,72 м2,2,006,Хорошее
8,10,Яблоня домашняя,2 ствола,"1,5х2","0,04х2",Хорошее
9,11,Яблоня домашняя,4,1.5х4,0.04х4,Хорошее


In [20]:
import re

from src.parsing import Splitter, Templates

class SearchAmbiguity:
    """
    Поиск неоднозначности данных
    """

    @staticmethod
    def search_in_row_from_taxation_list(number: str, name: str, quantity: str, height: str, diameter: str,
                                         quality: str) -> bool:
        """
        Поиск неоднозначности в количестве численных характеристик объекта растительности
        Args:
            number (str): Исходный номер объекта растительности
            name (str): Наименование объекта растительности
            quantity (str): Количество объектов растительности
            height (str): Высоты объекта растительности
            diameter (str): Диаметры объекта растительности
            quality (str): Состояние объекта растительности

        Returns:
            bool: True, если неоднозначность найдена
        """

        count_numbers = len(Splitter.number(number))
        count_height = len(Splitter.size(height))
        count_diameter = len(Splitter.size(diameter))

        match_trunk = re.search(Templates.TRUNKS, quantity)
        match_contour = re.search(Templates.CONTOUR, quantity)
        match_line = re.search(Templates.LINE, quantity)

        if match_trunk:
            count_quantity = int(match_trunk.group(1))
        elif match_contour:
            count_quantity = 1
        elif match_line:
            count_quantity = 1
        else:
            count_quantity = int(quantity)

        if match_trunk and count_numbers == 1:
            if count_quantity == count_height == count_diameter:
                return False
        elif match_contour and count_numbers == 1:
            if count_quantity == count_height == count_diameter == 1:
                return False
        elif match_line and count_numbers == 1:
            if count_quantity == count_height == count_diameter == 1:
                return False
        elif not match_contour:
            if count_numbers == count_quantity == count_height == count_diameter:
                return False
        else:
            print("True")
            return True

ambiguities = []
for idx, series in table_data_df.iterrows():
    # print(series.values)
    ambiguities.append(SearchAmbiguity.search_in_row_from_taxation_list(*series.values))
print(ambiguities)
# table_data_df["ambiguity"] = ambiguities
# table_data_df

[False, None, False, False, False, None, None, None, None, None, None, None, None, None, None, None, None, None, None, False, False, None, None, None, False, None]


In [None]:
from shapely import Point
from src.objects import Tree, Number
from src.parsing import Parser, Splitter
import pandas as pd


def split(series: pd.Series) -> list[dict[str, str|int|float|None]]:
    split_row = []
    type_object = Parser.get_type_tree_object(series['quantity'])
    split_numbers = Splitter.number(series['origin_number'])
    specie = Parser.get_specie(series['name'])
    
    if isinstance(type_object, Tree):
        n_trunks = quantify_count_trunks(series)
        for split_number in split_numbers:
            number = Number(series['origin_number'], split_number, Point())
            tree = Tree(number, specie, trunks)

In [7]:
# from src.parsing import Splitter, Templates
# import re
# match_trunk = re.search(Templates.TRUNKS, "54 ствола")
# quantity_trunk = int(match_trunk.group(1)) if match_trunk else 1
# quantity_trunk

54

In [8]:
import pandas as pd

data = ['rjkd', 1, 4, '2 ствола', "54 м.кв."]
df = pd.DataFrame(data=data, columns=['Количество'], dtype=str)
df

Unnamed: 0,Количество
0,rjkd
1,1
2,4
3,2 ствола
4,54 м.кв.


In [11]:
import re
from src.parsing import Templates
def get_count(text: str) -> int:
    if re.match(Templates.DIGIT, text):
        return int(text.strip())
    elif re.match(Templates.TRUNKS, text):
        return 1
    elif re.match(Templates.CONTOUR, text):
        return 0
    elif re.match(Templates.LINE, text):
        return 0
    else:
        return 0

df_count: pd.DataFrame = df['Количество'].apply(get_count)
int(df_count.sum())

6

In [19]:
55%10

5