In [2]:
import xlwings as xw
import pandas as pd
from src.parsing import Splitter

In [6]:
# Load the taxation list from the 'Ведомость' sheet of the active workbook.
# The table is read from A1 and expanded over the contiguous range; with
# xlwings' default DataFrame options the first sheet column becomes the index.
sheet_taxation_list = xw.sheets['Ведомость']
taxation_list_df = (
    sheet_taxation_list
    .range('A1')
    .expand()
    .options(pd.DataFrame, header=1)
    .value
)

# Keep only the columns that the later cells work with.
taxation_list_df = taxation_list_df[[
    'Номер точки',
    'Наименование',
    'Количество',
    'Высота',
    'Толщина',
    'Состояние',
    'Кустарник',
]]

taxation_list_df

Unnamed: 0_level_0,Номер точки,Наименование,Количество,Высота,Толщина,Состояние,Кустарник
Индекс,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,береза,14 м2,1,0.1,Хорошее,0.0
1,2,ольха,14 м2,2.5,0.08,Хорошее,0.0
2,34,Ольха серая,2,50.6,"0.04,0.7",Хорошее,0.0
3,5,береза,6 стволов,444444,555555,Хорошее,0.0
4,6,Ива ломкая,54 ствола,"5х14, 4х30, 3х8,2,1","7х3,4х11, 2х40",Хорошее,0.0
5,7,береза,1,2.5,0.1,Хорошее,0.0
6,8,береза,72 м2,1.5,0.2,Хорошее,0.0
7,9,ольха,72 м2,2,0.06,Хорошее,0.0
8,10,Яблоня домашняя,2 ствола,1.5х2,0.04х2,Хорошее,0.0
9,11,Яблоня домашняя,4,1.5х4,0.04х4,Хорошее,0.0


In [8]:
from src.parsing import Templates, Parser
import re


def orm_from_taxation_list_item(series: pd.Series) -> list[dict]:
    """Turn one taxation-list row into one or more per-point records.

    A row is returned unchanged (as a single-element list) when:

    * its 'Количество' value matches any of ``Templates.TRUNKS``,
      ``Templates.CONTOUR`` or ``Templates.LINE``; or
    * it is not identified as a stump (or its name already contains "пень")
      and it has exactly one quality value; or
    * it refers to a single point number.

    Otherwise the row is split into one record per point number. A single
    height, diameter or quality value is repeated for every point, and the
    name of each point identified as a stump gets the " (пень)" suffix.

    Args:
        series: A taxation-list row with the keys 'Номер точки',
            'Наименование', 'Количество', 'Высота', 'Толщина', 'Состояние'
            and 'Кустарник'.

    Returns:
        A list of dicts, one per resulting record. The function never returns
        ``None``, so the result can always be passed to ``list.extend``.
    """
    quantity = series['Количество']
    match_trunk = re.search(Templates.TRUNKS, quantity)
    match_contour = re.search(Templates.CONTOUR, quantity)
    match_line = re.search(Templates.LINE, quantity)
    if match_trunk or match_contour or match_line:
        return [series.to_dict()]

    split_numbers = Splitter.number(series['Номер точки'])
    split_height = Splitter.size(series['Высота'])
    split_diameter = Splitter.size(series['Толщина'])
    split_quality = Splitter.quality(series['Состояние'])
    is_stump = Parser.identification_stump(series['Высота'], series['Толщина'], bool(series['Кустарник']))

    # The name is only inspected when the row looks like a stump (short-circuit),
    # exactly as before, so rows with a non-string name are not touched needlessly.
    if (not is_stump or "пень" in series['Наименование'].lower()) and len(split_quality) == 1:
        return [series.to_dict()]

    if len(split_numbers) == 1:
        # Previously this path fell off the end of the function and returned
        # None, which made the caller's list.extend() raise TypeError. There is
        # nothing to split for a single point, so the row is passed through.
        # NOTE(review): no " (пень)" suffix is added here — confirm whether
        # single-point stump rows should be relabelled as well.
        return [series.to_dict()]

    # Repeat a single value once per point number. The loop below indexes these
    # lists by position in split_numbers, so that is the length required; the
    # 'Количество' text is no longer parsed with int(), which could raise
    # ValueError or produce lists shorter than split_numbers.
    points_count = len(split_numbers)
    if len(split_height) == 1:
        split_height = split_height * points_count
    if len(split_diameter) == 1:
        split_diameter = split_diameter * points_count
    if len(split_quality) == 1:
        split_quality = split_quality * points_count

    series_data = []
    for idx, point_number in enumerate(split_numbers):
        name = series['Наименование']
        if "пень" not in name.lower():
            # Re-evaluate the stump check with this point's own sizes.
            point_is_stump = Parser.identification_stump(
                split_height[idx], split_diameter[idx], bool(series['Кустарник'])
            )
            if point_is_stump:
                name = name + " (пень)"
        series_data.append({
            'Номер точки': point_number,
            'Наименование': name,
            'Количество': 1,
            'Высота': split_height[idx],
            'Толщина': split_diameter[idx],
            'Состояние': split_quality[idx],
            'Кустарник': series['Кустарник']
        })
    return series_data

# Expand every taxation-list row into its per-point records and gather them
# in one flat list; the list of dicts is turned into a DataFrame in one go.
taxation_list_orm = []
for _, series in taxation_list_df.iterrows():
    taxation_list_orm += orm_from_taxation_list_item(series)

taxation_list_orm_df = pd.DataFrame(data=taxation_list_orm)

taxation_list_orm_df

Unnamed: 0,Номер точки,Наименование,Количество,Высота,Толщина,Состояние,Кустарник
0,1,береза,14 м2,1,0.1,Хорошее,0.0
1,2,ольха,14 м2,2.5,0.08,Хорошее,0.0
2,3,Ольха серая,1,5,0.04,Хорошее,0.0
3,4,Ольха серая (пень),1,0.6,0.7,Хорошее,0.0
4,5,береза,6 стволов,444444,555555,Хорошее,0.0
5,6,Ива ломкая,54 ствола,"5х14, 4х30, 3х8,2,1","7х3,4х11, 2х40",Хорошее,0.0
6,7,береза,1,2.5,0.1,Хорошее,0.0
7,8,береза,72 м2,1.5,0.2,Хорошее,0.0
8,9,ольха,72 м2,2,0.06,Хорошее,0.0
9,10,Яблоня домашняя,2 ствола,1.5х2,0.04х2,Хорошее,0.0


In [9]:
from shapely.wkt import loads

# Load the AutoCAD export from the 'Автокад' sheet; index=False keeps every
# sheet column as a regular DataFrame column.
sheet_autocad = xw.sheets['Автокад']
autocad_df = (
    sheet_autocad
    .range('A1')
    .expand()
    .options(pd.DataFrame, header=1, index=False)
    .value
)

# Both columns are parsed from WKT text into shapely geometry objects.
for wkt_column in ('number_position', 'geometry'):
    autocad_df[wkt_column] = autocad_df[wkt_column].apply(loads)

autocad_df

Unnamed: 0,index,origin_number,number_position,split_number,type,geometry,size
0,0,3,POINT (3108.838205298518 1707.285109871409),3,Point,POINT (3108.838205298518 1707.285109871409),
1,1,4,POINT (3114.150988844972 1707.4695572873902),4,Point,POINT (3114.150988844972 1707.4695572873902),
2,2,5,POINT (3115.690642463274 1707.5867820573599),5,Point,POINT (3115.690642463274 1707.5867820573599),
3,3,6,POINT (3115.1004453425708 1706.18062074892),6,Point,POINT (3115.1004453425708 1706.18062074892),
4,4,7,POINT (3114.699609216242 1705.0905155704238),7,Point,POINT (3114.699609216242 1705.0905155704238),
5,5,10,POINT (3104.043722283615 1698.543651078497),10,Point,POINT (3104.043722283615 1698.543651078497),
6,6,11,POINT (3105.090899215862 1695.9481208535972),11,Point,POINT (3105.090899215862 1695.9481208535972),
7,7,12,POINT (3106.332887793542 1693.7013226121771),12,Point,POINT (3106.332887793542 1693.7013226121771),
8,8,14,POINT (3104.361115387819 1692.888423763281),14,Point,POINT (3104.361115387819 1692.888423763281),
9,9,13,POINT (3107.4888707400646 1689.420932022136),13,Point,POINT (3107.4888707400646 1689.420932022136),


In [33]:
def get_shapes_from_autocad_df(df: pd.DataFrame, number: str) -> dict:
    """Collect label positions and geometries for every point in ``number``.

    ``number`` is split with ``Splitter.number`` and each resulting value is
    looked up in ``df`` by its 'split_number' column.

    Args:
        df: Frame with the columns 'split_number', 'number_position' and
            'geometry'.
        number: Point-number text as accepted by ``Splitter.number``.

    Returns:
        A dict with the keys 'number_positions' and 'geometries'; both values
        are lists ordered like the split point numbers.

    Raises:
        KeyError: If a split number has no row in ``df`` (raised by ``.loc``).
    """
    point_numbers = Splitter.number(number)
    shapes_by_number = df.set_index('split_number')
    found_shapes = [
        shapes_by_number.loc[point_number][['number_position', 'geometry']].to_dict()
        for point_number in point_numbers
    ]
    return {
        'number_positions': [shape['number_position'] for shape in found_shapes],
        'geometries': [shape['geometry'] for shape in found_shapes],
    }

# The per-number lookup expects one AutoCAD row per split number.
assert autocad_df['split_number'].is_unique

# Each point-number text yields a two-field Series, so apply() returns a
# two-column frame that is assigned to the new shape columns.
shape_columns = taxation_list_orm_df['Номер точки'].apply(
    lambda point_number: pd.Series(get_shapes_from_autocad_df(autocad_df, point_number))
)
taxation_list_orm_df[['number_positions', 'geometries']] = shape_columns

taxation_list_orm_df

Unnamed: 0_level_0,Номер точки,Наименование,Количество,Высота,Толщина,Состояние,Кустарник,number_positions,geometries
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,береза,14 м2,1,0.1,Хорошее,0.0,[POINT (3075.965582309878 1696.6051757815592)],[POLYGON ((3075.045526933311 1693.259229309259...
1,2,ольха,14 м2,2.5,0.08,Хорошее,0.0,[POINT (3074.823165523758 1693.9006874227402)],[POLYGON ((3075.045526933311 1693.259229309259...
2,3,Ольха серая,1,5,0.04,Хорошее,0.0,[POINT (3108.838205298518 1707.285109871409)],[POINT (3108.838205298518 1707.285109871409)]
3,4,Ольха серая (пень),1,0.6,0.7,Хорошее,0.0,[POINT (3114.150988844972 1707.4695572873902)],[POINT (3114.150988844972 1707.4695572873902)]
4,5,береза,6 стволов,444444,555555,Хорошее,0.0,[POINT (3115.690642463274 1707.5867820573599)],[POINT (3115.690642463274 1707.5867820573599)]
5,6,Ива ломкая,54 ствола,"5х14, 4х30, 3х8,2,1","7х3,4х11, 2х40",Хорошее,0.0,[POINT (3115.1004453425708 1706.18062074892)],[POINT (3115.1004453425708 1706.18062074892)]
6,7,береза,1,2.5,0.1,Хорошее,0.0,[POINT (3114.699609216242 1705.0905155704238)],[POINT (3114.699609216242 1705.0905155704238)]
7,8,береза,72 м2,1.5,0.2,Хорошее,0.0,[POINT (3106.665839634858 1703.063361392738)],[POLYGON ((3106.916523995033 1690.249262830753...
8,9,ольха,72 м2,2,0.06,Хорошее,0.0,[POINT (3104.129760581689 1690.6193231862521)],[POLYGON ((3106.916523995033 1690.249262830753...
9,10,Яблоня домашняя,2 ствола,1.5х2,0.04х2,Хорошее,0.0,[POINT (3104.043722283615 1698.543651078497)],[POINT (3104.043722283615 1698.543651078497)]


In [34]:
# Name the index and turn its labels into strings.
taxation_list_orm_df.index.name = 'index'
taxation_list_orm_df.index = taxation_list_orm_df.index.astype(str)

# Replace the geometry objects in both list columns with their WKT text.
# Items that are already strings are passed through unchanged, so re-running
# this cell no longer fails with AttributeError on the already-converted data.
for geometry_column in ('number_positions', 'geometries'):
    taxation_list_orm_df[geometry_column] = taxation_list_orm_df[geometry_column].apply(
        lambda shapes: [shape if isinstance(shape, str) else shape.wkt for shape in shapes]
    )

taxation_list_orm_df

Unnamed: 0_level_0,Номер точки,Наименование,Количество,Высота,Толщина,Состояние,Кустарник,number_positions,geometries
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,береза,14 м2,1,0.1,Хорошее,0.0,[POINT (3075.965582309878 1696.6051757815592)],[POLYGON ((3075.045526933311 1693.259229309259...
1,2,ольха,14 м2,2.5,0.08,Хорошее,0.0,[POINT (3074.823165523758 1693.9006874227402)],[POLYGON ((3075.045526933311 1693.259229309259...
2,3,Ольха серая,1,5,0.04,Хорошее,0.0,[POINT (3108.838205298518 1707.285109871409)],[POINT (3108.838205298518 1707.285109871409)]
3,4,Ольха серая (пень),1,0.6,0.7,Хорошее,0.0,[POINT (3114.150988844972 1707.4695572873902)],[POINT (3114.150988844972 1707.4695572873902)]
4,5,береза,6 стволов,444444,555555,Хорошее,0.0,[POINT (3115.690642463274 1707.5867820573599)],[POINT (3115.690642463274 1707.5867820573599)]
5,6,Ива ломкая,54 ствола,"5х14, 4х30, 3х8,2,1","7х3,4х11, 2х40",Хорошее,0.0,[POINT (3115.1004453425708 1706.18062074892)],[POINT (3115.1004453425708 1706.18062074892)]
6,7,береза,1,2.5,0.1,Хорошее,0.0,[POINT (3114.699609216242 1705.0905155704238)],[POINT (3114.699609216242 1705.0905155704238)]
7,8,береза,72 м2,1.5,0.2,Хорошее,0.0,[POINT (3106.665839634858 1703.063361392738)],[POLYGON ((3106.916523995033 1690.249262830753...
8,9,ольха,72 м2,2,0.06,Хорошее,0.0,[POINT (3104.129760581689 1690.6193231862521)],[POLYGON ((3106.916523995033 1690.249262830753...
9,10,Яблоня домашняя,2 ствола,1.5х2,0.04х2,Хорошее,0.0,[POINT (3104.043722283615 1698.543651078497)],[POINT (3104.043722283615 1698.543651078497)]
