In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from pyproj import CRS
from pyproj import Transformer
from osgeo import osr
from scipy.spatial import distance
import ast
import operator
from geopy.distance import geodesic
import winsound
import os
import geopandas as gpd
from shapely import wkt
import operator
pd.set_option('display.max_columns', 500)


def drop_duplicate_col_sec_iter(df):
    """Return ``df`` without the helper columns listed below.

    Only the listed names that actually exist in ``df`` are dropped; names
    that are absent are ignored. When none of them is present the input
    frame itself is returned unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that may contain some of the helper columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` (same object) if nothing had to be dropped, otherwise a new
        frame without the listed columns.
    """
    cols_list = ["FFF",'first_conn','first_isla','last_islan','last_conne','df_was_','flip_df','flip_pip_1',
     'tail_y_tra','closest_se','y_round','estpop2','mifal_y','good_col','tail_coord','measef_x',
     'good','tail_x_tra','mifal_x','x_for_tran','y_transfor','x_round','mifal','flip_df',
     'measef','dye','measef_y','new_coords','x_transfor','distance_f','sum_pop','y_for_tran','2','1','0',"manual_decision"]

    # Select the existing columns explicitly instead of wrapping every drop in
    # a bare ``except``; dict.fromkeys removes the repeated 'flip_df' entry
    # while keeping the list order.
    present = [col for col in dict.fromkeys(cols_list) if col in df.columns]
    if not present:
        return df
    return df.drop(columns=present)

def import_pipes(source):
    """Read a shapefile with networkx and return its edges as a DataFrame.

    Each edge becomes one row: columns ``0`` and ``1`` hold the two edge
    endpoints, column ``2`` the attribute dict, and the attribute dict is
    additionally expanded into one column per key. Helper columns are removed
    with ``drop_duplicate_col_sec_iter`` before ``manual_decision`` (NaN),
    ``mifal`` (string form of endpoint 0) and ``measef`` (string form of
    endpoint 1) are added.

    NOTE(review): ``nx.read_shp`` is not available in networkx >= 3.0 -
    confirm the pinned networkx version.
    """
    graph = nx.read_shp(source, simplify=True, geom_attrs=True, strict=False)  # read shp file

    edge_rows = pd.DataFrame(graph.edges.data())
    attribute_cols = pd.json_normalize(edge_rows[2])
    pipes = drop_duplicate_col_sec_iter(pd.concat([edge_rows, attribute_cols], axis=1))

    pipes["manual_decision"] = np.nan
    # Replace the coordinate tuples with their string names
    for name_col, endpoint_col in (("mifal", 0), ("measef", 1)):
        pipes[name_col] = pipes[endpoint_col].astype(str)

    return pipes


def Transformer_coordinates(manholes_shp,EPSG):
    """Transform both edge endpoints of every row from ``EPSG`` to EPSG:4326.

    Column ``0`` is transformed into ``new_coords`` and column ``1`` into
    ``tail_coords``; ``mifal`` and ``measef`` are then set to the string form
    of those transformed tuples. The frame is modified in place and returned.
    The helper columns ``x_for_transform`` / ``y_for_transform`` are left in
    the frame holding the values of the last processed endpoint (column 1).

    Parameters
    ----------
    manholes_shp : pandas.DataFrame
        Frame whose columns ``0`` and ``1`` hold coordinate pairs
        (element 0 is passed as the first transform argument, element 1 as
        the second).
    EPSG : str
        Source CRS accepted by ``Transformer.from_crs``.

    NOTE(review): the transformer is built without ``always_xy``, so the
    output axis order follows the EPSG:4326 definition - presumably
    (lat, lon); confirm against the consumers of ``mifal`` / ``measef``.
    """
    # The CRS objects the original built here were never used, so only the
    # transformer is created.
    transformer = Transformer.from_crs(EPSG, "EPSG:4326")

    for endpoint_col, coords_col in ((0, "new_coords"), (1, "tail_coords")):
        manholes_shp["x_for_transform"] = manholes_shp[endpoint_col].str[0]
        manholes_shp["y_for_transform"] = manholes_shp[endpoint_col].str[1]
        # Seed the target column with the untransformed pairs so it exists and
        # can hold tuples; every cell is overwritten in the loop below.
        manholes_shp[coords_col] = list(zip(manholes_shp["x_for_transform"], manholes_shp["y_for_transform"]))

        for ind in list(manholes_shp.index):
            x = manholes_shp.at[ind, "x_for_transform"]
            y = manholes_shp.at[ind, "y_for_transform"]
            manholes_shp.at[ind, coords_col] = transformer.transform(x, y)
    manholes_shp["mifal"] = manholes_shp["new_coords"].astype(str)
    manholes_shp["measef"] = manholes_shp["tail_coords"].astype(str)
    return(manholes_shp)

def island_connect(pipes):
    """Re-attach rows whose ``measef`` is not the ``mifal`` of any row.

    For every such row the nearest ``new_coords`` point among the rows whose
    ``measef`` lies outside the row's connected component is looked up with
    ``closest_node`` and stored (as a string) in the ``closest`` column.
    Afterwards ``measef`` is replaced by ``closest`` wherever ``closest`` is
    set. The frame is modified in place and returned.

    Reads the module-level graph ``G`` (it is not a parameter) - it must exist
    and contain every ``measef`` value before this function is called.
    """
    for ind in list(pipes.index):
        measef = pipes.at[ind, "measef"]
        all_nodes_above_list = list(nx.node_connected_component(G, measef))
        all_nodes_above_list.append(measef)
        # If no row has this measef as its mifal, the row ends an island
        if (pipes["mifal"] == measef).any():
            continue

        point = pipes.at[ind, "tail_coords"]
        mifal = pipes.at[ind, "mifal"]  # the end point of this row's mifal
        all_nodes_above_list.append(mifal)
        all_nodes_above_list.append(point)
        optional_nodes_df = pipes[~pipes.measef.isin(all_nodes_above_list)].copy()
        if optional_nodes_df.empty:
            # Nothing outside the component to attach to; closest_node would
            # fail on an empty candidate list.
            continue
        c_p = closest_node(point, list(optional_nodes_df["new_coords"].values))  # point closest to the end of the line
        pipes.at[ind, "closest"] = str(c_p)  # store the closest point

    # The "closest" column only exists if at least one island row was found;
    # the original indexed it unconditionally and raised KeyError otherwise.
    if "closest" not in pipes.columns:
        return (pipes)

    # Replace the old measef with the closest one
    has_closest = pipes["closest"].notnull()
    pipes.loc[has_closest, "measef"] = pipes.loc[has_closest, "closest"]
    return (pipes)


def check_duplicated_m_h(pipes):
    """Return the ``mifal`` values that occur in more than one row.

    The list is ordered by descending number of occurrences. An empty list is
    returned when no value is repeated (the original raised ``ValueError``
    from ``pd.concat`` in that case).
    """
    # Rows whose mifal is shared with at least one other row (duplicated /
    # reversed manholes).
    duplicated_rows = pipes[pipes["mifal"].duplicated(keep=False)]
    # The stable sort reproduces the row order the original got from
    # concatenating the groupby groups.
    counts = duplicated_rows.sort_values("mifal", kind="stable")["mifal"].value_counts()
    return list(counts.index)


def check_if_is_up_conect(G, mifal):
    """Return 1 if the connected component of ``mifal`` in ``G`` is non-empty, else 0.

    NOTE(review): the component of a node presumably always contains the node
    itself, which would make the 0 branch unreachable - confirm whether
    ``> 1`` was intended.
    """
    component = list(nx.node_connected_component(G, mifal))
    return 1 if len(component) > 0 else 0


def do_flip(pipes, ind, mifal, measef):
    """Swap the endpoints of row ``ind``.

    Writes ``measef`` into the ``mifal`` column and ``mifal`` into the
    ``measef`` column of that row, in place, and returns the same frame.
    """
    for column, value in (("mifal", measef), ("measef", mifal)):
        pipes.at[ind, column] = value
    return pipes


def closest_node(node, nodes):
    """Return the element of ``nodes`` with the smallest ``cdist`` distance to ``node``."""
    distances_to_node = distance.cdist([node], nodes)[0]
    return nodes[distances_to_node.argmin()]


def find_espg(prj_file):
    """Return the EPSG code of a ``.prj`` file as an ``"EPSG:<code>"`` string.

    The file text is imported as an ESRI definition into an
    ``osr.SpatialReference`` and the authority code found by
    ``AutoIdentifyEPSG`` is returned with the ``"EPSG:"`` prefix.

    NOTE(review): if no authority code is identified, ``GetAuthorityCode``
    presumably returns ``None`` and the concatenation raises ``TypeError`` -
    confirm how callers should handle unidentified projections.
    """
    # The context manager closes the file even if reading fails.
    with open(prj_file, 'r') as prj_filef:
        prj_txt = prj_filef.read()
    srs = osr.SpatialReference()
    srs.ImportFromESRI([prj_txt])
    srs.AutoIdentifyEPSG()
    code = srs.GetAuthorityCode(None)
    return "EPSG:" + code


def make_wkt(pipes):
    """Add a ``WKT_matan`` LINESTRING column built from ``mifal`` and ``measef``.

    Each of the two columns is expected to hold a string of the form
    ``"(<first>, <second>)"``. The text before the first comma (minus the
    opening parenthesis) is stored in ``<col>_y`` and the text after it (minus
    the closing parenthesis) in ``<col>_x``. The WKT is written in
    ``x y`` order: ``LINESTRING (<mifal_x> <mifal_y>,<measef_x> <measef_y> )``.
    The frame is modified in place and returned.
    """
    for endpoint in ("mifal", "measef"):
        # ``n`` must be passed by keyword: pandas >= 2.0 rejects it as a
        # positional argument.
        halves = pipes[endpoint].str.split(',', n=1, expand=True)
        pipes[endpoint + "_y"] = halves[0].str[1:]   # drop the leading "("
        pipes[endpoint + "_x"] = halves[1].str[:-1]  # drop the trailing ")"

    pipes["WKT_matan"] = "LINESTRING (" + pipes["mifal_x"] + " " + pipes["mifal_y"] + "," + pipes["measef_x"] + " " + \
                         pipes["measef_y"] + " )"
    return (pipes)


def make_wkt_2(pipes):
    """Add a ``WKT_matan`` LINESTRING column with the coordinates in ``y x`` order.

    Splits ``mifal`` and ``measef`` (strings of the form
    ``"(<first>, <second>)"``) at the first comma into ``<col>_y`` (text
    before the comma, without the opening parenthesis) and ``<col>_x`` (text
    after it, without the closing parenthesis), then writes
    ``LINESTRING (<mifal_y> <mifal_x>,<measef_y> <measef_x> )``.
    The frame is modified in place and returned.
    """
    for endpoint in ("mifal", "measef"):
        # ``n`` must be passed by keyword: pandas >= 2.0 rejects it as a
        # positional argument.
        halves = pipes[endpoint].str.split(',', n=1, expand=True)
        pipes[endpoint + "_y"] = halves[0].str[1:]   # drop the leading "("
        pipes[endpoint + "_x"] = halves[1].str[:-1]  # drop the trailing ")"

    pipes["WKT_matan"] = "LINESTRING (" + pipes["mifal_y"] + " " + pipes["mifal_x"] + "," + pipes["measef_y"] + " " + \
                         pipes["measef_x"] + " )"
    return (pipes)


def component_list_update(pipes, last_points):
    """Collect the ``new_coords`` of rows connected to any of ``last_points``.

    Builds a graph from the ``mifal`` / ``measef`` columns, gathers the nodes
    of the connected component of every entry in ``last_points`` and returns
    ``(new_coords of the rows whose mifal is one of those nodes, graph)``.
    """
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    reachable_nodes = [
        node
        for station in last_points
        for node in list(nx.node_connected_component(G, station))
    ]
    in_component = pipes["mifal"].isin(reachable_nodes)
    optional_nodes_to_connect = pipes[in_component]["new_coords"].tolist()
    return (optional_nodes_to_connect, G)


def delet_keys_in_islands(sub_graphs, last_points):
    """Number the components and drop those containing any of ``last_points``.

    Parameters
    ----------
    sub_graphs : iterable of collections
        Components (e.g. node sets); each gets its enumeration position as key.
    last_points : collection
        Nodes whose components must be excluded.

    Returns
    -------
    dict
        ``{position: component}`` for every component that contains none of
        ``last_points``. Always a dict - the original returned from inside
        the deletion loop, so it produced ``None`` when nothing had to be
        removed and stopped after the first removal otherwise.
    """
    return {
        position: component
        for position, component in enumerate(sub_graphs)
        if not any(elem in last_points for elem in component)
    }


def do_conect(pipes, len_dis):
    """Apply the ``closest`` suggestions that are within ``len_dis`` metres.

    For every ``closest`` value that occurs fewer than 5 times, the row whose
    ``measef`` is geodesically nearest to it is selected. If that distance is
    not greater than ``len_dis`` the row's ``measef`` is replaced by
    ``closest`` and its ``good`` column is set to ``"good"``. The ``closest``
    column is dropped afterwards. If the column does not exist a message is
    printed and the frame is returned untouched.

    The frame is modified in place and returned.
    """
    if "closest" not in pipes.columns:
        print("no closest pipes to conect")
        return (pipes)

    # Work on the value_counts Series directly: the column names produced by
    # ``value_counts().reset_index()`` differ between pandas 1.x and 2.x.
    counts = pipes["closest"].value_counts()
    good_list = counts[counts < 5].index.tolist()

    all_values_dict = {}
    for close_site in good_list:
        the_most_close_dict = {}
        for ind in list(pipes[pipes["closest"] == close_site].index):
            # NOTE(review): eval() on strings that come from the input data;
            # ast.literal_eval would be safer if the values are always plain
            # coordinate tuples - confirm before replacing.
            dist = geodesic(eval(pipes.at[ind, "measef"]), eval(close_site)).meters
            the_most_close_dict[ind] = dist
        best_ind, best_dist = min(the_most_close_dict.items(), key=operator.itemgetter(1))
        if best_dist > len_dis:
            continue
        all_values_dict[best_ind] = best_dist

    for k_ind in all_values_dict:
        pipes.at[k_ind, "measef"] = pipes.at[k_ind, "closest"]
        pipes.at[k_ind, "good"] = "good"
    pipes.drop('closest', axis=1, inplace=True)
    return (pipes)


def get_islands(pipes, last_points):
    """Return the components not touching ``last_points`` and the dangling ends.

    Builds a graph from the ``mifal`` / ``measef`` columns, splits it into
    connected components and passes them to ``delet_keys_in_islands`` together
    with ``last_points``. The second return value lists the ``measef`` values
    that never appear in the ``mifal`` column.
    """
    print("get_islands")
    graph = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    components = nx.connected_components(graph.to_undirected())
    # Deletes an island from the dict if it is connected to WWTP
    islands_dict = delet_keys_in_islands(components, last_points)
    measef_nodes = set(list(pipes["measef"]))
    mifal_nodes = set(list(pipes["mifal"]))
    dangling_ends = measef_nodes - mifal_nodes
    return (islands_dict, list(dangling_ends))


def find_sec_closest(potential_points_to_connect, pipes, islands_dict):
    """Fill ``closest_sec`` for every candidate point that belongs to an island.

    For each entry of ``potential_points_to_connect`` the first island in
    ``islands_dict`` containing it is located; the nearest ``mifal`` among the
    rows outside that island (per ``closest_node``) is written, as a string,
    into ``closest_sec`` of all rows whose ``measef`` equals the entry.
    The frame is modified in place and returned.

    NOTE(review): the coordinate strings are parsed with eval(); confirm the
    data source is trusted.
    """
    print("find_sec_closest")
    total_points = len(potential_points_to_connect)
    for position, measef in enumerate(potential_points_to_connect):
        print("len_loop :", total_points, "ind = ", position)
        for island_key, island_nodes in islands_dict.items():
            if measef not in island_nodes:
                continue
            outside_rows = pipes[~pipes["mifal"].isin(list(islands_dict[island_key]))]
            outside_points = [eval(text) for text in list(outside_rows["mifal"])]

            nearest = closest_node(eval(measef), outside_points)
            for ind in list(pipes[pipes["measef"] == measef].index):
                pipes.at[ind, "closest_sec"] = str(nearest)
            break
    return (pipes)


def connect_lines(pipes, islands_dict, potential_points_to_connect, G, len_dis):
    """Try to connect every island to the nearest point outside of it.

    For each island the candidate points (island nodes that are also in
    ``potential_points_to_connect``) get the nearest outside ``mifal``
    written to ``closest_sec``; ``do_conect_for_one_island`` is then called
    with ``len_dis`` to apply a connection. Returns the resulting frame.

    NOTE(review): ``G`` is the graph passed in by the caller and is not
    rebuilt after ``pipes`` changes inside the loop - confirm this is intended.
    """
    print("find_sec_closest")
    island_count = len(islands_dict)
    for island_key, island_nodes in islands_dict.items():
        print("len_loop :", island_count, "ind = ", island_key)
        component_nodes = list(nx.node_connected_component(G, list(island_nodes)[0]))
        # All mifal values of lines that are not part of this island
        outside_mifal = list(pipes[~pipes.mifal.isin(component_nodes)]["mifal"])
        # Island nodes that are also potential connection points
        island_candidates = list(set(island_nodes) & set(potential_points_to_connect))

        # Parse the point strings so distances can be computed
        outside_points = [eval(text) for text in outside_mifal]

        for measef in island_candidates:
            # Closest outside point to this candidate
            nearest = closest_node(eval(measef), outside_points)
            for ind in list(pipes[pipes["measef"] == measef].index):
                # Record the closest point on every row ending at the candidate
                pipes.at[ind, "closest_sec"] = str(nearest)
        pipes = do_conect_for_one_island(pipes, len_dis)
    return (pipes)


def do_conect_for_one_island(pipes, len_dis):
    """Apply the single best ``closest_sec`` suggestion within ``len_dis`` metres.

    For every distinct ``closest_sec`` value the row whose ``measef`` is
    geodesically nearest to it is selected; candidates farther than
    ``len_dis`` are discarded. Of the remaining candidates the one with the
    smallest distance wins: all rows sharing its ``measef`` get ``good`` set
    from ``good_col`` and ``measef`` replaced by their ``closest_sec``, and
    the ``closest_sec`` column is dropped.

    If no candidate qualifies a message is printed and ``closest_sec`` is
    left in the frame. If the column does not exist a message is printed.
    The frame is modified in place and returned.
    """
    if "closest_sec" not in pipes.columns:
        print("no closest pipes to conect")
        return (pipes)

    # Distinct target points, most frequent first. Taken from the
    # value_counts index because the column names produced by
    # ``value_counts().reset_index()`` differ between pandas 1.x and 2.x.
    good_list = pipes["closest_sec"].value_counts().index.tolist()

    all_values_dict = {}
    for close_site in good_list:
        # Keep only the row closest to this target
        the_most_close_dict = {}
        for ind in list(pipes[pipes["closest_sec"] == close_site].index):
            # NOTE(review): eval() on strings that come from the input data;
            # confirm the data source is trusted.
            dist = geodesic(eval(pipes.at[ind, "measef"]), eval(close_site)).meters
            the_most_close_dict[ind] = dist
        best_ind, best_dist = min(the_most_close_dict.items(), key=operator.itemgetter(1))
        # Discard potential connections longer than the allowed radius
        if best_dist > len_dis:
            continue
        all_values_dict[best_ind] = best_dist

    if len(all_values_dict) == 0:
        print("no closest pipes to conect2")
        return (pipes)

    # The closest line over all candidates
    winner_ind = min(all_values_dict.items(), key=operator.itemgetter(1))[0]
    measef_in_df = pipes.at[winner_ind, "measef"]  # the measef that has to be replaced
    same_measef = pipes["measef"] == measef_in_df
    pipes.loc[same_measef, "good"] = pipes.loc[same_measef]["good_col"]  # colour marker for QGIS
    pipes.loc[same_measef, "measef"] = pipes.loc[same_measef]["closest_sec"]  # replace the measef everywhere
    pipes.drop('closest_sec', axis=1, inplace=True)
    return (pipes)




def sum_pop_per_unit(pipes, last_points=None):
    """Accumulate ``estPop`` along the network into ``sum_pop``.

    ``distance_for_end_fun`` assigns each row its path length from
    ``last_points[0]``. Rows are then processed from the largest distance down
    to 0: for a row with ``mifal`` m, the ``estpop2`` and ``sum_pop`` of all
    rows whose ``measef`` is m are summed into the row's ``sum_pop`` and the
    ``estpop2`` of those rows is reset to 0. Rows without a distance get -2
    and are never processed.

    Parameters
    ----------
    pipes : pandas.DataFrame
        Needs the columns ``mifal``, ``measef`` and ``estPop`` (and ``name``
        when ``last_points`` is omitted).
    last_points : list, optional
        Target nodes forwarded to ``distance_for_end_fun``. The original
        called that helper without this required argument and therefore
        always raised ``TypeError``. When omitted, the ``mifal`` values of the
        rows named ``"WWTP"`` are used, as done elsewhere in this file.

    Returns
    -------
    pandas.DataFrame
        ``pipes`` with ``estpop2``, ``sum_pop`` and ``distance_for_end`` set.
    """
    if last_points is None:
        last_points = pipes[pipes["name"] == "WWTP"]["mifal"].tolist()
    pipes = distance_for_end_fun(pipes, last_points)
    pipes["estpop2"] = pipes["estPop"]
    pipes["sum_pop"] = 0
    pipes["distance_for_end"] = pipes["distance_for_end"].fillna(-2)
    start_cut = max(list(pipes["distance_for_end"]))
    while start_cut >= 0:
        for ind in list(pipes[pipes["distance_for_end"] == start_cut].index):
            measef = pipes.at[ind, "mifal"]
            feeding_rows = pipes[pipes["measef"] == measef]
            if feeding_rows.shape[0] > 0:
                pop_to_add1 = feeding_rows["estpop2"].sum()
                pop_to_add2 = feeding_rows["sum_pop"].sum()
                clean_pop_list = list(feeding_rows.index)
                # Zero the counted population so it is not added twice
                pipes.loc[pipes.index.isin(clean_pop_list), "estpop2"] = 0
                pipes.at[ind, "sum_pop"] = pop_to_add1 + pop_to_add2
        start_cut = start_cut - 1
        print(start_cut)
    return (pipes)


def distance_for_end_fun(pipes, last_points):
    """Store each row's path length from ``last_points[0]`` in ``distance_for_end``.

    A graph is built from the ``mifal`` / ``measef`` columns and
    ``nx.shortest_path_length`` is evaluated from ``last_points[0]`` to every
    row's ``mifal``. Rows for which networkx reports an error (no path,
    unknown node) keep their previous value - NaN if the column did not exist.
    The column is always present in the result, so callers can ``fillna`` it
    even when no row is reachable or ``last_points`` is empty.

    The frame is modified in place and returned.
    """
    # Build the graph before adding the column so it is not picked up as an
    # edge attribute.
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    if "distance_for_end" not in pipes.columns:
        pipes["distance_for_end"] = np.nan
    if len(last_points) == 0:
        # Nothing to measure from; the original skipped every row in this case.
        return (pipes)

    source = last_points[0]
    for ind in list(pipes.index):
        print(ind)
        taraget = pipes.at[ind, "mifal"]
        try:
            pipes.at[ind, "distance_for_end"] = nx.shortest_path_length(G, source, taraget)
        except nx.NetworkXException:
            # Covers NetworkXNoPath and NodeNotFound; other errors propagate
            # instead of being silently swallowed by a bare except.
            continue
    return (pipes)


def print_info(pipes, last_points):
    """Print how much of the network is connected to ``last_points[0]``.

    Prints the number of rows, the number of nodes in the connected component
    of ``last_points[0]`` and their ratio as a percentage. Returns ``()``.

    NOTE(review): the percentage divides a node count by a row (edge) count -
    confirm this is the intended measure.
    """
    graph = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    connected_nodes = list(nx.node_connected_component(graph, last_points[0]))
    total_rows = pipes.shape[0]
    print("len all", total_rows)
    print("len connect list", len(connected_nodes))
    share = round(len(connected_nodes) * 100. / total_rows, 4)
    print("Percentage of connected lines", share, "%")
    return ()


def sum_pop_per_unit2(pipes, last_points):
    """Accumulate ``estPop`` along the network into ``sum_pop`` (deduplicated).

    Rows are first sorted by ``estPop`` and reduced to one row per ``mifal``
    (the one with the largest ``estPop``), and the index is reset.
    ``distance_for_end_fun`` then assigns each row its path length from
    ``last_points[0]``. Rows are processed from the largest distance down to
    0: for a row with ``mifal`` m, the ``estpop2`` and ``sum_pop`` of the rows
    whose ``measef`` is m are summed into the row's ``sum_pop`` and the
    ``estpop2`` of those rows is reset to 0. Rows without a distance get -2
    and are never processed.

    Returns the new, deduplicated frame (the caller's frame is not modified
    by the sort / dedup step).
    """
    pipes = (
        pipes.sort_values(by="estPop")
        .drop_duplicates(subset=['mifal'], keep='last')
        .reset_index(drop=True)
    )
    pipes = distance_for_end_fun(pipes, last_points)
    pipes["estpop2"] = pipes["estPop"]
    pipes["sum_pop"] = 0
    pipes["distance_for_end"] = pipes["distance_for_end"].fillna(-2)
    start_cut = max(list(pipes["distance_for_end"]))
    while start_cut >= 0:
        for ind in list(pipes[pipes["distance_for_end"] == start_cut].index):
            measef = pipes.at[ind, "mifal"]
            feeding_rows = pipes[pipes["measef"] == measef].copy()
            if feeding_rows.shape[0] > 0:
                # The original called drop_duplicates here without keeping
                # its result; the deduplicated frame is now actually used.
                feeding_rows = feeding_rows.sort_values(by=["estpop2"]).drop_duplicates(
                    subset=['mifal'], keep='last')

                pop_to_add1 = feeding_rows["estpop2"].sum()
                pop_to_add2 = feeding_rows["sum_pop"].sum()
                clean_pop_list = list(feeding_rows.index)
                # Zero the counted population so it is not added twice
                pipes.loc[pipes.index.isin(clean_pop_list), "estpop2"] = 0
                pipes.at[ind, "sum_pop"] = pop_to_add1 + pop_to_add2
        start_cut = start_cut - 1
        print(start_cut)
    return (pipes)


def keep_connect_network(pipes, last_points,cycle_list):
    """Interactively keep connecting islands until the user answers ``n``.

    After printing the current connection statistics the user is asked
    whether to continue. On ``y`` a maximum connection radius is requested,
    lines are flipped with ``flip_upside_down_line3``, islands are recomputed
    with ``get_islands`` and connected with ``connect_lines`` using that
    radius. Any answer other than ``y`` / ``n`` simply repeats the question.

    ``last_points`` is recomputed from the rows named ``"WWTP"`` on every
    round; ``cycle_list`` is accepted but not used.

    Returns the updated frame.
    """
    print_info(pipes, last_points)
    keep_connect = "y"
    while keep_connect != "n":
        # Backslash escaped explicitly: "\y" is an invalid escape sequence
        # (the prompt text shown to the user is unchanged).
        keep_connect = input("do you want to keep connect?: n\\y ")
        if keep_connect != "y":
            continue

        new_len = input("Insert maximum radius for connection")
        try:
            len_dis = int(new_len)
        except ValueError:
            # Ask again instead of aborting the whole session on a typo.
            print("radius must be a whole number")
            continue

        G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
        last_points = pipes[pipes["name"] == "WWTP"]["mifal"].tolist()
        pipes = flip_upside_down_line3 (pipes , 0)
        print_info(pipes, last_points)
        islands_dict, potential_points_to_connect = get_islands(pipes, last_points)
        pipes = connect_lines(pipes, islands_dict, potential_points_to_connect, G, len_dis)
    return (pipes)


def sum_pop(pipes, last_points,cycle_list):
    """Deduplicate edges, flip lines and accumulate the population.

    Rows with the same (``mifal``, ``measef``) pair are dropped in place
    (keeping the last), ``flip_upside_down_line3`` is applied and the result
    is passed to ``sum_pop_per_unit2`` with ``last_points``.
    ``cycle_list`` is accepted but not used.
    """
    pipes.drop_duplicates(subset=['mifal', 'measef'], keep='last', inplace=True)
    # The graph is built exactly as before although nothing below reads it.
    nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    flipped = flip_upside_down_line3(pipes, 0)
    return sum_pop_per_unit2(flipped, last_points)


def make_csv(pipes, last_points):
    """Ask for a file name and write ``pipes`` to ``<name>.csv``.

    Rows whose ``mifal`` is in the connected component of ``last_points[0]``
    get ``dye`` set to ``"dye"``; ``make_wkt`` adds the WKT column; the
    ``dye`` value counts are printed and the frame is written as UTF-8 CSV.
    """
    file_name = input("Insert a file name") + ".csv"
    graph = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    connect_list = list(nx.node_connected_component(graph, last_points[0]))
    in_component = pipes["mifal"].isin(connect_list)
    rows_to_paint = pipes.loc[in_component].shape[0]
    pipes.loc[in_component, "dye"] = ["dye"] * rows_to_paint
    pipes = make_wkt(pipes)
    print(pipes["dye"].value_counts())
    pipes.to_csv(file_name, encoding='utf-8')

def make_sound ():
    """Play a 500 Hz beep for 1500 ms through ``winsound``."""
    beep_hz, beep_ms = 500, 1500
    winsound.Beep(beep_hz, beep_ms)

def adding_statistics ( pipes,last_points):
    """Return (nodes connected to ``last_points[0]``) / (rows in ``pipes``), rounded to 2 places."""
    graph = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    component_nodes = list(nx.node_connected_component(graph, last_points[0]))
    ratio = len(component_nodes) / pipes.shape[0]
    return round(ratio, 2)

def do_statistics(statistics_df,pipes,islands_list,last_points):
    """Fill ``statistics_df`` with run statistics and merge it onto ``pipes``.

    Adds the first and last entry of ``islands_list``, the connected ratio
    from ``adding_statistics``, the number of rows marked ``"good"`` and the
    number of rows marked ``"pipes_was_fliped"`` (each only when the
    corresponding column / label exists; otherwise a message is printed).
    ``statistics_df`` gets ``name = "WWTP"`` and is right-merged with
    ``pipes`` on ``name``.

    Returns the merged frame.
    """
    statistics_df["first_islands_number"] = islands_list[0]
    statistics_df["last_islands_number"] = islands_list[-1]
    statistics_df["last_connected"] = [adding_statistics(pipes,last_points)]
    # KeyError covers both a missing column and a missing label in the
    # counts; anything else is a real error and is no longer hidden.
    try:
        statistics_df["pipes_was_connected"] = pipes["good"].value_counts()["good"]
    except KeyError:
        print("no pipes was coneccted")
    try:
        statistics_df["flip_pipes_tot"] = pipes["flip_pipes"].value_counts()["pipes_was_fliped"]
    except KeyError:
        print("no pipes was flip")
    statistics_df["name"] = "WWTP"
    result  = pd.merge(statistics_df, pipes, how="right", on=["name"])
    return(result )

def find_cycle (pipes,G):
    """Mark the rows whose ``mifal`` lies on a cycle found by ``nx.find_cycle``.

    The nodes of the cycle returned by ``nx.find_cycle(G,
    orientation="original")`` are collected (the last element of every edge
    tuple is dropped) and the rows whose ``mifal`` is one of them get
    ``cycle = 1``.

    Returns
    -------
    tuple
        ``(pipes, cycle_list)`` where ``cycle_list`` holds the distinct nodes
        of the cycle.

    Any exception raised by ``nx.find_cycle`` itself propagates to the caller.
    """
    cycle = nx.find_cycle(G, orientation="original")
    cycle_list = list({node for edge in cycle for node in edge[:-1]})
    try:
        pipes.loc[pipes["mifal"].isin(cycle_list), "cycle"] = 1
    except KeyError as err:
        # Marking is best-effort, but say so instead of passing silently.
        print("could not mark cycle rows:", err)
    if len(cycle_list)>0:
        print ("the GIS got cycle")
        print(cycle_list)
    return(pipes,cycle_list)

def make_shp_file (pipes):
    """Ask for a file name and write ``pipes`` as ``<name>.shp``.

    Rows whose ``mifal`` is in the connected component of the first row named
    ``"WWTP"`` get ``dye = "dye"``. ``make_wkt`` adds the ``WKT_matan``
    column, which is parsed into geometries and used as the geometry of a
    GeoDataFrame with CRS ``epsg:4326``.

    Returns
    -------
    tuple
        ``(pipes, gdf)`` - the input frame (modified in place) and the
        GeoDataFrame that was written.
    """
    shp_name = input("Insert a file name")
    shp_name = shp_name + ".shp"

    pipes.columns = pipes.columns.astype(str)
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    last_points = pipes[pipes["name"]=="WWTP"]["mifal"].values[0]
    connect_list = (list(nx.node_connected_component(G, last_points)))
    len_list_to_paint = pipes.loc[pipes["mifal"].isin(connect_list)].shape[0]
    pipes.loc[pipes["mifal"].isin(connect_list), "dye"] = ["dye"] * len_list_to_paint
    geo_df = make_wkt (pipes)
    geo_df['WKT_matan'] = geo_df['WKT_matan'].apply(wkt.loads)
    line_gdf = gpd.GeoDataFrame(geo_df, geometry='WKT_matan')
    gdf = gpd.GeoDataFrame(line_gdf, geometry='WKT_matan',crs="epsg:4326")
    # Cast every other column to float where possible, otherwise to str.
    # NOTE(review): the casts are assigned on ``geo_df`` after ``gdf`` was
    # created, so they presumably do not reach the frame that is written -
    # confirm whether the loop should run on ``gdf`` instead.
    skip_col = ["good","WKT_matan","dye","flip_pipes"]
    for col in geo_df.columns:
        if col in skip_col:
            continue
        try:
            geo_df[col]= geo_df[col].astype(float)
        except (TypeError, ValueError):
            geo_df[col]= geo_df[col].astype(str)

    # "flip_pipes" only exists once at least one pipe was flipped; the
    # original printed it unconditionally and raised KeyError otherwise.
    if "flip_pipes" in gdf.columns:
        print("""gdf["flip_pipes"]""",gdf["flip_pipes"].value_counts())
    gdf.to_file(shp_name)
    return(pipes,gdf)


def flip_upside_down_line3 (pipes,time_flip ):
    """Flip rows that share a ``mifal`` inside the WWTP component.

    Within the connected component of the first row named ``"WWTP"``, every
    ``mifal`` that occurs on more than one row and is not part of a cycle
    (per ``fine_cycle_2``) is handled: of the rows sharing it, all but the
    one with the highest index label get their ``mifal`` / ``measef`` swapped
    via ``do_flip`` and ``flip_pipes`` set to 1. The function then calls
    itself with ``time_flip + 1`` while ``time_flip < 5``.

    ``pipes`` is modified in place (including a ``temp_mifal`` copy of
    ``mifal``) and returned.

    NOTE(review): the rows to keep are chosen by index label
    (``itemgetter(0)``), not by the computed path length - confirm whether
    sorting by length (``itemgetter(1)``) was intended.
    NOTE(review): the recursion condition counts distinct ``mifal`` values,
    not duplicated ones, so it is true whenever the component has rows -
    confirm the intended stop condition.
    """
    last_points = pipes[pipes["name"]=="WWTP"]["mifal"].values[0]
    pipes["temp_mifal"] = pipes["mifal"]
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
    cycle_list = fine_cycle_2(pipes)

    all_nodes_above_list = list(nx.node_connected_component(G, last_points))
    df_for_pump_stations = pipes[pipes["mifal"].isin(all_nodes_above_list)].copy()

    # Filter the value_counts Series directly: wrapping it in a DataFrame and
    # indexing the "mifal" column fails on pandas >= 2.0, where that column
    # is named "count".
    mifal_counts = df_for_pump_stations["mifal"].value_counts()
    duplicate_list = mifal_counts[mifal_counts > 1].index.to_list()
    duplicate_list_no_cycle = list(set(duplicate_list)-set(cycle_list))
    print("len duplicate_list - ",len(duplicate_list_no_cycle))
    for duplicate_mifal in duplicate_list_no_cycle:
        try:
            ind_list_duplicate = pipes[pipes["mifal"]==duplicate_mifal].index.to_list()
            suspect_short_len_dict = {}
            for ind_duplicate in ind_list_duplicate:
                measef = pipes.at[ind_duplicate,"measef"]
                suspect_len = nx.shortest_path_length(G, last_points, measef)
                suspect_short_len_dict[ind_duplicate] = suspect_len

            if len(suspect_short_len_dict)>1:
                sorted_suspect_short_len_dict = dict(sorted(suspect_short_len_dict.items(), key=operator.itemgetter(0),reverse=False))
                # Keep the last entry (highest index label) unflipped
                sorted_suspect_short_len_dict.popitem()

                for ind_to_flip in list(sorted_suspect_short_len_dict.keys()):
                    measef_to_flip = pipes.at[ind_to_flip,"measef"]
                    mifal_to_flip  = pipes.at[ind_to_flip,"mifal"]
                    pipes = do_flip(pipes, ind_to_flip, mifal_to_flip, measef_to_flip)
                    pipes.at[ind_to_flip, "flip_pipes"] = 1

        except Exception as e:
            print("no statistics for :", e)

    df_for_pump_stations = pipes[pipes["mifal"].isin(all_nodes_above_list)].copy()
    remaining_counts = df_for_pump_stations["mifal"].value_counts()
    if (remaining_counts.shape[0]>0) & (time_flip<5):
        print(time_flip)
        time_flip = time_flip+1
        # The recursive call mutates ``pipes`` in place, so its return value
        # is not needed.
        flip_upside_down_line3 (pipes,time_flip )
    return(pipes)

def fine_cycle_2 (pipes):
    """Return the nodes of every cycle in ``nx.cycle_basis`` as one flat list.

    The graph is built from the ``mifal`` / ``measef`` columns; a node that
    belongs to several basis cycles appears once per cycle.
    """
    graph = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
    return [node for basis_cycle in nx.cycle_basis(graph) for node in basis_cycle]

def change_coord_numb (mifal_to_move):
    """Return ``mifal_to_move`` with its third-from-last character incremented.

    That character must be a digit; it is increased by one, and a result of
    10 is replaced by 1. All other characters are kept.
    """
    prefix = mifal_to_move[:-3]
    digit_char = mifal_to_move[-3]
    suffix = mifal_to_move[-2:]
    bumped = int(digit_char) + 1
    bumped = 1 if bumped == 10 else bumped
    return prefix + str(bumped) + suffix
def split_dup_cycle (pipes):
    """Give duplicated ``mifal`` values that lie on a cycle a distinct name.

    For every ``mifal`` that occurs on more than one row and is part of a
    cycle (per ``fine_cycle_2``), the first matching row gets a new ``mifal``
    produced by ``change_coord_numb``. If that name is already used by some
    row, ``change_coord_numb`` is applied again, at most 9 more times. The
    changed row is marked with ``move_place = 1``.

    The frame is modified in place and returned.
    """
    cycle_list = fine_cycle_2 (pipes)
    # Filter the value_counts Series directly: indexing a "mifal" column of
    # the wrapped DataFrame fails on pandas >= 2.0.
    mifal_counts = pipes["mifal"].value_counts()
    duplicated_list = mifal_counts[mifal_counts > 1].index.to_list()
    duplicated_in_cycle = list(set(duplicated_list)&set(cycle_list))
    for dup_ind_cyc in duplicated_in_cycle:
        ind_mifal_to_move = pipes[pipes["mifal"]==dup_ind_cyc].index[0]
        new_mifal = change_coord_numb(pipes.at[ind_mifal_to_move,"mifal"])
        # The original loop updated neither its counter nor ``new_mifal`` and
        # therefore never terminated once the first candidate was taken.
        attempts = 0
        while attempts < 9 and (pipes["mifal"] == new_mifal).any():
            new_mifal = change_coord_numb(new_mifal)
            attempts += 1
        pipes.at[ind_mifal_to_move,"mifal"] = new_mifal
        pipes.at[ind_mifal_to_move,"move_place"] = 1
    return(pipes)


def manual_decision (pipes):
    """Set ``manual_decision`` to 1 on every row whose ``mifal`` is not unique.

    Rows with a unique ``mifal`` are left untouched. The frame is modified in
    place and returned.
    """
    # Filter the value_counts Series directly: wrapping it in a DataFrame and
    # indexing the "mifal" column fails on pandas >= 2.0, where that column
    # is named "count".
    mifal_counts = pipes["mifal"].value_counts()
    list_manual_decision = mifal_counts[mifal_counts > 1].index.tolist()
    pipes.loc[pipes["mifal"].isin(list_manual_decision),"manual_decision"] = 1
    return(pipes)

def fix_gis (pipes,EPSG,EPSG_number):
    """Run the interactive repair pipeline on a pipe table and write the result out.

    Steps, in order: optional re-projection to EPSG:4326, splitting duplicated
    start nodes that sit on cycles, cycle detection, several rounds of
    flip / island detection / island connection with different search radii,
    an optional population sum (asked on stdin), statistics, manual-decision
    flags, and shapefile export via ``make_shp_file``.

    Parameters
    ----------
    pipes : pandas.DataFrame
        Pipe table as produced by ``import_pipes``.
    EPSG : str
        CRS identifier of the input, e.g. "EPSG:3735".
    EPSG_number : int or str
        Numeric EPSG code of the input. Both ``4326`` and ``"4326"`` mean the
        data is already in EPSG:4326 and is not re-projected.

    Returns
    -------
    pandas.DataFrame
        The processed pipe table.
    """
    # The code usually arrives from input() as text, so compare its string form;
    # comparing a str with the int 4326 is always "not equal".
    if str(EPSG_number).strip() != "4326":
        pipes = Transformer_coordinates(pipes,EPSG) #convert mifal & measef coordinates to epsg(4326)
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
    last_points = pipes[pipes["name"]=="WWTP"]["mifal"].tolist() #Create last points pumping stations and WWTP
    pipes = split_dup_cycle (pipes)
    print("last_points : ", last_points)
    pipes["good_col"] = "good" #Create a GIS color column at the end of the process
    len_dis_list = [10,20,30,100,10] #List of radii for searching distances

    statistics_df = pd.DataFrame()  #Creating a data framework for statistics
    statistics_df["first_connected"] = [adding_statistics(pipes, last_points)]
    islands_list = [] #A list of islands to which the number of islands in each iteration will enter for statistics
    try:
        # NOTE(review): G was built before split_dup_cycle renamed some start nodes,
        # so find_cycle sees the pre-split graph — confirm this is intended.
        pipes,cycle_list = find_cycle(pipes,G)
    except Exception as e:
        # Best effort: continue without cycle information.
        print("ERROR")
        print(e)
        cycle_list = []
    for len_dis in len_dis_list:
        print(len_dis)
        G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
        pipes = flip_upside_down_line3 (pipes , 0)  # reverse the direction of lines reset
        print_info (pipes , last_points) #Printing of statistical information
        islands_dict , potential_points_to_connect = get_islands (pipes,last_points)#Creating a dictionary of islands, and creating a list of suspects
        islands_list.append(len(islands_dict)) #Adding a number of islands for statistics
        pipes = connect_lines (pipes , islands_dict , potential_points_to_connect,G,len_dis) #Connecting islands
    pipes = flip_upside_down_line3 (pipes , 0)
    print_info (pipes , last_points)
    make_sound()  # Alarm at the end of the first part
    pipes = keep_connect_network (pipes,last_points,cycle_list)  # Choose a name for the output file, choose whether to continue to connect islands or continue

    ### sum_pop
    sum_pop_a_decision = input("do you want to sum pop? y/n")
    if sum_pop_a_decision == "y" :
        print("estPop :", pipes["estPop"].sum())
        pipes = sum_pop(pipes, last_points,cycle_list)
    else:
        print("no sum pop" )
    try:
        pipes = do_statistics(statistics_df, pipes, islands_list, last_points)  # Adding statistics on the WWTP line
    except Exception as e:
        # Statistics are optional; report and carry on.
        print("no statistics for :", e)

    pipes = manual_decision (pipes)

    ###  sum_pop
#     make_csv(pipes, file_name, last_points)  # Create a CSV file
    pipes, _ = make_shp_file (pipes)  # second return value is not used here
    make_sound()  # An alarm that announces the end of the run
    return (pipes)



In [2]:
# Ask for the shapefile path and the EPSG code of its coordinates.
print("Insert the path to the file:")
source = input()
print("insert EPSG")
EPSG_number = input()
EPSG = f"EPSG:{EPSG_number}"
print(EPSG)

# Import and repair the network; on any failure print the error and wait for a key press.
try:
    pipes = import_pipes(source)
    pipes = fix_gis (pipes,EPSG,EPSG_number)
except Exception as e:
    print("ERROR", e, sep="\n")
    os.system("pause")


Insert the path to the file:
C:\Users\Matan\Downloads\cici_78_t.shp
insert EPSG
3735
EPSG:3735
last_points :  ['(39.199961037335044, -84.585489991532)']
the GIS got cycle
['(39.18651832540301, -84.58974822345844)', '(39.18740415001342, -84.5895835836301)', '(39.18759860810062, -84.58956598566073)', '(39.18684874111302, -84.58971486769367)', '(39.18665442042236, -84.5896531037116)', '(39.18657727140502, -84.58979673580039)', '(39.187005978157885, -84.58970052819089)', '(39.187934251476655, -84.58953513323247)', '(39.18760376281904, -84.58964348043423)', '(39.187234341237016, -84.58959895055406)', '(39.187576697909265, -84.58956796811226)', '(39.18655583992248, -84.58966234624084)', '(39.187894067011584, -84.58961577466565)', '(39.1865047294565, -84.58966709918961)', '(39.18786295283261, -84.58954206245815)', '(39.18704052825617, -84.58961689981976)', '(39.187239569995214, -84.58967823596008)', '(39.1871500622489, -84.58960662943137)', '(39.1874088406238, -84.58966208211241)', '(39.18679

In [None]:
# C:\Users\Matan\Downloads\cyclr_22.shp
2277

In [6]:
# Show the pipes flagged for manual review.
pipes.loc[pipes["manual_decision"] == 1]

Unnamed: 0,first_connected,first_islands_number,last_islands_number,last_connected,pipes_was_connected,name,0,1,2,SEG_ID,SEG_TYPE,SIZE_,fid,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,gID,GEO_ID,NAME_2,thePop,NUMPOINTS,avgPop,totalPop,estPop,ShpName,Wkb,Wkt,Json,mifal,measef,x_for_transform,y_for_transform,new_coords,tail_coords,move_place,good_col,cycle,temp_mifal,flip_pipes,good,closest_sec,manual_decision,dye,mifal_y,mifal_x,measef_y,measef_x,WKT_matan
829,,,,,,,"(1373789.2497639358, 438723.8123689592)","(1373795.7185830176, 438732.18472753465)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2315,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6]?...,"LINESTRING (1373789.24976394 438723.812368959,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18562285185364, -84.5983090547059)","(39.18559945788416, -84.59833118424386)",1373796.0,438732.184728,"(39.18559945788416, -84.59833118424386)","(39.18562285185364, -84.5983090547059)",,good,,"(39.18559945788416, -84.59833118424386)",1.0,,,1.0,dye,39.18562285185364,-84.5983090547059,39.18559945788416,-84.59833118424386,LINESTRING ( -84.5983090547059 39.185622851853...
830,,,,,,,"(1373795.7185830176, 438732.18472753465)","(1373843.4799704254, 438794.0012049526)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2316,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6c\...,"LINESTRING (1373795.71858302 438732.184727535,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18562285185364, -84.5983090547059)","(39.18579557873783, -84.59814566475293)",1373843.0,438794.001205,"(39.18562285185364, -84.5983090547059)","(39.18579557873783, -84.59814566475293)",,good,,"(39.18562285185364, -84.5983090547059)",,,,1.0,dye,39.18562285185364,-84.5983090547059,39.18579557873783,-84.59814566475293,LINESTRING ( -84.5983090547059 39.185622851853...
944,,,,,,,"(1370468.7019430995, 440564.23454111814)","(1370476.2307994366, 440567.406778872)","{'SEG_ID': '23408010-23408006', 'SEG_TYPE': 'G...",23408010-23408006,Gravity,8,2202,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xe9d\...,"LINESTRING (1370468.7019431 440564.234541118,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.190438781972034, -84.61019461714612)","(39.190447972444325, -84.61016832226099)",1370476.0,440567.406779,"(39.190438781972034, -84.61019461714612)","(39.190447972444325, -84.61016832226099)",,good,,"(39.190447972444325, -84.61016832226099)",1.0,,,1.0,dye,39.190438781972034,-84.61019461714612,39.190447972444325,-84.61016832226099,LINESTRING ( -84.61019461714612 39.19043878197...
945,,,,,,,"(1370468.7019430995, 440564.23454111814)","(1370332.82918334, 440581.7738761157)","{'SEG_ID': '23408010-23408013', 'SEG_TYPE': 'D...",23408010-23408013,Dry line,8,73,39.0,61.0,20812.0,3004.0,390610208123004.0,Block 3004,39061020812.0,1400000US39061020812,"Census Tract 208.12, Hamilton County, Ohio",4561.0,1926.0,2.368,4561.0,11.84,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xe9d\...,"LINESTRING (1370468.7019431 440564.234541118,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.190438781972034, -84.61019461714612)","(39.19047820722432, -84.61067532530116)",1370333.0,440581.773876,"(39.190438781972034, -84.61019461714612)","(39.19047820722432, -84.61067532530116)",,good,,"(39.190438781972034, -84.61019461714612)",,,,1.0,dye,39.190438781972034,-84.61019461714612,39.19047820722432,-84.61067532530116,LINESTRING ( -84.61019461714612 39.19043878197...
1754,,,,,,,"(1371437.1842581034, 440126.4463746995)","(1371444.9998592585, 440137.9998292923)","{'SEG_ID': '23407036-23407035', 'SEG_TYPE': 'T...",23407036-23407035,Tap,6,2,,,,,,,,,,,,,,0.0,cinci_3735_for_test,"b""\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xed-/...","LINESTRING (1371437.1842581 440126.446374699,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.189299186124224, -84.60674246719762)","(39.189331399717304, -84.60671585071186)",1371445.0,440137.999829,"(39.189299186124224, -84.60674246719762)","(39.189331399717304, -84.60671585071186)",,good,,"(39.189331399717304, -84.60671585071186)",1.0,,,1.0,dye,39.189299186124224,-84.60674246719762,39.1893313997173,-84.60671585071186,LINESTRING ( -84.60674246719762 39.18929918612...
1755,,,,,,,"(1371437.1842581034, 440126.4463746995)","(1371722.049830094, 440104.8499612063)","{'SEG_ID': '23408001-23407013', 'SEG_TYPE': 'F...",23408001-23407013,Force Main,4,2091,39.0,61.0,20812.0,3004.0,390610208123004.0,Block 3004,39061020812.0,1400000US39061020812,"Census Tract 208.12, Hamilton County, Ohio",4561.0,1926.0,2.368,4561.0,7.104,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xed-/...,"LINESTRING (1371437.1842581 440126.446374699,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.189299186124224, -84.60674246719762)","(39.18925814836316, -84.60573590374362)",1371722.0,440104.849961,"(39.189299186124224, -84.60674246719762)","(39.18925814836316, -84.60573590374362)",,good,,"(39.189299186124224, -84.60674246719762)",,,,1.0,dye,39.189299186124224,-84.60674246719762,39.18925814836316,-84.60573590374362,LINESTRING ( -84.60674246719762 39.18929918612...


In [None]:
# C:\Users\Matan\Downloads\cinci_3735_for_test.shp
# 3735

In [18]:
# source = "C:/Users/Matan/Downloads/cyclr_22.shp"
# EPSG = "EPSG:" + str(2277)
# pipes = import_pipes(source)
# pipes = Transformer_coordinates(pipes,EPSG) #convert mifal & measef coordinates to epsg(4326)
# G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
# last_points = pipes[pipes["name"]=="WWTP"]["mifal"].tolist() #Create last points pumping stations and WWTP

# # pipes = split_dup_cycle (pipes)

# pipes["good_col"] = "good" #Create a GIS color column at the end of the process

# statistics_df = pd.DataFrame()  #Creating a data framework for statistics
# statistics_df["first_connected"] = [adding_statistics(pipes, last_points)]
# islands_list = [] #A list of islands to which the number of islands in each iteration will enter for statistics
# cycle_list = []
# G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True)
# last_time_duplicated = [0,0] #Reset stop conditions to reverse the direction of the lines
# # pipes = flip_upside_down_line3 (pipes , 0)  # reverse the direction of lines reset

In [44]:
# Scratch copy of a single pass of flip_upside_down_line3, run on the kernel's current `pipes`.
time_flip = 0
last_points = pipes[pipes["name"]=="WWTP"]["mifal"].values[0]
pipes["temp_mifal"] = pipes["mifal"]
G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
cycle_list = fine_cycle_2(pipes)

# Pipes whose start node is in the same connected component as the WWTP.
all_nodes_above_list = list(nx.node_connected_component(G, last_points))
df_for_pump_stations = pipes[pipes["mifal"].isin(all_nodes_above_list)].copy()

# Name the counts column explicitly so the lookup below works on every pandas version.
duplicate_df = pd.DataFrame({"mifal": df_for_pump_stations["mifal"].value_counts()})
duplicate_list = duplicate_df[duplicate_df["mifal"]>1].index.to_list()
duplicate_list_no_cycle = set(duplicate_list)-set(cycle_list)
duplicate_list_no_cycle = list(duplicate_list_no_cycle)
print("len duplicate_list - ",len(duplicate_list_no_cycle))
if len(duplicate_list_no_cycle)>0:
    for duplicate_mifal in duplicate_list_no_cycle:
        try:
            ind_list_duplicate = pipes[pipes["mifal"]==duplicate_mifal].index.to_list()
            # row index -> number of hops from the WWTP to that pipe's end node
            suspect_short_len_dict = {}
            for ind_duplicate in ind_list_duplicate:
                measef = pipes.at[ind_duplicate,"measef"]
                suspect_len = nx.shortest_path_length(G, last_points, measef)
                suspect_short_len_dict[ind_duplicate] = suspect_len

            if len(suspect_short_len_dict)>1:
                # Sort by hop count (farthest first) and drop the last entry, i.e. keep the
                # pipe whose end node is closest to the WWTP and flip all the others.
                sorted_suspect_short_len_dict = dict(sorted(suspect_short_len_dict.items(), key=operator.itemgetter(1),reverse=True))
                sorted_suspect_short_len_dict.popitem()
                list_to_flip = list(sorted_suspect_short_len_dict.keys())

                for ind_to_flip in list_to_flip:
                    print("make_flipppppp")
                    measef_to_flip = pipes.at[ind_to_flip,"measef"]
                    mifal_to_flip  = pipes.at[ind_to_flip,"mifal"]
                    pipes = do_flip(pipes, ind_to_flip, mifal_to_flip, measef_to_flip)

                    pipes.at[ind_to_flip, "flip_pipes"] = 1
                    print("flipeddddddd : " , pipes.at[ind_to_flip, "mifal"] )

        except Exception as e:
            # Best effort: report and move on to the next duplicated node.
            print("no statistics for :", e)
# NOTE(review): this recount uses the snapshot taken before the flips above.
duplicate_df = pd.DataFrame({"mifal": df_for_pump_stations["mifal"].value_counts()})
if (duplicate_df.shape[0]>0) and (time_flip<5):
    print(time_flip)
    time_flip = time_flip+1

len duplicate_list -  2
make_flipppppp
flipeddddddd :  (39.189299186124224, -84.60674246719762)
make_flipppppp
flipeddddddd :  (39.18701481303375, -84.59773988513244)
0


In [None]:
flipeddddddd :  (39.18925814836316, -84.60573590374362)
make_flipppppp
flipeddddddd :  (39.1870857597852, -84.59777508568281)

In [None]:
len duplicate_list -  2
make_flipppppp
flipeddddddd :  (39.189299186124224, -84.60674246719762)
make_flipppppp
flipeddddddd :  (39.18701481303375, -84.59773988513244)

In [5]:
# Show the pipes flagged for manual review.
pipes.loc[pipes["manual_decision"] == 1]

Unnamed: 0,first_connected,first_islands_number,last_islands_number,last_connected,pipes_was_connected,name,0,1,2,SEG_ID,SEG_TYPE,SIZE_,fid,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,gID,GEO_ID,NAME_2,thePop,NUMPOINTS,avgPop,totalPop,estPop,ShpName,Wkb,Wkt,Json,mifal,measef,x_for_transform,y_for_transform,new_coords,tail_coords,move_place,good_col,cycle,temp_mifal,flip_pipes,good,closest_sec,manual_decision,dye,mifal_y,mifal_x,measef_y,measef_x,WKT_matan
829,,,,,,,"(1373789.2497639358, 438723.8123689592)","(1373795.7185830176, 438732.18472753465)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2315,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6]?...,"LINESTRING (1373789.24976394 438723.812368959,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18562285185364, -84.5983090547059)","(39.18559945788416, -84.59833118424386)",1373796.0,438732.184728,"(39.18559945788416, -84.59833118424386)","(39.18562285185364, -84.5983090547059)",,good,,"(39.18559945788416, -84.59833118424386)",1.0,,,1.0,dye,39.18562285185364,-84.5983090547059,39.18559945788416,-84.59833118424386,LINESTRING ( -84.5983090547059 39.185622851853...
830,,,,,,,"(1373795.7185830176, 438732.18472753465)","(1373843.4799704254, 438794.0012049526)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2316,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6c\...,"LINESTRING (1373795.71858302 438732.184727535,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18562285185364, -84.5983090547059)","(39.18579557873783, -84.59814566475293)",1373843.0,438794.001205,"(39.18562285185364, -84.5983090547059)","(39.18579557873783, -84.59814566475293)",,good,,"(39.18562285185364, -84.5983090547059)",,,,1.0,dye,39.18562285185364,-84.5983090547059,39.18579557873783,-84.59814566475293,LINESTRING ( -84.5983090547059 39.185622851853...
944,,,,,,,"(1370468.7019430995, 440564.23454111814)","(1370476.2307994366, 440567.406778872)","{'SEG_ID': '23408010-23408006', 'SEG_TYPE': 'G...",23408010-23408006,Gravity,8,2202,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xe9d\...,"LINESTRING (1370468.7019431 440564.234541118,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.190438781972034, -84.61019461714612)","(39.190447972444325, -84.61016832226099)",1370476.0,440567.406779,"(39.190438781972034, -84.61019461714612)","(39.190447972444325, -84.61016832226099)",,good,,"(39.190447972444325, -84.61016832226099)",1.0,,,1.0,dye,39.190438781972034,-84.61019461714612,39.190447972444325,-84.61016832226099,LINESTRING ( -84.61019461714612 39.19043878197...
945,,,,,,,"(1370468.7019430995, 440564.23454111814)","(1370332.82918334, 440581.7738761157)","{'SEG_ID': '23408010-23408013', 'SEG_TYPE': 'D...",23408010-23408013,Dry line,8,73,39.0,61.0,20812.0,3004.0,390610208123004.0,Block 3004,39061020812.0,1400000US39061020812,"Census Tract 208.12, Hamilton County, Ohio",4561.0,1926.0,2.368,4561.0,11.84,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xe9d\...,"LINESTRING (1370468.7019431 440564.234541118,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.190438781972034, -84.61019461714612)","(39.19047820722432, -84.61067532530116)",1370333.0,440581.773876,"(39.190438781972034, -84.61019461714612)","(39.19047820722432, -84.61067532530116)",,good,,"(39.190438781972034, -84.61019461714612)",,,,1.0,dye,39.190438781972034,-84.61019461714612,39.19047820722432,-84.61067532530116,LINESTRING ( -84.61019461714612 39.19043878197...
1754,,,,,,,"(1371437.1842581034, 440126.4463746995)","(1371444.9998592585, 440137.9998292923)","{'SEG_ID': '23407036-23407035', 'SEG_TYPE': 'T...",23407036-23407035,Tap,6,2,,,,,,,,,,,,,,0.0,cinci_3735_for_test,"b""\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xed-/...","LINESTRING (1371437.1842581 440126.446374699,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.189299186124224, -84.60674246719762)","(39.189331399717304, -84.60671585071186)",1371445.0,440137.999829,"(39.189299186124224, -84.60674246719762)","(39.189331399717304, -84.60671585071186)",,good,,"(39.189331399717304, -84.60671585071186)",1.0,,,1.0,dye,39.189299186124224,-84.60674246719762,39.1893313997173,-84.60671585071186,LINESTRING ( -84.60674246719762 39.18929918612...
1755,,,,,,,"(1371437.1842581034, 440126.4463746995)","(1371722.049830094, 440104.8499612063)","{'SEG_ID': '23408001-23407013', 'SEG_TYPE': 'F...",23408001-23407013,Force Main,4,2091,39.0,61.0,20812.0,3004.0,390610208123004.0,Block 3004,39061020812.0,1400000US39061020812,"Census Tract 208.12, Hamilton County, Ohio",4561.0,1926.0,2.368,4561.0,7.104,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xed-/...,"LINESTRING (1371437.1842581 440126.446374699,1...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.189299186124224, -84.60674246719762)","(39.18925814836316, -84.60573590374362)",1371722.0,440104.849961,"(39.189299186124224, -84.60674246719762)","(39.18925814836316, -84.60573590374362)",,good,,"(39.189299186124224, -84.60674246719762)",,,,1.0,dye,39.189299186124224,-84.60674246719762,39.18925814836316,-84.60573590374362,LINESTRING ( -84.60674246719762 39.18929918612...


In [41]:
# Count the pipes that start at each "mifal" node and show the nodes that start more than one.
# The counts column is named explicitly so the filter works on every pandas version.
a = pd.DataFrame({"mifal": pipes["mifal"].value_counts()})
a[a["mifal"]>1]

Unnamed: 0,mifal
"(39.18701481303375, -84.59773988513244)",2
"(39.189299186124224, -84.60674246719762)",2


In [34]:
# Rebuild the undirected pipe graph from the current table and run cycle detection on it.
G = nx.from_pandas_edgelist(pipes, source="mifal", target='measef', edge_attr=True)
pipes, cycle_list = find_cycle(pipes, G)

the GIS got cycle
['(39.17721885928897, -84.58496582982394)', '(39.17725405155113, -84.58496260228213)', '(39.17718384656239, -84.5849550088242)', '(39.177193905043445, -84.5848941428461)']


In [22]:
# List the start nodes inside the WWTP component that start more than one pipe and are not on a cycle.
last_points = pipes[pipes["name"]=="WWTP"]["mifal"].values[0]
pipes["temp_mifal"] = pipes["mifal"]
G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
cycle_list = fine_cycle_2(pipes)

all_nodes_above_list = list(nx.node_connected_component(G, last_points))
df_for_pump_stations = pipes[pipes["mifal"].isin(all_nodes_above_list)].copy()

# Name the counts column explicitly so the lookup below works on every pandas version.
duplicate_df = pd.DataFrame({"mifal": df_for_pump_stations["mifal"].value_counts()})
duplicate_list = duplicate_df[duplicate_df["mifal"]>1].index.to_list()
duplicate_list_no_cycle = set(duplicate_list)-set(cycle_list)
duplicate_list_no_cycle = list(duplicate_list_no_cycle)
print("len duplicate_list - ",len(duplicate_list_no_cycle))
duplicate_list_no_cycle

len duplicate_list -  2


['(39.18579557873783, -84.59814566475293)',
 '(39.18925814836316, -84.60573590374362)']

In [23]:
# Show every pipe that starts at this node.
pipes.loc[pipes["mifal"] == "(39.18579557873783, -84.59814566475293)"]

Unnamed: 0,first_connected,first_islands_number,last_islands_number,last_connected,pipes_was_connected,name,0,1,2,SEG_ID,SEG_TYPE,SIZE_,fid,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,gID,GEO_ID,NAME_2,thePop,NUMPOINTS,avgPop,totalPop,estPop,ShpName,Wkb,Wkt,Json,mifal,measef,x_for_transform,y_for_transform,new_coords,tail_coords,move_place,good_col,cycle,temp_mifal,flip_pipes,good,closest_sec
830,,,,,,,"(1373795.7185830176, 438732.18472753465)","(1373843.4799704254, 438794.0012049526)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2316,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6c\...,"LINESTRING (1373795.71858302 438732.184727535,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18579557873783, -84.59814566475293)","(39.18562285185364, -84.5983090547059)",1373843.0,438794.001205,"(39.18562285185364, -84.5983090547059)","(39.18579557873783, -84.59814566475293)",,good,,"(39.18579557873783, -84.59814566475293)",1.0,,
831,,,,,,,"(1373843.4799704254, 438794.0012049526)","(1373855.7263370156, 438809.8512388766)","{'SEG_ID': '23514012-23406013', 'SEG_TYPE': 'F...",23514012-23406013,Force Main,6,2317,,,,,,,,,,,,,,0.0,cinci_3735_for_test,b'\x00\x00\x00\x00\x02\x00\x00\x00\x02A4\xf6\x...,"LINESTRING (1373843.47997043 438794.001204953,...","{ ""type"": ""LineString"", ""coordinates"": [ [ 137...","(39.18579557873783, -84.59814566475293)","(39.18583986668906, -84.59810377023993)",1373856.0,438809.851239,"(39.18579557873783, -84.59814566475293)","(39.18583986668906, -84.59810377023993)",,good,,"(39.18579557873783, -84.59814566475293)",,,


In [42]:
# Manual experiment: overwrite the start node ("mifal") of row ind_to_flip with measef_to_flip.
# NOTE(review): ind_to_flip and measef_to_flip are not defined in this cell (presumably left
# over from an earlier cell's loop), and the matching "measef" assignment below is commented
# out, so this performs only half of a flip — confirm intent before re-running.
pipes.at[ind_to_flip, "mifal"] = measef_to_flip
# pipes.at[ind, "measef"] = mifal

In [40]:
measef_to_flip

'(39.19466415080037, -84.57833407325363)'

In [41]:
mifal_to_flip

'(39.194694330471556, -84.57906252493059)'

In [25]:
pipes[pipes.index.isin([202,238,167])]["flip_pipes"]

167    0.0
202    0.0
238    0.0
Name: flip_pipes, dtype: float64

In [620]:
# Load a shapefile as a graph and inspect the attribute dict carried by each edge.
# NOTE(review): nx.read_shp is deprecated in NetworkX 2.x and absent from NetworkX 3.x, and the
# path is an absolute local path — confirm the pinned NetworkX version before re-running.
e = nx.read_shp("C:/Users/Matan/Downloads/cycle_2_3735.shp", simplify=True, geom_attrs=True, strict=False) # read shp file

# One row per edge: column 0 = first node, column 1 = second node, column 2 = attribute dict.
ed = pd.DataFrame(e.edges.data())
ed[2]

  e = nx.read_shp("C:/Users/Matan/Downloads/cycle_2_3735.shp", simplify=True, geom_attrs=True, strict=False) # read shp file


0      {'field_1': 27192, 'first_conn': None, 'first_...
1      {'field_1': 29189, 'first_conn': None, 'first_...
2      {'field_1': 27074, 'first_conn': None, 'first_...
3      {'field_1': 20596, 'first_conn': None, 'first_...
4      {'field_1': 26985, 'first_conn': None, 'first_...
                             ...                        
325    {'field_1': 83254, 'first_conn': None, 'first_...
326    {'field_1': 84321, 'first_conn': None, 'first_...
327    {'field_1': 83224, 'first_conn': None, 'first_...
328    {'field_1': 83223, 'first_conn': None, 'first_...
329    {'field_1': 73679, 'first_conn': None, 'first_...
Name: 2, Length: 330, dtype: object

In [619]:
# Rebuild a WKT line per pipe from the "(y, x)" node strings, then turn the node names into float tuples.
df = geo_df.copy()
df = df.drop(["0","1",'Wkb','Wkt','Json',"y_for_transform","x_for_transform"], axis=1)
# df["Wkt"] = df["WKT_matan"]

# Split "(y, x)" at the first comma. `n` is passed by keyword: the positional form
# triggers the warning recorded under this cell and is rejected by newer pandas.
df[['mifal_y', 'mifal_x']] = df['mifal'].str.split(',', n=1, expand=True)
df[['measef_y', 'measef_x']] = df['measef'].str.split(',', n=1, expand=True)
# Strip the opening "(" from the first half and the closing ")" from the second half.
df["mifal_y"] = df["mifal_y"].str[1:]#.astype(float)
df["measef_y"] = df["measef_y"].str[1:]#.astype(float)
df["mifal_x"] = df["mifal_x"].str[:-1]#.astype(float)
df["measef_x"] = df["measef_x"].str[:-1]#.astype(float)
# df['mifal'] = df[['mifal_y', 'mifal_x']].values.tolist()
# df['measef'] = df[['measef_y', 'measef_x']].values.tolist()
# df['mifal'] = df[['mifal_y', 'mifal_x']].apply(tuple, axis=1)
# df['measef'] = df[['measef_y', 'measef_x']].apply(tuple, axis=1)

# WKT uses "x y" order, so the second half of each node string goes first.
df["Wkt"] = "LINESTRING (" + df["mifal_x"] + " " + df["mifal_y"] + "," + df["measef_x"] + " " + \
                     df["measef_y"] + " )"

df["mifal_y"] = df["mifal_y"].astype(float)
df["measef_y"] = df["measef_y"].astype(float)
df["mifal_x"] = df["mifal_x"].astype(float)
df["measef_x"] = df["measef_x"].astype(float)
# Node names become (y, x) float tuples.
df['mifal'] = df[['mifal_y', 'mifal_x']].apply(tuple, axis=1)
df['measef'] = df[['measef_y', 'measef_x']].apply(tuple, axis=1)

  df[['mifal_y', 'mifal_x']] = df['mifal'].str.split(',', 1, expand=True)
  df[['measef_y', 'measef_x']] = df['measef'].str.split(',', 1, expand=True)


In [467]:
df["mifal"][0]

('4.647243754400109', ' -102.6163856301295')

In [470]:
df["mifal"][0]

(4.647243754400109, -102.6163856301295)

In [476]:
df = df[["Wkt","mifal","measef","dye"]]
df["mifal"][0][0]

4.647243754400109

In [477]:
# Build a directed graph from the pipe table (edge direction mifal -> measef) ...
G = nx.from_pandas_edgelist(df,source='mifal',target='measef',edge_attr=True,create_using=nx.DiGraph())
# ... whose edge view is evaluated here but not displayed (it is not the cell's last expression).
G.edges.data()
# NOTE(review): this rebinds G to a graph built without create_using, discarding the directed
# graph above — confirm which of the two is meant to be used by the following cells.
G = nx.from_pandas_edgelist(df,    "mifal",       'measef', edge_attr=True)

In [478]:
nx.write_shp(G, "C:/Users/Matan/Downloads/aaa20.shp")

  nx.write_shp(G, "C:/Users/Matan/Downloads/aaa20.shp")


In [67]:
# x = source.split("\\")
import os
# Everything before the last backslash of the path typed in earlier (empty if there is none).
x = source.rpartition('\\')[0]
x

'C:\\Users\\Matan\\Downloads'

In [4]:
# Make every column label a string (the import step leaves integer labels 0, 1, 2).
pipes.columns = pipes.columns.astype(str)
G = nx.from_pandas_edgelist(pipes, source="mifal", target='measef', edge_attr=True)
last_points = pipes.loc[pipes["name"] == "WWTP", "mifal"].values[0]
# Nodes in the same connected component as the WWTP.
connect_list = list(nx.node_connected_component(G, last_points))
len_list_to_paint = len(pipes[pipes["mifal"].isin(connect_list)])
# Mark every pipe that starts inside that component.
pipes.loc[pipes["mifal"].isin(connect_list), "dye"] = ["dye"] * len_list_to_paint
geo_df = make_wkt (pipes)
# Parse the WKT text into geometry objects and use them as the geometry column.
geo_df['WKT_matan'] = geo_df['WKT_matan'].map(wkt.loads)
line_gdf = gpd.GeoDataFrame(geo_df, geometry='WKT_matan')
# gdf = gpd.GeoDataFrame(line_gdf, geometry='WKT_matan',crs="epsg:4326")

  pipes[['mifal_y', 'mifal_x']] = pipes['mifal'].str.split(',', 1, expand=True)
  pipes[['measef_y', 'measef_x']] = pipes['measef'].str.split(',', 1, expand=True)


In [6]:
# Attach the CRS, then coerce every non-skipped column to float, falling back to str.
gdf = gpd.GeoDataFrame(line_gdf, geometry='WKT_matan',crs="epsg:4326")
skip_col = ["good","WKT_matan","dye","flip_pipes"]
# NOTE(review): the loop converts geo_df while the summary below reads gdf; whether gdf
# sees these conversions depends on how GeoDataFrame shares data with geo_df — confirm.
for col in geo_df.columns:
    if col in skip_col:
        continue
    try:
        geo_df[col]= geo_df[col].astype(float)
    except Exception:
        # Not numeric: store as text. `Exception` (rather than a bare except)
        # lets KeyboardInterrupt / SystemExit propagate.
        geo_df[col]= geo_df[col].astype(str)

print("""gdf["flip_pipes"]""",gdf["flip_pipes"].value_counts())

gdf["flip_pipes"] fliped    4
Name: flip_pipes, dtype: int64


In [591]:
# Turn the "(y, x)" node strings into float tuples and keep only the node and cycle columns.
df = gdf.copy()
# Split at the first comma. `n` is passed by keyword: the positional form triggers
# the warning recorded under this cell and is rejected by newer pandas.
df[['mifal_y', 'mifal_x']] = df['mifal'].str.split(',', n=1, expand=True)
df[['measef_y', 'measef_x']] = df['measef'].str.split(',', n=1, expand=True)
# Strip the opening "(" / closing ")" and convert to float.
df["mifal_y"] = df["mifal_y"].str[1:].astype(float)
df["measef_y"] = df["measef_y"].str[1:].astype(float)
df["mifal_x"] = df["mifal_x"].str[:-1].astype(float)
df["measef_x"] = df["measef_x"].str[:-1].astype(float)
# df['mifal'] = df[['mifal_y', 'mifal_x']].values.tolist()
# df['measef'] = df[['measef_y', 'measef_x']].values.tolist()
df['mifal'] = df[['mifal_y', 'mifal_x']].apply(tuple, axis=1)
df['measef'] = df[['measef_y', 'measef_x']].apply(tuple, axis=1)

# df = df.drop(["0","1","2","WKT_matan"], axis=1)
df = df[["mifal","measef","cycle"]]
df

  df[['mifal_y', 'mifal_x']] = df['mifal'].str.split(',', 1, expand=True)
  df[['measef_y', 'measef_x']] = df['measef'].str.split(',', 1, expand=True)


Unnamed: 0,mifal,measef,cycle
0,"(4.647243754400109, -102.6163856301295)","(4.646610620475555, -102.61702302362869)",
1,"(4.646610620475555, -102.61702302362869)","(4.646333777597153, -102.61714533643661)",
2,"(4.653440655459034, -102.61555086503733)","(4.652595540311509, -102.61571437653517)",
3,"(4.652595540311509, -102.61571437653517)","(4.652121650948764, -102.61576379276997)",
4,"(4.659995140169646, -102.6129095741102)","(4.660036590636827, -102.6134861197121)",
...,...,...,...
325,"(4.651038647480682, -102.61501937033418)","(4.6510563414770525, -102.61542393234879)",
326,"(4.6453482759030695, -102.6170430139782)","(4.645169263830103, -102.61802857768336)",
327,"(4.656001766677245, -102.61308094569685)","(4.656055662537225, -102.61368892550863)",
328,"(4.656055662537225, -102.61368892550863)","(4.656096993845552, -102.61418016807826)",


In [None]:
def flip_upside_down_line3 (pipes,time_flip ):
    """Flip pipes so that nodes connected to the WWTP start at most one pipe (best effort).

    Within the connected component of the WWTP node, every "mifal" value that
    starts more than one pipe and is not on a cycle is examined. Of the pipes
    starting there, the one whose "measef" end is the fewest hops from the WWTP
    is kept; each of the others is passed to ``do_flip`` and marked with
    ``flip_pipes = "fliped"``. The pass is repeated recursively while flips are
    still being made, up to ``time_flip == 5``.

    Parameters
    ----------
    pipes : pandas.DataFrame
        Pipe table with "name", "mifal" and "measef" columns; exactly which
        columns ``do_flip`` touches is not visible here. Modified in place
        (a "temp_mifal" column is written).
    time_flip : int
        Recursion depth so far; external callers pass 0.

    Returns
    -------
    pandas.DataFrame
        The pipe table after the last pass.

    Raises
    ------
    IndexError
        If no row has ``name == "WWTP"``.
    """
    last_points = pipes[pipes["name"]=="WWTP"]["mifal"].values[0]
    pipes["temp_mifal"] = pipes["mifal"]
    G = nx.from_pandas_edgelist(pipes, "mifal", 'measef', edge_attr=True) #Create a graph
    cycle_list = fine_cycle_2(pipes)

    # Pipes whose start node is in the same connected component as the WWTP.
    all_nodes_above_list = list(nx.node_connected_component(G, last_points))
    df_for_pump_stations = pipes[pipes["mifal"].isin(all_nodes_above_list)]

    # Filter the counts Series directly; the column name of a wrapped DataFrame
    # differs between pandas versions.
    start_counts = df_for_pump_stations["mifal"].value_counts()
    duplicate_list = start_counts[start_counts > 1].index.to_list()
    duplicate_list_no_cycle = list(set(duplicate_list)-set(cycle_list))
    print("len duplicate_list - ",len(duplicate_list_no_cycle))

    flipped_any = False
    for duplicate_mifal in duplicate_list_no_cycle:
        try:
            ind_list_duplicate = pipes[pipes["mifal"]==duplicate_mifal].index.to_list()
            # row index -> number of hops from the WWTP to that pipe's end node
            suspect_short_len_dict = {}
            for ind_duplicate in ind_list_duplicate:
                measef = pipes.at[ind_duplicate,"measef"]
                suspect_short_len_dict[ind_duplicate] = nx.shortest_path_length(G, last_points, measef)

            if len(suspect_short_len_dict)>1:
                # Order by hop count, farthest first, and keep the last entry (closest
                # to the WWTP) unflipped. Sorting by row index would ignore the path
                # lengths computed above.
                farthest_first = sorted(suspect_short_len_dict.items(), key=operator.itemgetter(1), reverse=True)
                list_to_flip = [row_index for row_index, _ in farthest_first[:-1]]

                for ind_to_flip in list_to_flip:
                    measef_to_flip = pipes.at[ind_to_flip,"measef"]
                    mifal_to_flip  = pipes.at[ind_to_flip,"mifal"]
                    pipes = do_flip(pipes, ind_to_flip, mifal_to_flip, measef_to_flip)
                    pipes.at[ind_to_flip, "flip_pipes"] = "fliped"
                    flipped_any = True

        except Exception as e:
            # Best effort: report and move on to the next duplicated node.
            print("no statistics for :", e)

    # A pass that flipped nothing leaves the table unchanged, so another pass would
    # repeat it exactly; recurse only after a change, and keep the result.
    if flipped_any and (time_flip<5):
        print(time_flip)
        pipes = flip_upside_down_line3 (pipes,time_flip+1 )
    return(pipes)

In [None]:
C:\Users\Matan\Downloads\busta_to_fix_01.shp

In [7]:
pipes = flip_upside_down_line3 (pipes,0) 


len duplicate_list -  25
0
len duplicate_list -  16
1
len duplicate_list -  15
2
len duplicate_list -  15
3
len duplicate_list -  15
4
len duplicate_list -  15
