In [7]:
import numpy as np
import pandas as pd
import os
import multiprocessing as mp
import typing
from os import walk
from typing import Union,Generator,Callable
from pandas._typing import FilePath,ReadBuffer
from utils.file_ops import add_str_to_filename



# Directories used by this notebook: Input_dir is the directory that is scanned
# for files, Output_dir is where the revised copies are meant to be written.
# NOTE(review): both are absolute, machine-specific paths - edit them before running elsewhere.
Input_dir=r"C:\Projects\2022\Michaels_Code\grundfos-express-tools\pipe diameter finder\input files"
Output_dir=r"C:\Projects\2022\Michaels_Code\grundfos-express-tools\pipe diameter finder\output files"



def iterate_over_dir(dir,filter:typing.Callable=None)->Generator:
    """Yield the contents of every file directly inside ``dir``.

    Params:
        dir: Directory to scan. Only its top level is listed; sub-directories
            are not entered.
        filter: Accepted for backward compatibility; it is currently not applied.

    Yields:
        The ``(data, path)`` tuple returned by ``_read_single_file`` for each
        file, where ``path`` is ``dir`` joined with the file name.
    """
    # The listing has to come from the ``dir`` argument. It previously walked the
    # notebook-level ``Input_dir``, so any other directory passed in was ignored
    # and the joined paths pointed at files that may not exist.
    # The default tuple covers a missing/unreadable directory (walk yields nothing).
    filenames = next(walk(dir), (None, None, []))[2]
    for file in filenames:
        yield _read_single_file(os.path.join(dir, file))
            
def _read_single_file(file,chunksize:int=1024)->tuple[bytes,str]:
    """Read ``file`` in binary mode and return its content with its path.

    Params:
        file: Path of the file to read.
        chunksize: Number of bytes requested per read call.

    Returns:
        A ``(data, file)`` tuple: the complete file content as bytes and the
        path exactly as it was passed in.
    """
    with open(file,'rb') as stream:
        # Keep requesting chunks until read() returns the empty-bytes sentinel.
        chunks=iter(lambda: stream.read(chunksize), b'')
        payload=b''.join(chunks)
    return payload,file

# Fetch the first file the directory generator yields so the steps below can be tried on it.
# `count` is only referenced by the disabled loop further down.
count=0
# `a` is the generator returned by iterate_over_dir; each item is the tuple produced by _read_single_file.
a=iterate_over_dir(Input_dir)
# Disabled scratch code: drains the generator and prints a running count.
# while True:
#     try:
#         next(a)
#         print(count)
#         count+=1
#     except StopIteration:
#         break
# Disabled scratch code: skips ahead three files.
# next(a)
# next(a)
# next(a)
# d1 holds the bytes of the first file and `file` its path; later cells read both names.
(d1,file)=next(a)





def get_df(data:Union[FilePath,ReadBuffer[bytes],bytes],sheet_names:str|list[str])->pd.DataFrame|None:
    
    #Check if sheets are iterable or not 

    #Check if sheets are in the excel doc

    #Check if the data is in the correct format

    df1=pd.read_excel(data,sheet_name=sheet_names)
    
    branch_df= df1.copy()[["Max Branch Flow (gpm)","Max Branch Diameter (in.)"]]
    branch_df.loc[:,'copy_index']=branch_df.index

    header_df = df1.copy()[["Max Header Flow (gpm)","Max Header Diameter (in.)"]]
    header_df.loc[:,'copy_index']=header_df.index

    reference_df = df1.copy()[["Flow (gpm)", "Pipe Diameter (in.)"]]
    reference_df.set_index("Flow (gpm)", inplace=True)
    reference_df.dropna(axis='index', how='any', inplace=True) #This throws a warning

    branch_output_df = pd.merge_asof(branch_df.sort_values('Max Branch Flow (gpm)'), reference_df, left_on="Max Branch Flow (gpm)", right_on="Flow (gpm)", direction='backward')
    branch_output_df.sort_values(by=['copy_index'], inplace=True)

    header_output_df = pd.merge_asof(header_df.sort_values('Max Header Flow (gpm)'), reference_df, left_on="Max Header Flow (gpm)", right_on="Flow (gpm)", direction='backward')
    header_output_df.sort_values(by=['copy_index'], inplace=True)

    output_df = pd.merge(branch_output_df, header_output_df, on="copy_index")
    output_df.drop(['copy_index'], axis=1, inplace=True)

    output_df["Max Branch Diameter (in.)"]=np.where(output_df["Max Branch Diameter (in.)"]==output_df['Pipe Diameter (in.)_x'],output_df["Max Branch Diameter (in.)"],output_df['Pipe Diameter (in.)_x'])
    output_df["Max Header Diameter (in.)"]=np.where(output_df["Max Header Diameter (in.)"]==output_df['Pipe Diameter (in.)_y'],output_df["Max Header Diameter (in.)"],output_df['Pipe Diameter (in.)_y'])
    output_df.drop(columns=['Pipe Diameter (in.)_x','Pipe Diameter (in.)_y'],inplace=True)

    return output_df


      
# Name of the worksheet that get_df is asked to read.
sheetname='Max flow to diameter'
# Run the diameter lookup on d1 (the file bytes fetched earlier); d2 is the resulting frame.
d2=get_df(d1,sheetname)




Write to a New File

In [5]:
# Write the result to a new workbook: copy the original file's bytes to the
# output directory under a modified name, then append the computed frame d2 as
# an additional worksheet.
"""I need the following:
1) file_name of the original file
2) data
3) new directory"""

file # path to the original file (bare expression mid-cell; it is not displayed)
d1 # bytes of the original file (bare expression mid-cell; it is not displayed)
Output_dir=r"C:\Projects\2022\Michaels_Code\grundfos-express-tools\pipe diameter finder\output files" # destination directory for the new file
new_sheet=sheetname+'_Completed' # name of the worksheet that receives the results

# NOTE(review): `file` is the joined input path, and os.path.join ignores Output_dir when
# its second argument is absolute - confirm add_str_to_filename returns a bare file name.
new_file_path=os.path.join(Output_dir,add_str_to_filename(file,'Revised')) # path of the new file to write
new_file_path
# Create the new file as a byte-for-byte copy of the original.
with open(new_file_path,'wb') as f:
    f.write(d1)

# Re-open the copy in append mode and write d2 to the new worksheet without its index column.
with pd.ExcelWriter(new_file_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:  
    d2.to_excel(writer, sheet_name=new_sheet, index=False)



Complete Code

In [41]:
import numpy as np
import pandas as pd
import os
import multiprocessing as mp
import typing
from os import walk
from typing import Union,Generator,Callable
from pandas._typing import FilePath,ReadBuffer
from utils.file_ops import add_str_to_filename

def iterate_over_dir(dir:FilePath)->Generator[tuple[bytes,str],None,None]:
    """Iterates over the files directly inside a given directory.

    Params:
        dir: A directory. Only its top level is listed; sub-directories are
            not entered.

    Returns a generator that yields, for each file, the ``(data, path)`` tuple
    produced by ``_read_single_file``; ``path`` is ``dir`` joined with the file name.
    """
    # The listing has to come from the ``dir`` argument. It previously walked a
    # global ``Input_dir``, which is undefined on a fresh kernel and ignored the
    # directory that was actually requested.
    # The default tuple covers a missing/unreadable directory (walk yields nothing).
    filenames = next(walk(dir), (None, None, []))[2]
    for file in filenames:
        yield _read_single_file(os.path.join(dir, file))
            
def _read_single_file(file,chunksize:int=1024)->tuple[bytes,str]:
    """Load the whole of ``file`` as bytes, reading ``chunksize`` bytes at a time.

    Params:
        file: Path of the file to read (opened in binary mode).
        chunksize: Number of bytes requested per read call.

    Returns:
        ``(data, file)`` - the full content and the unchanged input path.
    """
    pieces=[]
    with open(file,'rb') as handle:
        # An empty read marks the end of the file and ends the loop.
        while (piece:=handle.read(chunksize)):
            pieces.append(piece)
    return b''.join(pieces),file

def get_df(data:Union[FilePath,ReadBuffer[bytes],bytes],sheet_names:str|list[str])->pd.DataFrame|None:
    
    #Check if sheets are iterable or not 

    #Check if sheets are in the excel doc

    #Check if the data is in the correct format

    df1=pd.read_excel(data,sheet_name=sheet_names)
    
    branch_df= df1.copy()[["Max Branch Flow (gpm)","Max Branch Diameter (in.)"]]
    branch_df.loc[:,'copy_index']=branch_df.index
    branch_df["Max Branch Flow (gpm)"] = branch_df["Max Branch Flow (gpm)"].astype(float)
    branch_df.dropna(axis='index', how='any', inplace=True,subset=["Max Branch Flow (gpm)"])
    

    header_df = df1.copy()[["Max Header Flow (gpm)","Max Header Diameter (in.)"]]
    header_df.loc[:,'copy_index']=header_df.index
    header_df["Max Header Flow (gpm)"] = header_df["Max Header Flow (gpm)"].astype(float)
    header_df.dropna(axis='index', how='any', inplace=True,subset=["Max Header Flow (gpm)"])


    reference_df = df1.copy()[["Flow (gpm)", "Pipe Diameter (in.)"]]
    reference_df["Flow (gpm)"] = reference_df["Flow (gpm)"].astype(float)
    reference_df.set_index("Flow (gpm)", inplace=True)
    reference_df.dropna(axis='index', how='any', inplace=True) #This throws a warning

    branch_output_df = pd.merge_asof(branch_df.sort_values('Max Branch Flow (gpm)'), reference_df, left_on="Max Branch Flow (gpm)", right_on="Flow (gpm)", direction='backward')
    branch_output_df.sort_values(by=['copy_index'], inplace=True)

    header_output_df = pd.merge_asof(header_df.sort_values('Max Header Flow (gpm)'), reference_df, left_on="Max Header Flow (gpm)", right_on="Flow (gpm)", direction='backward')
    header_output_df.sort_values(by=['copy_index'], inplace=True)

    output_df = pd.merge(branch_output_df, header_output_df, on="copy_index")
    output_df.drop(['copy_index'], axis=1, inplace=True)

    output_df["Max Branch Diameter (in.)"]=np.where(output_df["Max Branch Diameter (in.)"]==output_df['Pipe Diameter (in.)_x'],output_df["Max Branch Diameter (in.)"],output_df['Pipe Diameter (in.)_x'])
    output_df["Max Header Diameter (in.)"]=np.where(output_df["Max Header Diameter (in.)"]==output_df['Pipe Diameter (in.)_y'],output_df["Max Header Diameter (in.)"],output_df['Pipe Diameter (in.)_y'])
    output_df.drop(columns=['Pipe Diameter (in.)_x','Pipe Diameter (in.)_y'],inplace=True)

    return output_df

def write_excel_file(file_data:bytes,target_dir:str,filename_mod_func:Callable[...,typing.Any],*func_args:typing.Any)->str:
    """Write ``file_data`` to a new file inside ``target_dir``.

    Params:
        file_data: Bytes to write.
        target_dir: Directory that receives the new file.
        filename_mod_func: Callable that produces the new file name.
        *func_args: Positional arguments forwarded to ``filename_mod_func``.

    Returns:
        The path of the file that was written. The path is returned on every
        call; an existing file at that path is overwritten.
    """
    new_name=filename_mod_func(*func_args)
    # os.path.join silently drops target_dir when the second part is absolute or
    # carries a drive letter, which would put the file outside target_dir (for
    # example next to the input file). Keep only the file name in that case.
    drive,_=os.path.splitdrive(new_name)
    if drive or os.path.isabs(new_name):
        new_name=os.path.basename(new_name)
    new_file_path=os.path.join(target_dir,new_name)
    with open(new_file_path,'wb') as f:
        f.write(file_data)
    return new_file_path

def write_df_to_excel(df:pd.DataFrame,file_path:FilePath,sheet:str)->None:
    """Write ``df`` into the existing workbook at ``file_path`` as worksheet ``sheet``.

    The workbook is opened in append mode with the openpyxl engine; if a sheet
    named ``sheet`` already exists the data is written on top of it
    (``if_sheet_exists='overlay'``). The DataFrame index is not written.

    Params:
        df: Frame to write.
        file_path: Path of an existing Excel workbook.
        sheet: Name of the target worksheet.
    """
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
        # Use the ``sheet`` argument; this previously read a global ``new_sheet``,
        # which ignored the caller's value and is undefined on a fresh kernel.
        df.to_excel(writer, sheet_name=sheet, index=False)

def main(input_dir:str|None=None,output_dir:str|None=None,sheetname:str='Max flow to diameter')->None:
    """Process every file in the input directory and write a revised copy of each.

    For each file yielded by ``iterate_over_dir`` the diameters are computed with
    ``get_df``, the original bytes are copied to the output directory via
    ``write_excel_file`` (file name built by ``add_str_to_filename`` with
    'Revised'), and the result is written to the worksheet
    ``sheetname + '_Completed'`` with ``write_df_to_excel``.

    Params:
        input_dir: Directory to read from; defaults to the project's input folder.
        output_dir: Directory to write to; defaults to the project's output folder.
        sheetname: Worksheet that holds the flow/diameter columns.
    """
    if input_dir is None:
        input_dir=r"C:\Projects\2022\Michaels_Code\grundfos-express-tools\pipe diameter finder\input files"
    if output_dir is None:
        output_dir=r"C:\Projects\2022\Michaels_Code\grundfos-express-tools\pipe diameter finder\output files"
    new_sheet=sheetname+'_Completed'
    # A plain for loop ends cleanly when the generator is exhausted. The former
    # try/except around next() also swallowed a StopIteration raised by the
    # processing calls, silently ending the run early.
    for d1,file in iterate_over_dir(input_dir):
        d2=get_df(d1,sheetname)
        new_path=write_excel_file(d1,output_dir,add_str_to_filename,file,'Revised')
        write_df_to_excel(d2,new_path,new_sheet)

    
    
# Run the pipeline with its default arguments when this cell is executed.
main()