TO DO:
- Implement the differences between the efficiency and power curve PSDs
- Update the populated curve PSD to reflect the diameter units (cell B3), power units (cell B4), and the Pwr/Eta cell (B5)

##### Imports, File Setup

In [287]:
import os
import pandas as pd
import shutil
from openpyxl import load_workbook
import re

In [288]:
# Folder and file name of the Excel workbook that holds the exported curve data
myDir = r"C:\Users\104092\OneDrive - Grundfos\Documents\10-19 Projects\12 NBS Curve PSD Separation\12.01 Original Files"
myFile = "GPC NBS Curves.xlsx"
filePath = os.path.join(myDir, myFile)

# Load the curve export sheet into a dataframe, then forward-fill the
# 'RPM(Pump data)' column so blank cells take the last value found above them
data = pd.read_excel(filePath, sheet_name='GPC curves - removed duplicates', index_col=False)
data['RPM(Pump data)'] = data['RPM(Pump data)'].ffill()

In [289]:
# Full path of the blank curve PSD template workbook
templateDir = r"C:\Users\104092\OneDrive - Grundfos\Documents\30-39 Resources\32 GXS"
template = os.path.join(templateDir, "SKB Blank Curve PSD - Efficiency_Metric.xlsx")

# All edits are made on a copy in the output folder, so the template stays unmodified
outputPath = r"C:\Users\104092\OneDrive - Grundfos\Documents\10-19 Projects\12 NBS Curve PSD Separation\12.02 Output Files"
workingCopy = os.path.join(outputPath, "Populated Curve PSD.xlsx")
shutil.copyfile(template, workingCopy)

# Open the copy; the 'NEW' sheet is the one duplicated for every curve tab
wb = load_workbook(workingCopy)
tab_to_copy = wb['NEW']

##### Functions

In [290]:
def extract_trim_from_modelno(model_name: str):
    """Return the trim size encoded in a model name as an int.

    The trim is the run of digits in the first ``-<digits>_Std`` segment of
    the name, e.g. ``012-070-2P-109_Std`` gives ``109``.

    Args:
        model_name: model name containing a ``-<digits>_Std`` segment.

    Returns:
        The digits before ``_Std`` converted to ``int``.

    Raises:
        ValueError: if ``model_name`` has no ``-<digits>_Std`` segment.
    """
    # Raw string: "\d" inside a normal literal is an invalid escape sequence
    res = re.search(r"-(\d+)_Std", model_name)
    if res is None:
        raise ValueError(
            "Cannot extract trim size from model name {!r}: "
            "expected a '-<digits>_Std' segment".format(model_name)
        )
    return int(res.group(1))

In [291]:
def curveDataByPartNumber(data):
    """Split the curve export into one dataframe of curve points per model.

    The export is read top to bottom:

    * a row whose ``ProductNumber`` is not missing starts a new curve, and
      that row's ``Model`` value becomes the dictionary key;
    * every row whose ``RPM(Pump data)`` is greater than 0 contributes one
      point (``Q``, ``H``, ``P1``, ``NPSH``, RPM) to the current curve;
    * the current curve is stored when a row has both ``Q`` and ``H``
      missing, and again when the last row of ``data`` is reached.

    Args:
        data: dataframe with the columns ``ProductNumber``, ``Model``, ``Q``,
            ``H``, ``P1``, ``NPSH`` and ``RPM(Pump data)``.

    Returns:
        dict mapping each ``Model`` value to a dataframe with the columns
        ``Q``, ``H``, ``P1``, ``NPSH`` and ``RPM``. Points containing any
        missing value are dropped; the remaining rows keep the index label
        they had before dropping (their position within the curve).
    """
    curve_columns = ['Q', 'H', 'P1', 'NPSH', 'RPM']

    # This will become a dictionary of dataframes of models and curve data
    dict_curveDataByPn = {}

    currentProductNumber = None
    curve_points = []
    last_position = len(data) - 1

    # enumerate() gives the row position, so the end-of-data check does not
    # depend on the dataframe having a default 0..n-1 index
    for position, (_, value) in enumerate(data.iterrows()):
        # pd.notna (unlike math.isnan) also accepts text part numbers
        if pd.notna(value['ProductNumber']):
            currentProductNumber = value['Model']
            curve_points = []

        # Rows that appear before the first part number belong to no curve
        if currentProductNumber is None:
            continue

        # Record one curve point per row that carries a speed
        if value['RPM(Pump data)'] > 0:
            curve_points.append((
                value['Q'],
                value['H'],
                value['P1'],
                value['NPSH'],
                value['RPM(Pump data)'],
            ))

        # A row with neither Q nor H marks the end of the current curve
        end_of_curve = pd.isna(value['Q']) and pd.isna(value['H'])
        if end_of_curve or position == last_position:
            df = pd.DataFrame(curve_points, columns=curve_columns)
            # Drop points that have NaNs, then add the curve to the dictionary
            dict_curveDataByPn[currentProductNumber] = df.dropna()

    return dict_curveDataByPn

In [292]:
def add_a_curve(list_unique_curves):
    """Add one worksheet per curve name and fill in its header cells.

    For every name in ``list_unique_curves`` the template sheet
    (``tab_to_copy``) is duplicated inside the module-level workbook ``wb``
    and the copy is titled with that name. The trim size parsed from the name
    is written to cells D7 and A10, and cells B3, B4 and B5 are set to
    'mm', 'kW' and 'Power'.

    Args:
        list_unique_curves: iterable of model names; each one becomes a
            worksheet title and must be parseable by
            ``extract_trim_from_modelno``.

    Returns:
        None. The workbook ``wb`` is modified in place.
    """
    for tabName in list_unique_curves:
        # Duplicate the template sheet and title the copy after the curve
        new_tab = wb.copy_worksheet(tab_to_copy)
        new_tab.title = tabName

        trim_size = extract_trim_from_modelno(tabName)

        # Header cells to populate, in writing order: (cell, value)
        header_cells = (
            ('D7', trim_size),
            ('A10', trim_size),
            ('B3', 'mm'),
            ('B4', 'kW'),
            ('B5', 'Power'),
        )
        for cell_coord, cell_value in header_cells:
            # The sheet is looked up by name for every write
            wb[tabName]["{}".format(cell_coord)].value = cell_value

    return None

In [293]:
def findSpeedCells(my_df):
    """Return the row-7 cell coordinates, one per unique RPM in ``my_df``.

    The first coordinate is in column 4 ('D') and each following one is 21
    columns further to the right, e.g. two unique RPM values give
    ``['D7', 'Y7']``.

    Args:
        my_df: object exposing an ``RPM`` column with a ``unique()`` method
            (a curve dataframe).

    Returns:
        list of cell coordinate strings; empty when there are no RPM values.
    """
    from openpyxl.utils.cell import get_column_letter

    header_row = 7
    start_col = 4
    block_width = 21

    # Exclusive upper bound: yields exactly one start column per unique RPM
    stop_col = block_width * len(my_df.RPM.unique())

    return [
        "{}{}".format(get_column_letter(col_index), header_row)
        for col_index in range(start_col, stop_col, block_width)
    ]

In [294]:
def curveFiller(pn_vs_curves_dict):
    """Write each curve dataframe into the worksheet that carries its name.

    Every worksheet of the module-level workbook ``wb`` whose title ends in
    ``_Std`` and is a key of ``pn_vs_curves_dict`` is filled. Cells are
    addressed relative to D7: a dataframe row with index label ``k`` goes
    ``3 + k`` rows below it, with these column offsets:

    * 0 -> ``Q``,  1 -> ``H``
    * 7 -> ``Q``,  8 -> ``P1``
    * 14 -> ``Q``, 15 -> ``NPSH``

    Args:
        pn_vs_curves_dict: dict mapping worksheet titles to dataframes with
            the columns ``Q``, ``H``, ``P1`` and ``NPSH`` and an integer
            index.

    Returns:
        None. The workbook ``wb`` is modified in place.
    """
    cellName = 'D7'
    first_row_offset = 3

    # (column offset from the anchor cell, dataframe column), in writing order.
    # Offset 8 holds P1; the earlier commented-out variant wrote 'Eta1' there.
    column_layout = (
        (0, 'Q'),
        (1, 'H'),
        (7, 'Q'),
        (8, 'P1'),
        (14, 'Q'),
        (15, 'NPSH'),
    )

    for sheet in wb.worksheets:
        sheetname = sheet.title

        # Only process tabs that contain curve data
        if not sheetname.endswith("_Std"):
            continue

        # Direct key lookup instead of scanning every dictionary entry
        if sheetname not in pn_vs_curves_dict:
            continue
        curve_dataframe = pn_vs_curves_dict[sheetname]

        anchor = sheet[cellName]

        # The dataframe index label is used as the row offset in the PSD
        for key, value in curve_dataframe.iterrows():
            row_offset = first_row_offset + key
            for col_offset, column_name in column_layout:
                anchor.offset(row_offset, col_offset).value = value[column_name]

    return

##### Main

In [296]:
# Builds a dictionary keyed by model name, with one dataframe of curve points per key
curveDataDict = curveDataByPartNumber(data)


In [297]:
# Lists the dictionary keys (model names); one curve tab is created per entry
uniqueCurvePartnumbers = list(curveDataDict.keys())

In [298]:
# Adds one curve tab to the workbook for each unique curve and fills in its header cells
add_a_curve(uniqueCurvePartnumbers)

In [299]:
# Fills each new curve tab with the curve points stored under the tab's name
curveFiller(curveDataDict)


In [300]:

# Save the populated workbook back to the working copy file
wb.save(workingCopy)