In [8]:
import pandas as pd

In [9]:
data = pd.read_csv("https://media.geeksforgeeks.org/wp-content/uploads/nba.csv") 

In [15]:
data.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [16]:
import file_handling.folder as folder


In [19]:
params = folder.parse_filename('0.8MDa-TPAM-1wtpct-0-FeCl','sampleinfo','MW-backbone-c-ratio-ion')

In [29]:
params

{'sample': '0.8MDa-TPAM-1wtpct-0-FeCl',
 'MW': '0.8MDa',
 'backbone': 'TPAM',
 'c': '1wtpct',
 'ratio': '0',
 'ion': 'FeCl'}

In [None]:
def make_summary_dataframe(df: pd.DataFrame, fitting_bounds: typing.Tuple[float, float] = [0.1, 0.045]) -> pd.DataFrame:
    # add arguments for fh.folder.parse_filename
    # 
    """
    Condenses a DOS run into an extensional relaxation time by fitting the EC region (t > tc) to a decaying exponential

    Parameters
    ----------
    df : pd.DataFrame
        Contains R/R0, time, t - tc, strain rate, R(tc)/R0, etc for multiple runs and samples
    fitting_bounds: list, optional
        [start, end]
        These are the R/R0 values we look for to set the bounds for the EC region fitting

    Returns
    -------
    lambdaE_df : pd.DataFrame
        dataframe containing lambdaE relaxation time for each run from the input df
    """
    # Initalize parameters and empty list #
    start = fitting_bounds[0]
    end = fitting_bounds[1]
    fitting_results_list = []
    

    samples = df["sample"].unique()
    for sample in samples:
        # Grab sample info from "sample" field #
        params = fh.folder.parse_filename(sample,"sampleinfo",sampleinfo_format,fname_split,sample_split)
        # Select individual sample from df
        sample_dataset = df[(df["sample"] == sample)]
        run_values = sample_dataset['run'].unique()
        for run in run_values:
            run_dataset = sample_dataset[(sample_dataset['run'] == run)]
            run_dataset = run_dataset.reset_index(drop=True)
            R_tc_R0 = run_dataset.loc[0, "Rtc/R0"]
            fitting_results_temp =  [*params.values(), *find_EC_slope(run_dataset, start, end),run, R_tc_R0]
            fitting_results_list.append(fitting_results_temp)
    #### Clean up the dataframe column names ###
    summary_df = annotate_summary_df(fitting_results_list)
                                     ### Save the df as a csv later in integration ? ###
    return summary_df

In [None]:
{0:"sample", 1:"-b", 2:"Intercept", 3:"R",4:"run",5:"Rtc/R0"}

In [32]:
# i need to reproduce the dictionary above
df_header = {}
for i in range(0, len(params)):
    df_header[i] = params.key(i)

AttributeError: 'dict' object has no attribute 'key'

In [30]:
len(params)

6

In [27]:
print(*params.values())

0.8MDa-TPAM-1wtpct-0-FeCl 0.8MDa TPAM 1wtpct 0 FeCl


In [28]:
print(*params.keys())

sample MW backbone c ratio ion


In [None]:
fname = Path(csv).name
params = fh.folder.parse_filename(fname,fname_format,sampleinfo_format,fname_split,sample_split)
for key, value in params.items():
    dataset[key] = value

In [None]:
def parse_filename(filename: str, fname_format: str, sampleinfo_format: str, fname_split: str ="_", sample_split: str ='-') -> dict:
    """
    Parses filenames into a dictonary of parameters using supplied format

    Parameters
    ----------
    filename : str
        the name of the folder
        ex. "20210929_6M-PEO_fps-25k_1"
    fname_format : str
        the format of the filename with parameter names separated
        by the deliminator specified by fname_split
        ex. "date_sampleinfo_fps_run"
    sampleinfo_format : str
        the format of the sampleinfo section of the filename
        separated by the deliminator specified by sample_split
    fname_split : str, optional
        the deliminator for splitting the filename (default is "_")
    sample_split : str, optional
        the deliminator for splitting the sampleinfo section
        of the filename (default is "-")

    Returns
    -------
    parse_filename : dict
        dictionary of parameters from filename
    """

    # Split filename and format into components.
    name_split = filename.split(fname_split)
    tag_split = fname_format.split(fname_split)

    param_dict = {} # initialize dictionary for outputting parameters from the filename

    i = 0 # index in the folder name
    for tag in tag_split:
        value = name_split[i] # entry in the folder name corresponding to the tag from the fname_format

        if "fps" in tag.lower():
            if "k" in value: # check if fps is formated with k to represent 1000
                fps = int(''.join(i for i in value if i.isdigit()))* 1000 # take numeric part of fps and multiply by 1000 if k was used, i.e. 25k becomes 25000
            else:
                fps = int(''.join(i for i in value if i.isdigit())) # take numeric part of fps only
            param_dict["fps"] = fps  # set entry in parameter dictionary
        elif "run" in tag.lower(): # look for run number spec
            param_dict["run"] = int(''.join(i for i in value if i.isdigit())) # take numeric part of run only and set in parameter
        elif "sampleinfo" in tag.lower():
            param_dict["sample"] = value # full sampleinfo in Sample column
            sampleinfo_split = value.split(sample_split) # split sampleinfo using the sample_split deliminator
            sample_tag_split = sampleinfo_format.split(sample_split) # split sampleinfo_format into sample tags using sample_split deliminator
            j = 0 # index in the sampleinfo
            for sample_tag in sample_tag_split:
                sample_value = sampleinfo_split[j] # entry within sampleinfo coresponding to the sample_tag from the sampleinfo_format
                param_dict[sample_tag] = sample_value # set entry in parameter dictionary
                j = j + 1
        else:
            param_dict[tag] = value  # set entry in parameter dictionary
        i = i + 1

    return param_dict #output parameters

In [None]:
def annotate_summary_df(fitting_results_list: list) -> pd.DataFrame:
    """
    Do we want to bring other columns with us like ion, polymer identity, etc? How to code that?

    Parameters
    ----------
    fitting_results_list: list
        generated by find_EC_slope
    original_df : pd.DataFrame
        Contains R/R0, time, t - tc, strain rate, R(tc)/R0, etc for multiple runs and samples

    Returns
    -------
    lambdaE_df : pd.DataFrame
        dataframe containing lambdaE relaxation time for each run from the input df
    """
    lambdaE_df = pd.DataFrame(fitting_results_list)
    lambdaE_df = lambdaE_df.rename(columns={0:"sample", 1:"-b", 2:"Intercept", 3:"R",4:"run",5:"Rtc/R0"})
    lambdaE_df['Lambda E (s)'] = -1/(3*lambdaE_df['-b'])
    lambdaE_df['Lambda E (ms)'] = lambdaE_df['Lambda E (s)']*1000
    lambdaE_df['R^2'] = (lambdaE_df['R'])**2
    lambdaE_df = lambdaE_df.drop(["-b","Intercept","R","Lambda E (s)", ],axis=1)
    return lambdaE_df