In [None]:
import tribo as tb
import numpy as np
import os 
import re
import pandas as pd

from tribo import Tribo_file

# Test names grouped by the data folder they belong to (folder name -> list of
# test names); judging by the variable name these are the tests treated as outliers.
# NOTE(review): this dictionary is not referenced anywhere else in the visible
# notebook -- confirm whether tb.remove_outliers is supposed to receive it.
outlier_tests = {
    'OA_Project_Tests_CoF': [
        '20wt_C20A2wtOA_10N_100mms_Test11',
        '20wt_C20A_20N_100mms_Test2',
        '20wt_C20A_20N_20mms_Test8',
        '20wt_C20A2wtOA_20N_20mms_Test7',
        '100-TOCN_10N_20mms_Test7',
        '100-TOCN_20N_20mms_Test10',
    ],
    'PAO_IL_ZDDP_CoF': [
        '0_OA0_20N_100mms_test2',
        '1_IL_20N_100mms_Test5',
        '1_IL_20N_20mms_Test2',
        '1_IL_20N_20mms_Test1',
    ],
}

# Data folder processed by this notebook; later cells also use it as the key
# into the test dictionaries and as the stem of the results workbook name.
folder = 'OA_Project_Tests_CoF'

files_in_folder = os.listdir(folder)
print(f" Found {len(files_in_folder)} files in {folder}")

# Load every file, smooth the combined CoF data, and collect the speeds/names.
Tribo_files_list, complete_CoF_2 = tb.load_files(files_in_folder, folder)
sg_smoothing_array = tb.smoothing_df(complete_CoF_2)
speeds_list, names_list = tb.get_speeds_and_names_in_folder(
    files_in_folder, Tribo_files_list
)

# Build the per-test summary table from the rows returned by tb.remove_outliers.
rows = tb.remove_outliers(Tribo_files_list)
df = pd.DataFrame(
    rows,
    columns=['Name', 'CoF_avg', 'CoF_std', 'Speed', 'Percent', 'Force', 'Test'],
)
speed_sheets_list = tb.sort_dfs_by_speed(speeds_list, df)

# Hand everything to the tribo helper that exports the results to Excel.
tb.export_excel_results(
    speed_sheets_list,
    speeds_list,
    sg_smoothing_array,
    folder,
    complete_CoF_2,
    verbose=True,
)

In [None]:
import matplotlib.pyplot as plt


def cof_overlays(smoothed_df: pd.DataFrame) -> None:
    """
    Plot Coefficient of Friction (CoF) vs cycle number for all the tests in the dataframe.

    Every column of ``smoothed_df`` is drawn as one line on a single shared
    plot, labelled with the column name. The x axis is the dataframe index.

    :param smoothed_df: dataframe with columns containing the smoothed CoF values for all the tests
    :return: None. The overlay is displayed with ``plt.show()``.
    """
    for column in smoothed_df.columns:
        # Read the data from the argument; the previous version indexed the
        # notebook-level ``df`` summary table, which does not hold these columns.
        plt.plot(smoothed_df[column], label=column)

    # Build the legend once, after all lines exist, and only if there is
    # something to label (avoids the "no artists with labels" warning).
    if len(smoothed_df.columns) > 0:
        plt.legend()
    plt.show()



In [None]:
# -----------------------------------------------------------------------
# Tests chosen for the Average CoF bar graphs, grouped by data folder
# (folder name -> list of test names).
# NOTE(review): '1_IL_20N_100mms_Test5' is also listed under 'PAO_IL_ZDDP_CoF'
# in the `outlier_tests` dictionary of the first cell -- confirm which is intended.
avg_CoF_tests = {
    'OA_Project_Tests_CoF': [
        '100-TOCN_20N_20mms_Test8',
        '20wt_C20A_10N_20mms_Test6',
        '20wt_C20A_10N_100mms_Test14',
        '20wt_C20A_20N_20mms_Test16new',
        '20wt_C20A_20N_100mms_Test12',
        '20wt_C20A2wtOA_10N_20mms_Test8',
        '20wt_C20A2wtOA_10N_100mms_Test4',
        '20wt_C20A2wtOA_20N_20mms_Test6',
        '20wt_C20A2wtOA_20N_100mms_Test17new',
        '100-TOCN_10N_100mms_Test12',
        '100-TOCN_10N_20mms_Test6',
        '100-TOCN_20N_100mms_Test14',
    ],
    'PAO_IL_ZDDP_CoF': [
        '0_OA0_20N_100mms_test13',
        '0_OA0_20N_20mms_test1',
        '1-ZADP_20N_100mms_Test5',
        '1-ZADP_20N_20mms_Test3',
        '1_IL_20N_100mms_Test5',
        '1_IL_20N_20mms_Test4',
    ],
}


#-----------------------------------------------------------------------
def plot_combinations_of_params(smoothed_df, folder):
    """Plot one Coefficient of Friction vs Cycles graph per parameter combination.

    Columns of ``smoothed_df`` are grouped by their name with the trailing
    ``_test...`` / ``_Test...`` part removed, so every repetition of the same
    sample + parameter combination lands on the same graph.

    The purpose of this function is to identify the outlier tests that may be
    contributing to the high standard deviation in the Average CoF values, and
    to show the general CoF trend for each combination of parameters.

    Columns whose name contains '10mms' are skipped (considered invalid tests),
    as are columns whose name contains neither '20N' nor '10N'.

    :param smoothed_df: dataframe whose columns are named after the tests and
        hold the CoF values to plot (one column per test)
    :param folder: name of the data folder; images are saved in ``<folder>_Images``
    :return: None. Each figure is saved as ``<combination>.png`` and then shown.
    """
    grouped_tests = _group_tests_by_params(smoothed_df.columns)

    # Make a new directory to store the images if it doesn't already exist.
    image_dir = f'{folder}_Images'
    os.makedirs(image_dir, exist_ok=True)

    for combination, test_columns in grouped_tests.items():
        # One explicit figure per combination so lines never leak from one
        # graph into the next, whatever matplotlib backend is active.
        fig, ax = plt.subplots()
        for test_column in test_columns:
            # Plot from the dataframe passed in, not from a notebook global.
            ax.plot(smoothed_df[test_column], label=test_column)
        ax.legend()
        # Save before showing: showing may release the figure.
        fig.savefig(os.path.join(image_dir, f'{combination}.png'))
        plt.show()
        plt.close(fig)


def _group_tests_by_params(columns):
    """Map each sample + parameter combination to the list of its test column names."""
    test_suffix = r'(_test|_Test).*'  # strips the test number from the column name
    grouped = {}
    for column in columns:
        if '10mms' in column:  # considered an invalid test
            continue
        # Both membership checks must be spelled out: `'20N' or '10N' in column`
        # is always true because the non-empty string '20N' is truthy.
        if '20N' in column or '10N' in column:
            combination = re.sub(test_suffix, '', column)
            grouped.setdefault(combination, []).append(column)
    return grouped

# for column in sg_smoothing.columns:
#     if '20N' in column:
#         if re.sub(pattern, '', column) in unique_names:
            
#             plt.plot(sg_smoothing[column],label=column)
#             plt.legend()

In [None]:
# Pick the hand-selected tests for this folder out of the smoothed CoF data and
# write them to their own 'Good_Tests' sheet in the results workbook.
selected_tests = avg_CoF_tests[folder]
print(selected_tests, f'\n Length: {len(selected_tests)}')

# The smoothed frame is bound to `sg_smoothing_array` in the first cell; the
# name `sg_smoothing` used here before is never defined in this notebook.
# NOTE(review): assumes tb.smoothing_df returns a DataFrame with one column per
# test name -- confirm against the tribo module.
good_test_in_folder_list = [
    column for column in sg_smoothing_array.columns if column in selected_tests
]
for test_name in good_test_in_folder_list:
    print(test_name)
count = len(good_test_in_folder_list)
print(count)

# Surface selected tests that are absent from the data instead of dropping
# them silently.
missing_tests = [name for name in selected_tests if name not in good_test_in_folder_list]
if missing_tests:
    print(f'Selected tests not found in the smoothed data: {missing_tests}')

good_test_df = sg_smoothing_array.loc[:, good_test_in_folder_list]
print(good_test_df)

# Append to the existing workbook. if_sheet_exists='replace' keeps this cell
# re-runnable: without it a second run fails because 'Good_Tests' already exists.
with pd.ExcelWriter(f'{folder}.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    good_test_df.to_excel(writer, sheet_name='Good_Tests', index=False)


In [None]:
# Announce what this cell does, then load the 'Total' sheet of the results
# workbook; the input helpers and the table-building code below read it.
print(
    'This block of code will reorganize the "total" sheet into a friction table '
    'based on the user input which will \n include percent reduction for ease of '
    'integration into origin.'
)
total_dataframe = pd.read_excel(
    f'{folder}.xlsx',
    sheet_name='Total',
)


def input_table_title():
    """Ask whether the friction table is organised by force or by speed.

    Keeps prompting until the user types exactly 'f' (force) or 's' (speed).

    :return: the accepted answer, either 'f' or 's'
    """
    while True:
        answer = input('Would you like to create a friction table based on force or speed? (only type f for force or s for speed): ')
        if answer in ('f', 's'):
            return answer
        # Anything else is rejected and the question is asked again.
        print('Invalid input. Please enter f or s')
    
def input_include_percent():
    """Ask whether sample names in the friction table should include the percentage.

    Keeps prompting until the user types exactly 'y' or 'n'.

    :return: the accepted answer, either 'y' or 'n'
    """
    while True:
        answer = input('Would you like to include the percentages in the names in the friction table? (y/n): ')
        if answer in ('y', 'n'):
            return answer
        # Anything else is rejected and the question is asked again.
        print('Invalid input. Please enter y or n')

def input_ref_sample():
    """Ask for the reference sample used for the percent-reduction columns.

    Keeps prompting until the answer matches a value in the 'Name' column of
    the notebook-level ``total_dataframe``. The answer is tried as typed first
    and then with surrounding whitespace removed, so a stray space does not
    reject an otherwise valid name.

    :return: the accepted sample name, exactly as it appears in ``total_dataframe['Name']``
    """
    prompt = (
        'Please enter the reference sample for the friction table '
        '(the sample that the percent reduction is measured against). '
        'If you included the percentage in the names, include the percentage '
        'in the reference name as well: '
    )
    # Unique names in order of first appearance; also shown to the user on a miss.
    valid_names = total_dataframe['Name'].unique().tolist()
    while True:
        typed = input(prompt)
        for candidate in (typed, typed.strip()):
            if candidate in valid_names:
                return candidate
        print('Invalid input. Please enter a valid sample name')
        print(f'Valid sample names: {valid_names}')

# --- Collect the user's choices ------------------------------------------------
table_title = input_table_title()
include_percent = input_include_percent()

if include_percent == 'y':
    # Prefix every sample name with its percentage: "<Percent>% <Name>".
    total_dataframe['Name'] = total_dataframe['Percent'].astype(str) + '%' + ' ' + total_dataframe['Name']

# Asked after the optional renaming so the reference must be typed in the same
# form as the names that are now in the table.
reference_sample = input_ref_sample()

possible_params = {'f': 'Force', 's': 'Speed'}  # user choice -> column name in the 'Total' sheet
dataframe_dict = {}  # '<value> Organized Table' -> friction table for that value
other_unique_params = {}  # every non-title parameter column -> its unique values

title_column = possible_params[table_title]
unique_param_titles = total_dataframe[title_column].unique()

for key, value in possible_params.items():
    if key != table_title:  # the title parameter itself is not a sub-column
        other_unique_params[value] = total_dataframe[value].unique()

# --- Build one table per value of the title parameter ----------------------------
for param in unique_param_titles:
    param_df = total_dataframe[total_dataframe[title_column] == param]
    param_df = param_df.drop(columns=[title_column])
    final_df = pd.DataFrame()

    for other_param, other_values in other_unique_params.items():
        for param2 in other_values:
            temp_df = param_df[param_df[other_param] == param2]
            temp_df = temp_df.drop(columns=[other_param])
            if temp_df.empty:
                # No test was run for this combination; nothing to tabulate.
                continue

            if include_percent == 'n':
                temp_df = temp_df.drop(columns=['Percent'])

            # Identify the reference by NAME. Matching on the CoF value would
            # also label any sample that happens to tie with the reference.
            is_reference = temp_df['Name'] == reference_sample
            if is_reference.any():
                reference_value = temp_df.loc[is_reference, 'CoF_avg'].iloc[0]
                percent_reduction = ((reference_value - temp_df['CoF_avg']) / reference_value) * 100
            else:
                # Without a reference row the reduction is undefined; keep the
                # CoF values and leave the reduction column empty.
                print(f'No "{reference_sample}" row for {title_column}={param}, {other_param}={param2}; '
                      'percent reduction left empty.')
                percent_reduction = pd.Series(float('nan'), index=temp_df.index)
            percent_reduction = percent_reduction.astype(object)
            percent_reduction.loc[is_reference] = 'REF'

            temp_df = temp_df.rename(columns={'CoF_avg': f'{param2}', 'CoF_std': f'{param2} STDEV'})
            temp_df.insert(2, f'% Percent Reduction {param2}', percent_reduction)

            # Move the reference sample row to the top of the sub-table.
            reference_row = temp_df[temp_df['Name'] == reference_sample]
            temp_df = temp_df[temp_df['Name'] != reference_sample]
            temp_df = pd.concat([reference_row, temp_df], ignore_index=True)

            if final_df.empty:
                final_df = temp_df
            else:
                # NOTE(review): a left merge drops samples that are missing from
                # the first sub-table, and any non-key columns shared by both
                # sides get pandas' _x/_y suffixes -- confirm this is intended.
                final_df = pd.merge(final_df, temp_df, how='left', on='Name')
    dataframe_dict[f'{param} Organized Table'] = final_df

for key, value in dataframe_dict.items():
    print(f'{key}\n{value.to_string()}\n\n')

# Append the tables to the results workbook. if_sheet_exists='replace' keeps the
# cell re-runnable: without it a second run fails because the sheets already exist.
with pd.ExcelWriter(f'{folder}.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    for key, value in dataframe_dict.items():
        value.to_excel(writer, sheet_name=f'{key}', index=False)

# df['% Reduction 10N'] = df['10N'].apply(lambda x: 'REF' if x == reference_value else ((reference_value - x) / reference_value) * 100)
