In [1]:
def cleanup_data_values_return_float(data_in, on_error_return_runtimerror=True, on_wrong_datatype_return_errormessage_string=False):
    '''
    Description:
        Checks raw data quality, convert if required
        Remove and converts commas and dots, to assure decimal point usage but removal of thousand seperators
        Raises errors if datatype not useable

    Parameters:
        data_in = raw data in, required, no default
            raw data in
        on_error_return_runtimerror = boolean return runtimeerror or not, default True
            Boolean to determine whether program should give runtimeerrors upon encountering error; wrong datatype (default True)
        on_wrong_datatype_return_errormessage_string = boolean return errormessage string, default False
            Boolean to determine whether program should return string upon encountering error; wrong datatype (default False)

    Returns:
        Floating point number of input
        Errormessage in case wrong datatype when on_wrong_datatype_return_errormessage_string is True (default False)
        
    Raises:
        RuntimeError in case wrong datatype when on_error_return_runtimerror is set to True (default True)
    '''
    message_error_string = ' is the datatype value supplied, but it must be a string datatype'

    if type(data_in) is str:
        # assure all commas become dots
        data_in = data_in.replace(',','.') 
        # assure we only remain with the most right dot
        if data_in.count('.') > 1: 
            data_in = data_in.replace('.','', data_in.count('.') - 1)

    try: 
        # convert to float
        data_in = float(data_in) 
    except:
        # errorhandling
        if on_error_return_runtimerror:
            if type(data_in) is bool:
                # this will stop the program!
                raise RuntimeError("Boolean " + message_error_string)
            if type(data_in) is str:
                # this will stop the program!
                raise RuntimeError("String " + message_error_string)
        elif on_wrong_datatype_return_errormessage_string:
            # return a string that can be used for storage in database or identification purposes
            return str(type(data_in)) + message_error_string

    return data_in



def cleanup_dictionary_values_convert_numbers_to_float_and_indicated_strings_to_function(dict, string_indicator):
    '''
    Description:
        cleans up the dictionary values
        remove and converts commas and dots, to assure decimal point usage but removal of thousand seperators
        floating point number if number type
        in case there is a string (indicated by startswith), the string is converted to a function if possible
        does not raise errors if datatype not useable but just continues

    Parameters:
        dict = dictionary it relates to, required, no default
        string_indicator = indicative starting part of a string to indicate it is a function, required, no default

    Returns:
        cleaned up dictionary 

    Depending on other functions:
        depends on function cleanup_data_values_return_float
    '''
    for item in list(dict.keys()):
        try:
            # cleanup the style of numbering and decimal points
            dict[item] = cleanup_data_values_return_float(dict[item])
        except:
            try:
                if type(dict[item]) is str and dict[item].startswith(string_indicator):
                    # updates value to a 'pointer' of the actual formula
                    dict[item] = eval(dict[item])
            except:
                pass
            pass
    return



def remove_character_from_string(input_string, character_to_remove='.', on_wrong_datatype_return_errormessage_string=False):
    '''
    Description:
        removes a given character from an input string
        checks whether data type is string otherwise returns input

    Parameters:
        input_string = input string, required, no default
        character_to_remove = input character to be removed, default is dot
        on_wrong_datatype_return_errormessage_string = Boolean to determine whether program should return string upon encountering error; wrong datatype (default False)

    Returns:
        input string, cleaned up if possible
        errormessage if on_wrong_datatype_return_errormessage_string is set to True (default False)
    '''
    message_error_string = ' is the datatype value supplied, but it must be a string datatype'

    if type(input_string) == str:
        # remove character from string and replace with nothing
        return input_string.replace(character_to_remove,'')
    else:
        # errorhandling
        if on_wrong_datatype_return_errormessage_string:
            # return a string that can be used for storage in database or identification purposes
            return str(type(input_string)) + message_error_string

    return input_string



def convert_units_from_dict(dict_to_use, unit_subject, unit_system, unit_specs, data_in, on_error_return_runtimerror=True, on_wrong_datatype_return_string=False):
    '''
    Description:
        retrieves conversion units from dictionary to be multiplied
        apply a function in case it is a function
        multiplies a factor if it is a factor

    Parameters:
        dict_to_use = dictionary used to retrieve value
        unit_subject = describes the subject of unit, e.g. temperature, length
        unit_system = describes the unit systeml USCS, Imperial, SI etc
        unit_specs = specificies the exact unit used
        on_error_return_runtimerror = boolean to determine whether program should give runtimeerrors upon encountering error; conversion not available in dictionary (default True)
        on_wrong_datatype_return_errormessage_string = boolean to determine whether program should return string upon encountering error; wrong datatype (default False), might be handy for storing an error in the dataset
        
    Returns:
        converted value of data_in
    
    Raises:
        RuntimeError when retrieval_value not in dictionary, when on_error_return_runtimerror is set to True (default False)
    '''
    message_error_string = ' is the datatype value supplied, but it must be a floating point or integer datatype'

    retrieval_value = unit_subject + '_' + unit_system + '_' + unit_specs
    
    if on_error_return_runtimerror:            
        if retrieval_value not in dict_to_use:
            # this will stop the program!
            raise RuntimeError("Unit "  + retrieval_value + " not found in dictionary. Please update data or dictionary.")

    try:
        x = dict_to_use[retrieval_value]
        if callable(x):
             # applies the unit transform function as stated in dictionary
            return x(data_in)
        else:
            # multiple with unit transform amount (e.g. conversion rate)
            return data_in * x 
    except:
        # errorhandling
        if on_wrong_datatype_return_string:
            # return a string that can be used for storage in database or identification purposes
            return str(type(data_in)) + message_error_string
        else:
            pass
    return




def import_dictionary_from_csv(csv_filepath='.\conv.csv', csv_seperator=';', string_indicator_for_unit_conversion_formula='lambda'):
    '''
    Description:
        Imports a csv file and transforms it to a dictionary
        Also assures that formulas gets stored as python formulas, use the string_indicator_for_unit_conversion_formula parameter 

    Parameters:
        csv_filepath = pathname as string (default = '.\conv.csv')
            describes the filepath to the to be imported dictionary csv
        csv_seperator = string (default = ';')
            the seperator used in the csv file for column identification purposes
        string_indicator_for_unit_conversion_formula
            tThis gets parsed to function cleanup_dictionary_values_convert_numbers_to_float_and_indicated_strings_to_function
            please refer to documentation in that function for further insights (prevent error in description text)
    
    Returns:
        Cleaned up dictionary with conversion values or formulas to preferred system

    Depends on:
        cleanup_dictionary_values_convert_numbers_to_float_and_indicated_strings_to_function
    '''
    # Read csv to dictionary
    import csv
    reader = csv.reader(open(csv_filepath, 'r'), delimiter=csv_seperator)
    # start with empty dictionary
    dict_unit_csv = {}

    for k, v in reader:
        # add key and value to dict
        dict_unit_csv[k] = v

    # cleanup dictionary values
    cleanup_dictionary_values_convert_numbers_to_float_and_indicated_strings_to_function(dict_unit_csv, string_indicator_for_unit_conversion_formula)

    return dict_unit_csv



def import_new_supplier(dict_to_use, supplier_name, csv_filepath = '.\Data.csv', csv_has_header = None, csv_seperator = ';', csv_encoding = 'UTF-8', csv_low_memory = False, columns_not_useable = [range(1,7), range(8,9), range(10,14)], column_with_product_id = 0, system_name_for_product_id = '12NC', product_id_seperator_to_be_removed='',
row_num_unit_subject = 0, row_num_unit_system = 1, row_num_unit_specs = 2, row_num_data_starts = 6):
    '''
    Description:
        Facilitates the import of a new supplier throug a CSV file and cleans the dataset:
            1. Transforms CSV to Pandas DataFrame
            2. Removes unneccessary columns
            3. Converts numbers to correct unit, cleans up string values
            4. Removes metadata rows from dataframe

    Parameters:
        - Dictionary:
            dict_to_use = dictionary used for unit transformation, is required to be given (no default)

        - Supplier:
            supplier_name = string containing the name of the supplier, required (no default)

        - Standard python pandas dataframe input parameters:
            csv_filepath = pathname as string (default = '.\Data.csv')
                describes the filepath of the to be imported dataset
            csv_has_header = int, list of int or None (default = None)
                indicates whether dataset contains a header for column name information
            csv_seperator = string (default = ';')
                the seperator used in the csv file for column identification purposes
            csv_encoding = string (default = 'UTF-8')
                encoding system used in the csv
            csv_low_memory = boolean (default = False)
                deterement to let go memory restrictions for large datasets

        - Dataset specifics:
        These parameters describe information that must be either stored in the dataset (metadata) or can be removed (unnecessary columns, rows with metadata) 
            columns_not_useable = list (default = [range(1,7), range(8,9), range(10,14)])
                the numbers here indicate the columns that will immediately be removed to reduce the size of the dataframe
            column_with_product_id = (default = 0)
                indicates the column where product identification systems are stored (barcode numbers, 12NC etc.)
            product_id_seperator_to_be_removed = default = '' (nothing to be removed)
                character of a string that can be removed from the product_id (such as a dot)
            system_name_for_product_id = (default = '12NC')
                the system used for product_id in the company or organization
            row_num_unit_subject = (default = 0)
                metadata for subject of unit (e.g. temperature), must be same as the unit transformation keys used in the dictionary for unit transformation
            row_num_unit_system = (default = 1)
                metadata for unit system (e.g. USCS), must be same as the unit transformation keys used in the dictionary for unit transformation  
            row_num_unit_specs = (default = 2)
                metadata of specifics about the unit (e.g. Fahrenheit), must be same as the unit transformation keys used in the dictionary for unit transformation
            row_num_data_starts = (default = 6)
                all rows above contain metadata or not interesting information so they can be removed

    Returns:
        Cleaned up pandas dataframe with converted values to SI system

    Depends on:
        cleanup_data_values_return_float
        convert_units_from_dict
        remove_dots_from_string

    Adjustments inside code:
        There are a few booleans value where the other functions get called that might help during error handling:
            cleanup_data_values_return_float:   on_error_return_runtimerror=False
            convert_units_from_dict:    on_error_return_runtimerror=True, on_wrong_datatype_return_string=True
            remove_dots_from_string:    on_wrong_datatype_return_errormessage_string=False)
        Please refer to documentation of individual functions for further information
    '''
    import pandas as pd
    df_import_supplier = pd.read_csv(csv_filepath, header=csv_has_header, sep=csv_seperator, encoding=csv_encoding, low_memory=csv_low_memory)

    # drop unneccessary columns
    for col in columns_not_useable:
        try:
            df_import_supplier.drop(columns=col, axis=1, inplace=True)
        except:
            pass

    # clean up the database and by going over each column in the dataframe as each column might require different work
    for col in df_import_supplier.columns.tolist():
        # use metadata information from dataset to buildup the unit conversion information
        # this information is also used for column names later in code
        try:
            unit_subject = df_import_supplier[col].iloc[row_num_unit_subject].lower()
            unit_system = df_import_supplier[col].iloc[row_num_unit_system].upper()
            unit_specs = df_import_supplier[col].iloc[row_num_unit_specs]
        except:
            if col == column_with_product_id:
                unit_subject = system_name_for_product_id
            else:
                unit_subject = 'column datatype properties for ' + str(col) + ' are not given by database please adjust'

        # clean up all the individual data values to prepare them for storing to main database
        if col != column_with_product_id:
            # if not column with product_id info it must be convertable information
            df_import_supplier[col] = df_import_supplier[col].map(lambda x : cleanup_data_values_return_float(x, on_error_return_runtimerror=False))
            df_import_supplier[col] = df_import_supplier[col].map(lambda x : convert_units_from_dict(dict_to_use, unit_subject, unit_system, unit_specs, x, on_error_return_runtimerror=True, on_wrong_datatype_return_string=False))
        else:
            df_import_supplier[col] = df_import_supplier[col].map(lambda x: remove_character_from_string(x, character_to_remove=product_id_seperator_to_be_removed, on_wrong_datatype_return_errormessage_string=True))

        # rename columns
        df_import_supplier.rename(columns={col: unit_subject}, inplace=True)

    # drop the rows which contain the metadata info
    df_import_supplier.drop(df_import_supplier.index[range(0, row_num_data_starts)], inplace=True)

    # add column to indicate which supplier it is
    df_import_supplier['supplier'] = supplier_name

    return df_import_supplier

In [1]:
# import a new supplier and perform unit transformation and store to a preferred storage datatype

# hyperparameters, located here for easy adjustment
filepath_of_csv_unit_conversion_list = '.\conversion_of_units.csv'
filepath_of_csv_unit_of_database = '.\DataA.csv'
filepath_for_storing_the_cleaned_database = r'.\converted_db_of_supplier'
system_name_for_product_id = '12NC'
supplier_name = 'Company_A'
storing_filesystem = 'csv' # options: csv, sql, hdf, parquet and feather


# import libraries
import pandas as pd
if storing_filesystem == 'sql':
    from sqlalchemy import create_engine
elif storing_filesystem == 'parquet':
    ! pip install pyarrow # will auto import upon function call


# import the unit conversion csv to dictionary
unit_conversion_dictionary_file = import_dictionary_from_csv(
        csv_filepath=filepath_of_csv_unit_conversion_list, 
        csv_seperator=';', 
        string_indicator_for_unit_conversion_formula='lambda'
    )


# import a supplier
df_import = import_new_supplier(
        dict_to_use=unit_conversion_dictionary_file, 
        supplier_name=str(supplier_name), 
        csv_filepath=filepath_of_csv_unit_of_database, 
        csv_has_header=None, 
        csv_seperator=';', 
        csv_encoding='UTF-8', 
        csv_low_memory=False, 
        columns_not_useable=[range(1,7), range(8,9), range(10,14)], 
        column_with_product_id=0, 
        system_name_for_product_id=system_name_for_product_id, 
        product_id_seperator_to_be_removed='.', 
        row_num_unit_subject=0, 
        row_num_unit_system=1, 
        row_num_unit_specs=2, 
        row_num_data_starts=6
    )


# store file to database
filepath_for_storing_the_cleaned_database = (filepath_for_storing_the_cleaned_database + '_' + str(supplier_name))
storing_filesystem = str(storing_filesystem.lower())

if storing_filesystem == 'csv':
    # csv, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html#pandas.DataFrame.to_csv
    # options for compression: csv, gzip, zip, bz2, xz. 
    # compression = infer, takes from filename
    df_import.to_csv(
        (filepath_for_storing_the_cleaned_database + '.csv'),
        index=False, 
        header=True, 
        sep=';', 
        mode='w',
        compression='infer',
        encoding='UTF-8',
        decimal='.'
    )

elif storing_filesystem == 'sql':
    # sql, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html#pandas.DataFrame.to_sql
    from sqlalchemy import create_engine
    engine = create_engine('sqlite://', echo=False)
    filepath_for_storing_the_cleaned_database = filepath_for_storing_the_cleaned_database.replace('.', '')
    filepath_for_storing_the_cleaned_database = filepath_for_storing_the_cleaned_database.replace('\\', '')
    df_import.to_sql(
        (filepath_for_storing_the_cleaned_database), 
        con=engine,
        if_exists='replace',
        index_label='id',
    )

elif storing_filesystem == 'hdf':
    # hdf, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_hdf.html#pandas.DataFrame.to_hdf
    df_import.to_hdf(
        (filepath_for_storing_the_cleaned_database + '.h5'),
        key=system_name_for_product_id,
        mode='w',
        complevel=5,
        complib='zlib',
        append=False,
        format='table',
        errors='strict',
        encoding='UTF-8',        
    )

elif storing_filesystem == 'parquet':
    # parquet, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html
    df_import.to_parquet(
        (filepath_for_storing_the_cleaned_database),
        engine='auto',
        compression='snappy',
        index=None,
    )

elif storing_filesystem == 'feather':
    # parquet, https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_feather.html
    df_import.to_parquet(
        (filepath_for_storing_the_cleaned_database + '.feather'),
    )

else:
    # csv is the main goto storage system for this project
    df_import.to_csv(
        (filepath_for_storing_the_cleaned_database + '.csv'),
        index=False, 
        header=True, 
        sep=';', 
        mode = 'w',
        compression = 'infer',
        encoding='UTF-8',
        decimal = '.'
    )

df_import.head()

Unnamed: 0,12NC,temperatures,mass,supplier
6,402255334708,15.0,89.811289,Company_A
7,402258698328,1.111111,183.70491,Company_A
8,402249767239,25.0,698.985842,Company_A
9,402235950496,55.0,311.617958,Company_A
10,402264653803,85.555556,306.17485,Company_A


In [1]:
# Merge a new supplier to the main database and store it
import pandas as pd

filepath_of_csv_of_main_database = '.\main_database.csv'
filepath_of_csv_of_new_supplier_cleaned_database = r'.\converted_db_of_supplier_Company_B.csv'

# read csv files
df_main = pd.read_csv(
        filepath_of_csv_of_main_database,
        header=0, 
        sep=';', 
        encoding='UTF-8', 
        low_memory=False
    )
    
df_import = pd.read_csv(
        filepath_of_csv_of_new_supplier_cleaned_database, 
        header=0, 
        sep=';', 
        encoding='UTF-8', 
        low_memory=False
    )

# merge, https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
df_main = pd.concat([df_main, df_import], ignore_index=True, sort=False)

# store file to database
df_main.to_csv(
        filepath_of_csv_of_main_database,
        index=False, 
        header=True, 
        sep=';', 
        mode = 'w',
        compression = 'infer',
        encoding='UTF-8',
        decimal = '.'
    )

df_main.head(10)

Unnamed: 0,12NC,temperatures,mass,supplier
0,402255334708,15.0,89.811289,Company_A
1,402258698328,1.111111,183.70491,Company_A
2,402249767239,25.0,698.985842,Company_A
3,402235950496,55.0,311.617958,Company_A
4,402264653803,85.555556,306.17485,Company_A
5,402210410701,13.888889,740.262748,Company_A
6,402262421235,82.777778,406.872356,Company_A
7,402224951198,16.666667,759.76722,Company_A
8,402254974878,38.888889,801.044125,Company_A
9,402257850893,42.222222,137.438488,Company_A


In [1]:
# Self-made Dictionary
# All conversions are made to SI units using https://en.wikipedia.org/wiki/United_States_customary_units#Units_of_length

dict_unit_hardcoded = {
    # distance to Meter (SI) using * multiplication to go to SI
    'distance_SI_km':0.001, # kilometer
    'distance_SI_m':1, # meter
    'distance_SI_cm':100, # centimeter
    'distance_SI_mm':1000, # millimeter
    'distance_USCS_mi.':1609.344, # miles
    'distance_USCS_ft':0.3048, # feet
    'distance_USCS_in':0.0254, # inch
    # volume to Liter (SI) using * multiplication to go to SI
    'volume_USCS_cu_in':0.016387064, # cubic inch
    'volume_USCS_cu_ft':28.316846592, # cubic feet
    'volume_USCS_cu_yd':764.554857984, # cubic yard
    'volume_USCS_bbl':158.987294928, # oil barrel
    'volume_SI_L':1, # Liter
    # temperatures
    'temperatures_USCS_°F': lambda x : ((5/9) * (x - 32)), # Fahrenheit to C
    'temperatures_USCS_F': lambda x : ((5/9) * (x - 32)), # Fahrenheit to C
    'temperatures_SI_°K': lambda x : (x - 273.15), # Kelvin
    'temperatures_SI_K': lambda x : (x - 273.15), # Kelvin
    'temperatures_SI_°C': 1, # Celsius
    'temperatures_SI_C': 1, # Celsius
    # weights
    'mass_USCS_lb': 0.45359237, # Pounds
    'mass_SI_kg': 1, # Kilogram
    'mass_SI_g': 1000, # grams
}

In [None]:
## From below is only draft code, used to test the libraries. Please note that errors in code are possible, as the code is not used anymore only for examplory purposes ##

In [None]:
## From below is only draft code, used to test the libraries. Please note that errors in code are possible, as the code is not used anymore only for examplory purposes ##

In [None]:
## From below is only draft code, used to test the libraries. Please note that errors in code are possible, as the code is not used anymore only for examplory purposes ##

In [None]:
'''
#######
#   PintPy
# convert_pintpy(1,"meter","cm") # does not work
ureg.meter
#   PyPi: Unit-converter
# #convert_unit_converter(100, kilometres, miles) # does not work
#   PyPi: Workdays
#   PyPi: BusinessHours
#######
'''

from datetime import datetime
start_time = datetime.now() # for time loop
# START TIME LOOP #
# END OF TIME LOOP #
time_elapsed = datetime.now() - start_time
print('Time elapsed (h:m:s.ms) {}'.format(time_elapsed))

In [None]:
from unit_converter.converter import convert, converts
convert('2.78 daN*mm^2', 'mN*µm^2')
Decimal('2.78E+10')

converts('2.78 daN*mm^2', 'mN*µm^2')
#'2.78E+10'

converts('78 min', 'h')
#'1.3'

converts('52°C', '°F')
#'125.6'

converts('120 km*h^-1', 'mile*h^-1')
#'74.56454306848007635409210214'

In [31]:

# General importing
import pandas as pd
from unit_converter.converter import convert, converts # copnvert returns decimal, converts returns string

dict_unit_for_pypi_unit_converter = {
    '°C':'°C',
    '°F':'°C',
    'lb':'kg'
}

# import
df_import_supplier_A = pd.DataFrame()
df_import_supplier_A = pd.read_csv("Data.csv", header=None, sep=';', encoding='UTF-8', low_memory=False)

# Clean up the database
# drop column
df_import_supplier_A = df_import_supplier_A.drop(columns=range(1,7), axis=1)
df_import_supplier_A = df_import_supplier_A.drop(columns=range(8,9), axis=1)
df_import_supplier_A = df_import_supplier_A.drop(columns=range(10,14), axis=1)

df_products = pd.DataFrame()

for col in df_import_supplier_A.columns.tolist():
    # Define the types as used in columns derived from database
    try:
        unit_subject = df_import_supplier_A[col].iloc[0].lower()
        unit_system = df_import_supplier_A[col].iloc[1].upper()
        unit_specs = df_import_supplier_A[col].iloc[2]
    except:
        unit_subject = "12NC"
    
    # Add columns and convert in place
    new_col_name = str('Supplier A '+ unit_subject)
    df_products[new_col_name] = df_import_supplier_A[col].iloc[6:df_import_supplier_A.shape[0]]
    try: 
        if col > 0 :
            df_products[new_col_name] = df_products[new_col_name].apply(lambda x : convert(str(x + '' + unit_specs), dict_unit_for_pypi_unit_converter[unit_specs]))
    except:
        df_products[new_col_name] = None
        pass

df_products = df_products.set_index("Supplier A 12NC")
df_products.head()

Unnamed: 0_level_0,Supplier A temperatures,Supplier A mass
Supplier A 12NC,Unnamed: 1_level_1,Unnamed: 2_level_1
402.255.334.708,15.0,
402.258.698.328,1.1111111111111112,
402.249.767.239,25.0,
402.235.950.496,55.0,
402.264.653.803,85.55555555555556,


In [5]:
########
# PyPi: Unit-Converter https://pypi.org/project/unit-convert/
########

dict_unit_for_pypi_unit_convert = {
    'fahrenheit':'celsius',
    'F':'celsius',
    'lb':'kg'
}

def convert_unit_converter(unit_in, x):
    from unit_convert import UnitConvert
    unit_out = dict_unit_for_pypi_unit_convert[unit_in]
    
    print(unit_in)
    print(unit_out)

    unit_in = 'fahrenheit'
    attrs = {unit_in: x}
    return getattr(UnitConvert(**attrs), unit_out)

'''
# Examplary code how to use pypi:
# Yards + kilometres to miles
x = UnitConvert(yards=136.23, kilometres=60).miles
print(x)
# Bytes to terabytes
x = UnitConvert(b=19849347813875).tb
print(x)
'''

# General importing
import pandas as pd

# import
df_import_supplier_A = pd.DataFrame()
df_import_supplier_A = pd.read_csv("Data.csv", header=None, sep=';', low_memory=False)

# Clean up the database
# drop column
df_import_supplier_A = df_import_supplier_A.drop(columns=range(1,7), axis=1)
df_import_supplier_A = df_import_supplier_A.drop(columns=range(8,9), axis=1)
df_import_supplier_A = df_import_supplier_A.drop(columns=range(10,14), axis=1)

for col in df_import_supplier_A.columns.tolist():
    # Define the types as used in columns derived from database
    try:
        unit_subject = df_import_supplier_A[col].iloc[0].lower()
        unit_system = df_import_supplier_A[col].iloc[1].upper()
        unit_specs = df_import_supplier_A[col].iloc[2]
    except:
        unit_subject = "12NC"
    
    # Add columns and convert in place
    new_col_name = str('Supplier A '+ unit_subject)
    df_products[new_col_name] = df_import_supplier_A[col].iloc[6:df_import_supplier_A.shape[0]]
    if col > 0 :
        df_products[new_col_name] = df_products[new_col_name].apply(lambda x : convert_unit_converter(unit_specs, x))

df_products = df_products.set_index("Supplier A 12NC")
df_products.head()

def convert_unit_converter_old(data_in, unit_in, unit_out):
    from unit_convert import UnitConvert
    return(UnitConvert(unit_in=data_in).unit_out)

NameError: name 'df_products' is not defined

In [51]:
# Convert Units
########
# PintPy: https://pint.readthedocs.io/en/0.11/
## Error: currently this does not want to work

import pint
ureg = pint.UnitRegistry() 

'''
# Examplary code how to use pintpy: 
print(3 * ureg.meter + 4 * ureg.cm)
'''

#print(0 * ureg.meter + 4 * ureg.ft)
print(0 * ureg.meter + 2 * ureg.ft)

pint_dict = {
    'meter' : ureg.meter,
    'feet' : ureg.ft
}

def convert_pintpy(data_in, unit_in, unit_out):
    ureg = pint.UnitRegistry()

    unit_in = "ureg." + unit_in # this string conversion seems to create error
    unit_out = "ureg." + unit_out # but how to create this variable otherwise?
    out = (0 * unit_out + data_in * unit_in)

    return(out)

0.6095999999999999 meter
