# Setup

## Pip install

In [1]:
%pip install "labelbox[data]" --quiet
%pip install -U kaleido  --quiet # for saving the still figures
%pip freeze | grep matplotlib  # get version
%pip install plotly==5.7.0.    # need 5.7.0, not 5.5, so I can use ticklabelstep argument
# Don't forget to restart runtime after installing

[K     |████████████████████████████████| 165 kB 5.5 MB/s 
[K     |████████████████████████████████| 10.9 MB 27.3 MB/s 
[K     |████████████████████████████████| 6.3 MB 38.0 MB/s 
[?25h  Building wheel for pygeotile (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 79.9 MB 157 kB/s 
[?25hmatplotlib==3.2.2
matplotlib-inline==0.1.3
matplotlib-venn==0.11.7
Collecting plotly==5.7.0.
  Downloading plotly-5.7.0-py2.py3-none-any.whl (28.8 MB)
[K     |████████████████████████████████| 28.8 MB 112 kB/s 
Installing collected packages: plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 5.5.0
    Uninstalling plotly-5.5.0:
      Successfully uninstalled plotly-5.5.0
Successfully installed plotly-5.7.0


## Base imports


In [1]:
import os
import sys
print(sys.version)
import json
import numpy as np
import pandas as pd
import scipy
import scipy.stats

from PIL import Image
import requests

import labelbox
#from labelbox.data.annotation_types import Geometry

import IPython.display
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px

3.7.13 (default, Apr 24 2022, 01:04:09) 
[GCC 7.5.0]


In [48]:

notebook_filename = requests.get("http://172.28.0.2:9000/api/sessions").json()[0]["name"]

# Avoids scroll-in-the-scroll in the entire Notebook
def resize_colab_cell():
  display(IPython.display.Javascript('google.colab.output.setIframeHeight(0, true, {maxHeight: 10000})'))
get_ipython().events.register('pre_run_cell', resize_colab_cell)


#@markdown ### func `def get_path_to_save(...):`
def get_path_to_save(plot_props:dict=None, file_prefix="", save_filename:str=None, save_in_subfolder:str=None, extension="jpg", create_folder_if_necessary=True):
    """
    Code created myself (Rahul Yerrabelli)
    """
    replace_characters = {
        "$": "",
        "\\frac":"",
        "\\mathrm":"",
        "\\left(":"(",
        "\\right)":")",
        "\\left[":"[",
        "\\right]":"]",
        "\\": "",
        "/":"-",
        "{": "(",
        "}": ")",
        "<":"",
        ">":"",
        "?":"",
        "_":"",
        "^":"",
        "*":"",
        "!":"",
        ":":"-",
        "|":"-",
        ".":"_",
    }

    # define save_filename based on plot_props
    if save_filename is None:
        save_filename = "unnamed"

    #save_path = f"../outputs/{notebook_filename.split('.',1)[0]}"
    save_path = [
                 "outputs",
                f"{notebook_filename.split('.',1)[0]}",
                ]
    if save_in_subfolder is not None:
        if isinstance(save_in_subfolder, (list, tuple, set, np.ndarray) ):
            save_path.append(**save_in_subfolder)
        else:  # should be a string then
            save_path.append(save_in_subfolder)
    save_path = os.path.join(*save_path)

    if not os.path.exists(save_path) and create_folder_if_necessary:
        os.makedirs(save_path)
    return os.path.join(save_path, file_prefix+save_filename+"."+extension)
    #plt.savefig(os.path.join(save_path, save_filename+"."+extension))


In [49]:
#@title ## Mount google drive and import my code

mountpoint_folder_name = "gdrive"  # can be anything, doesn't have to be "drive"
project_path_within_drive = "PythonProjects/SpeculumAnalysis" #@param {type:"string"}
#project_path_within_drive = "UIUC ECs/Rahul_Ashkhan_Projects/SpeculumProjects_Shared/Analysis" #@param {type:"string"}
project_path_full = os.path.join("/content/",mountpoint_folder_name,
                        "MyDrive",project_path_within_drive)
try:
    import google.colab.drive
    import os, sys
    # Need to move out of google drive directory if going to remount
    %cd
    # drive.mount documentation can be accessed via: drive.mount?
    #Signature: drive.mount(mountpoint, force_remount=False, timeout_ms=120000, use_metadata_server=False)
    google.colab.drive.mount(os.path.join("/content/",mountpoint_folder_name), force_remount=True)  # mounts to a folder called mountpoint_folder_name

    if project_path_full not in sys.path:
        pass
        #sys.path.insert(0,project_path_full)
    %cd {project_path_full}
    
except ModuleNotFoundError:  # in case not run in Google colab
    import traceback
    traceback.print_exc()

<IPython.core.display.Javascript object>

/root
Mounted at /content/gdrive
/content/gdrive/MyDrive/Computer Backups/Rahul Yerrabelli drive/PythonProjects/SpeculumAnalysis


# Data

## Read in the collected data

#### Option 1: Read from labelbox

##### Set up labelbox
Works with LabelBox api, where I labeled all the distances

In [36]:
# Add your labelbox api key and project
# Labelbox API stored in separate file since it is specific for a labelbox 
#account and shouldn't be committed to git. Contact the 
# team (i.e. Rahul Yerrabelli) in order to access to the data on your own account.
with open("auth/LABELBOX_API_KEY.json", "r") as file:
  json_data = json.load(file)
API_KEY = json_data["API_KEY"]
del json_data   # delete sensitive info

PROJECT_ID = "cl2cept1u4ees0zbx6uan5kwa"
DATASET_ID_Glove = "cl2cerkwd5gtd0zcahfz98401"; DATASET_NAME_Glove = "SpeculumWithGlove"
DATASET_ID_Condom = "cl2hu1u8z019a0z823yl5f8gr"; DATASET_NAME_Condom = "SpeculumWithCondom"

client = labelbox.Client(api_key=API_KEY)
del API_KEY   # delete sensitive info
project = client.get_project(PROJECT_ID)
dataset_glove = client.get_dataset(DATASET_ID_Glove)
dataset_condom = client.get_dataset(DATASET_ID_Condom)
# Alternative way to get dataset
# dataset = next(client.get_datasets(where=(labelbox.Dataset.name == DATASET_NAME)))

# Below code is from labelbox tutorial
# Create a mapping for the colors
hex_to_rgb = lambda hex_color: tuple(
    int(hex_color[i + 1:i + 3], 16) for i in (0, 2, 4))
colors = {
    tool.name: hex_to_rgb(tool.color)
    for tool in labelbox.OntologyBuilder.from_project(project).tools
}

##### Get dataframe now that labelbox is set up

In [18]:
labels = project.label_generator()
labels = labels.as_list()
labels_df = pd.DataFrame([[
                           label.data.external_id, 
                           label.annotations[0].value.end.x - label.annotations[0].value.start.x, 
                           label.annotations[0].value.end.y - label.annotations[0].value.start.y, 
                           label.data.url, 
                           label.uid
                           ] 
                          for label in labels],
                         columns=["Filename","x","y","url", "Label ID"])
labels_df.to_csv("data/labels_df.csv")

<IPython.core.display.Javascript object>



### Option 2: Read from labelbox csv if already saved there from previous run

In [38]:
labels_df = pd.read_csv("data/labels_df.csv", index_col=0)

### Read trial data from saved excel sheet

In [204]:
def handle_vertical_ht(x):
    if x=="BROKE":
        return 0
    elif type(x)==str and x.lower() in ["n/a","na","nan"]:
        return np.nan
    else:
        return float(x)

# Made Trial a str because it is not really being used as a numeric variable - better for plotting as it becomes a discrete variable instead of continuous (i.e. for color legend)
speculum_df_raw = pd.read_excel("data/SpeculumData.xlsx", index_col=0, sheet_name="AllTrials",
                                dtype={"Order": np.int32, "Spec Ang": np.int32, "Spec Ht": np.int32, 
                                       #"Vertical Height": np.float64, 
                                       "Trial": str, "Filename": str, "Speculum Type": str},
                                converters={"Vertical Height": handle_vertical_ht},
                                )    
speculum_df = speculum_df_raw.dropna(axis="index", subset=["Filename"])   # Dropped the rows with failed trials

<IPython.core.display.Javascript object>

In [205]:
print([a for a in speculum_df_raw["Vertical Height"]])
speculum_df_raw[["Vertical Height","mmHg"]]

<IPython.core.display.Javascript object>

[2.7, 2.7, 2.7, 2.7, 2.7, 2.7, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 2.7, 2.7, 2.7, 2.7, 2.7, 2.7, nan, nan, nan, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 5.2, 5.2, 5.2, 5.2, 5.2, 5.2, nan, nan, 2.7, 2.7, 2.7, 2.7, 2.7, 2.7, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.1, 5.1, 5.1, 5.1, 5.1, 5.1, 2.75, 2.75, 2.75, 2.75, 2.75, 2.75, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.55, 3.55, 3.55, 3.55, 3.55, 3.55, 3.65, 3.65, 3.65, 3.65, 3.65, 3.65, 3.55, 3.55, 3.55, 3.55, 3.55, 3.55, 3.65, 3.65, 3.65, 3.65, 3.65, 3.65, 3.55, 3.55, 3.55, 3.55, 3.55, 3.55, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.55, 3.55, 3.55, 3.55, 3.55, 3.55, 3.65, 3.65, 3.65, 3.65, 3.65, 3.65, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 5.4]


Unnamed: 0_level_0,Vertical Height,mmHg
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.7,0
2,2.7,40
3,2.7,80
4,2.7,120
5,2.7,160
...,...,...
177,4.1,80
178,4.1,120
179,4.1,160
180,4.1,200


### Combine labelbox and excel sheet, calculate relative value

In [341]:
df_long=pd.merge(left=speculum_df, right=labels_df, on="Filename")

glove_rows = df_long["Material Type"]=="Glove"
# The glove images got rotated 90 degrees. To fix this and clarify the directions of the opening, renaming the columns.
df_long.loc[ glove_rows,"wd"] = df_long.loc[ glove_rows].y
df_long.loc[ glove_rows,"ht"] = df_long.loc[ glove_rows].x
df_long.loc[~glove_rows,"wd"] = df_long.loc[~glove_rows].x
df_long.loc[~glove_rows,"ht"] = df_long.loc[~glove_rows].y
df_long = df_long.drop(columns=["x","y"])

df_long.head()

# Calculate relative value by dividing by the 0mmHg value
base_mmHg = 0 # mmHg
for ind in df_long["Order"].unique():
    df_long.loc[df_long["Order"]==ind,"wd_rel"]  = 1- df_long.loc[df_long["Order"]==ind].wd / df_long.loc[ (df_long["Order"]==ind) & (df_long["mmHg"]==base_mmHg) ].wd.item()
    df_long.loc[df_long["Order"]==ind,"ht_rel"]  = 1- df_long.loc[df_long["Order"]==ind].ht / df_long.loc[ (df_long["Order"]==ind) & (df_long["mmHg"]==base_mmHg) ].ht.item()
#df_long



<IPython.core.display.Javascript object>

## Data rearranging

### Get wide form

In [348]:
df_wide = df_long.pivot(index=
                        ["Order","Speculum Type","Size","Material","Material Type","Method","Spec Ang","Spec Ht","Trial","Vertical Height"], 
                        columns="mmHg", values=["wd_rel","ht_rel"]).reset_index("Vertical Height")

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Vertical Height,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,ht_rel,ht_rel,ht_rel,ht_rel,ht_rel,ht_rel
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mmHg,Unnamed: 9_level_1,0,40,80,120,160,200,0,40,80,120,160,200
Order,Speculum Type,Size,Material,Material Type,Method,Spec Ang,Spec Ht,Trial,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,White,M,Nitrile,Glove,Middle,5,0,1,2.7,0.0,0.2607,0.533074,0.649805,0.747082,0.828794,0.0,-0.099617,0.034483,0.042146,-0.383142,0.463602
2,White,M,Nitrile,Glove,Middle,5,0,2,3.2,0.0,0.241877,0.472924,0.581227,0.696751,0.758123,0.0,0.090535,0.209877,0.028807,0.403292,0.502058
3,White,M,Nitrile,Glove,Middle,5,0,3,2.7,0.0,0.132159,0.409692,0.638767,0.735683,0.753304,0.0,0.238739,0.279279,0.252252,0.301802,0.387387
8,White,M,Vinyl,Glove,Middle,3,0,1,1.3,0.0,0.04886,0.159609,0.247557,0.270358,0.263844,0.0,-0.341837,-0.311224,-0.285714,0.193878,0.214286
9,White,M,Vinyl,Glove,Middle,3,0,2,1.05,0.0,0.019868,0.069536,0.155629,0.377483,0.377483,0.0,0.0,-0.28125,-0.1875,0.375,0.4375
10,White,M,Vinyl,Glove,Middle,3,0,3,1.0,0.0,0.12766,-0.425532,-0.37234,-0.441489,-0.462766,0.0,-0.115385,-0.153846,0.0,0.615385,0.846154
11,White,S,Nitrile,Glove,Middle,5,0,1,2.75,0.0,0.380137,0.592466,0.722603,0.808219,0.869863,0.0,-0.32093,-0.269767,-0.204651,-0.111628,-0.018605
12,White,L,Nitrile,Glove,Middle,5,0,1,3.2,0.0,0.381132,0.603774,0.735849,0.822642,0.879245,0.0,0.041509,0.132075,0.218868,0.283019,0.369811
13,White,M,Nitrile,Glove,Two,5,0,1,5.2,0.0,0.474359,0.709402,0.854701,0.931624,0.987179,0.0,0.019231,0.067308,0.134615,0.189103,0.240385
16,White,S,Nitrile,Glove,Middle,5,0,2,2.7,0.0,0.453488,0.633721,0.883721,0.94186,0.985465,0.0,-0.366667,-0.186667,-0.213333,-0.086667,0.006667


### Order by set and the mmHg within that set (multiindex)

In [25]:
df_multiindex = df_long.set_index(["Order","mmHg"])
df_multiindex

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Speculum Type,Spec Ang,Spec Ht,Size,Material,Material Type,Method,Trial,Vertical Height,Filename,url,Label ID,wd,ht,wd_rel,ht_rel
Order,mmHg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,0,White,5,0,M,Nitrile,Glove,Middle,1,2.7,20220423_142023.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2cez4xr5ki20zagcvnuf7sk,257.0,261.0,0.000000,0.000000
1,40,White,5,0,M,Nitrile,Glove,Middle,1,2.7,20220423_142031.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2cf020i5j8u0zdfhye4ehjg,190.0,287.0,0.260700,-0.099617
1,80,White,5,0,M,Nitrile,Glove,Middle,1,2.7,20220423_142049.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2cf0jjk5jak0zdf8j436nbt,120.0,252.0,0.533074,0.034483
1,120,White,5,0,M,Nitrile,Glove,Middle,1,2.7,20220423_142054.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2cf0z834fgl0zbx37jcdzhz,90.0,250.0,0.649805,0.042146
1,160,White,5,0,M,Nitrile,Glove,Middle,1,2.7,20220423_142100.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2cez4r95jdf0zam6uu20850,65.0,361.0,0.747082,-0.383142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36,40,White,3,0,,,,,3,4.1,20220423_172235.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2hvx3u007nd107h64biho7u,100.0,392.0,0.615385,-0.015544
36,80,White,3,0,,,,,3,4.1,20220423_172239.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2hvxqgj0a450z8304rfe8au,58.0,364.0,0.776923,0.056995
36,120,White,3,0,,,,,3,4.1,20220423_172242.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2hvy0dw0a1j0zard6uo47jm,30.0,335.0,0.884615,0.132124
36,160,White,3,0,,,,,3,4.1,20220423_172246.jpg,https://storage.labelbox.com/cl2ceiao35hbj0zah...,cl2hvyahh0a7s0z83elgpcx57,19.0,307.0,0.926923,0.204663


### Get aggregate df across trials

In [41]:
# Group by all the parameters that will be the same across different trials of the same object
consistent_cols = ["Speculum Type", "Spec Ang", "Spec Ht", "Size", "Material", "Material Type", "Method", "mmHg"]
aggregatable_cols = ["wd","ht","wd_rel","ht_rel", "Vertical Height"]
grouped_trials = df_long[consistent_cols+aggregatable_cols].groupby(consistent_cols)
#display(grouped_trials.describe())

def sem(x, ddof=1):   # ddof=1 to get sample standard deviation, not the population standard deviation (np's default)
    sem = np.std(x, ddof=ddof)/np.sqrt(len(x))

def nonnan(x):
    return x[~np.isnan(x)]

df_agg = grouped_trials.agg([np.mean, scipy.stats.sem, np.std, np.min, np.median, np.max, np.count_nonzero], ddof=1).reset_index()

df_agg_flat = df_agg.copy()
df_agg_flat.columns = [".".join(col).strip(".") for col in df_agg.columns.values]
#df_agg_flat

  keepdims=keepdims, where=where)
  ret = ret.dtype.type(ret / rcount)


# Plotting/Tables

## Plot specific sets of 6 images in a trial, matplotlib

In [None]:
np.array([9*3, 16*2/0.9])/95

array([3.        , 3.95061728])

#### Define image plotting function

In [28]:
def plot_combined_images(order_current, label_dict, df_long=df_long, do_save=True, do_print=False, dpi=None):
    # default dpi is 72
    nrows = 2
    ncols = 3
    filenames= list(df_long[df_long["Order"]==order_current].Filename)  # list converts form pd.Series
    if len(filenames)==0:
        return None
    else:
        assert len(filenames) == 6, f"For order_current={order_current}, the len(filenames)=={len(filenames)}, when it should be 6. filenames={filenames}"

    """
    labels = project.label_generator()
    labels = labels.as_list()
    for ind in range(5):  # skip first 5, empirically found it gets the image groups to line up
        label = next(labels)
        filename = label.data.external_id
        data_row = df_long.loc[df_long["Filename"]==filename].squeeze()  # squeeze removes the additional index dimension to make a 1D pandas series 
        data_row_str = [
                        f"'{data_row['Method']} finger' method" " " f"at {data_row['mmHg']}mmHg",
                        f"with {data_row["Size"]}. {data_row['Material']} glove," " " f"Trial #{data_row['Trial']}"
                        ]
        print(filename + "\t " + " ".join(data_row_str))
    print("-----")
    """
        
    #labels = iter([next(dataset.data_row_for_external_id(filename).labels(), None) for filename in df_multiindex.loc[1]["Filename"]])


    plt.rcParams['text.usetex'] = False   # for Latex
    fig = plt.figure(figsize=(6,8), dpi=dpi)   #figsize=(16,12)  # wd,ht in in
    axes = fig.subplots(nrows=nrows, ncols=ncols, 
                        sharey=True, sharex=True
                        )



    data_rows = {}
    for ind in range(nrows*ncols):
        #label = next(labels)
        label = label_dict[filenames[ind]]
        filename = label.data.external_id
        box_size = {"x":label.annotations[0].value.end.x - label.annotations[0].value.start.x,  "y":label.annotations[0].value.end.y - label.annotations[0].value.start.y}
        annotations = label.annotations
        # image_np.shape is (2268, 4032, 3)  
        # This is a 16:9 (x252) aspect ratio
        # 4032 = 2^6 x 3^2 x 7
        # 2268 = 2^2 x 3^4 x 7
        image_np = label.data.value   


        data_row = df_long.loc[df_long["Filename"]==filename].squeeze()  # squeeze removes the additional index dimension to make a 1D pandas series 
        data_rows[data_row['Order']] = data_row
        data_row_str = [
                        f"'{data_row['Method']} finger' method" ,
                        f"with {data_row['Size']}. {data_row['Material'].lower()} glove," " " f"Trial #{data_row['Trial']}"
                        ]
        data_row_elem_str = [
                        f"'{data_row['Method']} finger' method" " " f"at {data_row['mmHg']}mmHg",
                        f"with {data_row['Size']}. {data_row['Material'].lower()} glove," " " f"Trial #{data_row['Trial']}"
                        ]
        if do_print:
            print(filename + "\t " + " ".join(data_row_elem_str))

        # Draw the annotations onto the source image
        for annotation in annotations:
            if isinstance(annotation.value, labelbox.data.annotation_types.Geometry):
                image_np = annotation.value.draw(canvas=image_np,
                                                color=colors[annotation.name],
                                                thickness=5)

        image_np = np.rot90(image_np)

        axes.flat[ind].imshow(image_np)
        #axes.flat[ind].text( image_np.shape[0]*0.02, image_np.shape[0]*0.02, filename, color="blue", fontsize=6, ha='left',va="top")
        axes.flat[ind].text( image_np.shape[0]*0.02, image_np.shape[0]*0.98, filename, color="blue", fontsize=6, ha='left',va="top")
        #axes.flat[ind].text( image_np.shape[0]*0.02, image_np.shape[0]*0.98,  "\n".join(data_row_elem_str), color="blue", fontsize=5, ha='left',va="bottom")
        # Note, the x-y nomenclature is confusing because of the 90 deg rotation
        axes.flat[ind].set_xlabel(f"Δx = {box_size['y']:.0f}px",fontsize=8)
        #axes.flat[ind].set_ylabel(f"Δy = {box_size['x']:.0f}px",fontsize=8)
        axes.flat[ind].set_title(f"At {data_row['mmHg']}mmHg", fontsize=10,fontweight="bold")


    for ind,ax in enumerate(axes.flat):
        ax.grid(which="major", alpha=0.75)  # set major grid lines
        ax.grid(which="minor", alpha=0.5, linestyle=":")  # set minor grid lines, but make them less visible
        ax.xaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator())
        ax.yaxis.set_minor_locator(matplotlib.ticker.AutoMinorLocator())

    fig.suptitle("Speculum Image Data", fontsize=20, fontweight="bold")
    fig.suptitle(" ".join(data_row_str),
            fontsize=12,
            fontweight="bold")

    plt.tight_layout(rect=[0,0.05,1,0.95]) # rect=[left, bottom, right top]

    """
    data_rows_df = pd.DataFrame(data_rows)
    col_names = ["Order","Size", "Material", "Method", "Trial"]
    common_args = []
    for col_name in col_names:
        if (data_rows_df[col_name].iloc[0]==data_rows_df[col_name]).all():
            common_args.append(col_name + "=" + str(data_rows_df[col_name].iloc[0]))
    #(data_rows_df["Size"].iloc[0]==data_rows_df["Size"]).all()
    print()"""


    if do_save:
        dpi = fig.dpi
        data_row_str_clean = " ".join(data_row_str)
        data_row_str_clean = data_row_str_clean.replace("#","".replace(".",""))
        plt.savefig(get_path_to_save(save_filename=f"Set {order_current}) " + " ".join(data_row_str) + f", dpi={dpi}"), 
                    bbox_inches='tight')  # Include the bbox_inches='tight' is critical to ensure the saved images aren't cutoff while the colab images are normal


<IPython.core.display.Javascript object>

#### Plot the images

In [None]:
labels = project.label_generator()
labels = labels.as_list()
label_dict = {label.data.external_id: label for label in labels}


#order_current = 4
#plot_combined_images(order_current=order_current, label_dict=label_dict, do_print=True, dpi=150)

for order_current in range(4, df_long["Order"].max()+1):
    plot_combined_images(order_current=order_current, label_dict=label_dict, do_print=True, dpi=150)


Output hidden; open in https://colab.research.google.com to view.

In [None]:
df_long[["Spec Ang", "Spec Ht"]]

Unnamed: 0,Spec Ang,Spec Ht
0,5,0
1,5,0
2,5,0
3,5,0
4,5,0
...,...,...
85,5,0
86,5,0
87,5,0
88,5,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,x,x,x,x,x,x,x,x,y,y,...,x_rel,x_rel,y_rel,y_rel,y_rel,y_rel,y_rel,y_rel,y_rel,y_rel
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Speculum Type,Spec Ang,Spec Ht,Glove Size,Glove Type,Glove Method,mmHg,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2
White,3,0,M,Vinyl,Middle,0,3.0,84.666667,96.464156,26.0,29.0,32.0,114.0,196.0,3.0,265.666667,...,1.0,1.0,3.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
White,3,0,M,Vinyl,Middle,40,3.0,108.0,134.242318,29.0,30.5,32.0,147.5,263.0,3.0,250.666667,...,1.228611,1.341837,3.0,0.934538,0.055781,0.87234,0.91174,0.95114,0.965636,0.980132
White,3,0,M,Vinyl,Middle,80,3.0,109.333333,128.001302,30.0,35.5,41.0,149.0,257.0,3.0,269.0,...,1.296237,1.311224,3.0,1.065462,0.315065,0.840391,0.885427,0.930464,1.177998,1.425532
White,3,0,M,Vinyl,Middle,120,3.0,105.333333,127.158694,26.0,32.0,38.0,145.0,252.0,3.0,248.0,...,1.236607,1.285714,3.0,0.989718,0.334533,0.752443,0.798407,0.844371,1.108356,1.37234
White,3,0,M,Vinyl,Middle,160,3.0,62.666667,82.712353,10.0,15.0,20.0,89.0,158.0,3.0,227.666667,...,0.715561,0.806122,3.0,0.931216,0.445144,0.622517,0.676079,0.729642,1.085566,1.441489
White,3,0,M,Vinyl,Middle,200,3.0,58.666667,82.857307,4.0,11.0,18.0,86.0,154.0,3.0,229.666667,...,0.674107,0.785714,3.0,0.94048,0.455868,0.622517,0.679336,0.736156,1.099461,1.462766
White,5,0,L,Nitrile,Middle,0,3.0,252.0,11.269428,245.0,245.5,246.0,255.5,265.0,3.0,269.666667,...,1.0,1.0,3.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
White,5,0,L,Nitrile,Middle,40,3.0,233.333333,17.925773,222.0,223.0,224.0,239.0,254.0,3.0,199.333333,...,0.936388,0.958491,3.0,0.738343,0.110559,0.618868,0.688996,0.759124,0.798081,0.837037
White,5,0,L,Nitrile,Middle,80,3.0,207.0,19.924859,195.0,195.5,196.0,213.0,230.0,3.0,143.666667,...,0.833962,0.867925,3.0,0.531751,0.122143,0.396226,0.48096,0.565693,0.599513,0.633333
White,5,0,L,Nitrile,Middle,120,3.0,183.0,20.808652,170.0,171.0,172.0,189.5,207.0,3.0,108.666667,...,0.741586,0.781132,3.0,0.401901,0.121994,0.264151,0.354703,0.445255,0.470776,0.496296


<bound method GroupBy.sem of <pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f3d8e4b4610>>


Degrees of freedom <= 0 for slice


invalid value encountered in double_scalars



## Create tables

In [393]:
#df_wide.groupby("Trial").agg(np.mean)
#df_wide.groupby(["Size","Material","Method"]).agg([np.mean, scipy.stats.sem, np.std, np.min, np.median, np.max, np.count_nonzero], ddof=1)
df_wide_agg = df_wide.groupby(["Speculum Type","Material Type","Material","Size","Method","Spec Ang","Spec Ht",]).agg([np.count_nonzero, np.mean, scipy.stats.sem], ddof=1)
df_wide_agg = df_wide_agg.rename(columns={"count_nonzero":"N nonzero"})

df_wide_agg_brief = df_wide.groupby(["Material","Size","Method","Spec Ang"]).agg([np.mean, scipy.stats.sem], ddof=1)
df_wide_agg_brief = df_wide_agg_brief.drop(columns=["ht_rel"],level=0).drop(columns=[0],level=1)
%ls outputs/tables
df_wide_agg.to_excel("outputs/tables/mean_and_sem.xlsx")
df_wide_agg_brief.to_excel("outputs/tables/mean_and_sem_brief.xlsx")
%ls outputs/tables

display(df_wide_agg_brief)

<IPython.core.display.Javascript object>


Degrees of freedom <= 0 for slice


invalid value encountered in double_scalars



mean_and_sem_brief.xlsx  mean_and_sem_detailed.xlsx  mean_and_sem.xlsx
mean_and_sem_brief.xlsx  mean_and_sem_detailed.xlsx  mean_and_sem.xlsx


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Vertical Height,Vertical Height,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel,wd_rel
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mmHg,Unnamed: 4_level_1,Unnamed: 5_level_1,40,40,80,80,120,120,160,160,200,200
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem
Material,Size,Method,Spec Ang,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
Durex,Unspecified,Precut,3,3.566667,0.016667,0.585696,0.013959,0.811128,0.030656,0.90896,0.033975,0.968884,0.010669,0.988015,0.004621
Lifestyle,Unspecified,Precut,3,3.633333,0.016667,0.65559,0.04701,0.884043,0.051733,0.939168,0.028387,0.983485,0.006956,0.99582,6.6e-05
Nitrile,L,Middle,5,3.133333,0.066667,0.261657,0.063831,0.468249,0.070519,0.598099,0.070433,0.693228,0.065845,0.757414,0.062756
Nitrile,M,Middle,5,2.866667,0.166667,0.211579,0.04008,0.471897,0.035621,0.623266,0.02126,0.726505,0.015237,0.780073,0.0244
Nitrile,M,Two,5,5.1,0.057735,0.498343,0.040822,0.738202,0.040508,0.872422,0.046892,0.93942,0.029237,0.981732,0.012422
Nitrile,S,Middle,5,2.733333,0.016667,0.333828,0.085644,0.533729,0.080253,0.709251,0.104798,0.788122,0.095095,0.850586,0.083993
,,,3,4.1,0.0,0.66125,0.089114,0.844367,0.058083,0.919026,0.038904,0.952237,0.022287,0.984207,0.007315
,,,5,5.4,,,,,,,,,,,
Skyn,Unspecified,Precut,3,3.666667,0.044096,0.568628,0.132138,0.828879,0.083256,0.937047,0.050799,0.982915,0.011069,0.993234,0.00292
Trojan,Unspecified,Precut,3,3.6,0.05,0.708569,0.031584,0.869356,0.042513,0.948528,0.024347,0.986554,0.009278,0.994554,0.001283


## Plotly Plots

### Setup for plotting

In [43]:
default_scale = 4
def save_figure(fig, file_name:str, animated=False, scale=default_scale):
    """
    - for saving plotly.express figures only - not for matplotlib
    - fig is of type plotly.graph_objs._figure.Figure,
    - Requires kaleido installation for the static (non-animated) images
    """    
    fig.write_html( get_path_to_save(save_filename=file_name, extension="html") )
    if not animated:
        fig.write_image( get_path_to_save(save_filename=file_name, extension="svg"), scale=scale)
        fig.write_image( get_path_to_save(save_filename=file_name, extension="png"), scale=scale)
        #fig.write_image(os.path.join(image_folder_path, file_name+".jpeg"), scale=scale)

col_options = {col_name:pd.unique(df_long[col_name]).tolist() for col_name in consistent_cols}
display(col_options)

### Individual Trial level data

In [105]:
df_long2 = df_long.copy()
df_long2["Trial"] = df_long2["Trial"].astype(str)
fig = px.bar(df_long2.loc[ (df_long["Material"]=="Nitrile") & (df_long["Method"]=="Middle") & (df_long["Size"]=="M") ], 
             x="mmHg", y="wd_rel",  
             text_auto=".1%", barmode='group', color="Trial",
             title="Speculum View Width - Specific Trials", 
             hover_data=["Size","Material","Method","Trial"],
             category_orders={"Size": ["S", "M", "L"],"Material":["Nitrile","Vinyl"],"Method":["Middle","Two","Palm"],"Speculum Type":["White","Green"]},
             labels={"wd_rel":"View width","mmHg":"Pressure (mmHg)","Material":"Glove Material", "Trial":"Trial #"},
             color_discrete_map={"1": "Lightgray", "2": "Darkgray", "3": "Gray"},
             template="simple_white"
)
       
fig.update_layout(width=500, height=300)

fig.show()
save_figure(fig, file_name="Basic, all trials", scale=4)

<IPython.core.display.Javascript object>

Get good pixel width sizes

In [None]:
# 80mm * 1in/25.4mm * 300ppi/4 (if we use a scale of 4)
# 180mm * 1in/25.4mm * 300ppi/4 (if we use a scale of 4)
# 180mm * 1in/25.4mm * 300ppi/4 (if we use a scale of 4)
80 * 1/25.4 * 300/4, 180 * 1/25.4 * 300/4

250 / (1/25.4 * 300/4), 550 / (1/25.4 * 300/4)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

(84.66666666666666, 186.26666666666665)

### Plot aggregates across trials

#### Setup for plotting aggregates

In [209]:
def customize_figure(fig, width=640, height=360, by_mmHg=True) -> dict:
    """ - for plotly figures only. """
    
    if by_mmHg:
        fig.update_xaxes(tickprefix="At ", ticksuffix="mmHg", showtickprefix="all", showticksuffix="all", tickfont=dict(size=16),
                        mirror=True, linewidth=2, 
                        title=dict(text="Applied Circumferential Pressure (mmHg)",font=dict(size=20, family="Arial Black")),
                        )
        fig.update_yaxes(tickformat=".0%", tickwidth=2,  nticks=21, ticklabelstep=4,
                        mirror=True, linewidth=2, range=(0,1), 
                        title=dict(text="Relative Inward Creep <br> Into Field of View (S.E.)",font=dict(size=18, family="Arial Black")), 
                        showgrid=True, gridcolor="#AAA", 
                        showspikes=True, spikemode="across", spikethickness=2, spikedash="solid", # ticklabelposition="inside top",
                        )
    #fig.update_traces(textangle=0, textposition="outside", cliponaxis=False)
    fig.update_layout(
        font=dict(
            family="Arial",
            size=16,
            color="black",
        ),
        title={
            "y":1,
            "x":0.5,
            "xanchor": "center",
            "yanchor": "top",
            "font":dict(size=16)
        }, 
        width=width, height=height,
        margin=dict(l=20, r=20, t=20, b=20),
        legend=dict(
            title={"font_family": "Arial Black",},
            yanchor="middle",
            y=0.5,
            xanchor="center",
            x=0.08,
            #bgcolor="LightSteelBlue",
            bordercolor="Black", #font_size=16,
            borderwidth=2,
        ), 
        bargap=0.05, bargroupgap=0.0,
        dragmode="drawopenpath",
        newshape_line_color="cyan",
    )

    fig.update_traces(textfont_size=16, textangle=0, textfont_color="#FFF", 
                      textposition="inside", cliponaxis=False, #textfont_family="Courier",
                      marker_line_color="#000", marker_line_width=2
                    )
    if by_mmHg:
        fig.update_traces(texttemplate=[None]+[""" <br><b>%{y:.1%}</b>"""]*5,)

    config = {
        "toImageButtonOptions" : {
            "format": "png", # one of png, svg, jpeg, webp
            "filename": 'custom_image',
            "scale": default_scale # Multiply title/legend/axis/canvas sizes by this factor
        },
        "modeBarButtonsToAdd": ["drawline","drawopenpath","drawclosedpath","drawcircle","drawrect","eraseshape"]
    }

    return config



category_orders={"Size": ["S", "M", "L","Unspecified","None"],
                 "Material":["Nitrile","Vinyl","Trojan", "Lifestyle", "Durex", "Skyn","None"],
                 "Material Type":["Glove","Condom","None"],
                 "Method":["Middle","Two","Palm","Precut","None"],"Speculum Type":["White","Green"]}
labels = {"wd_rel.mean":"Mean Relative Inward Creep (S.E.)", 
          "mmHg":"Pressure", "Material":"Material", "Material Type":"Material"}
def criteria_to_str(criteria:dict) -> str:
    return ", ".join([f"{labels.get(key) or key}={val}" for key,val in criteria.items()])

<IPython.core.display.Javascript object>

#### Actual plotting

In [None]:
criteria = {"Material":"Nitrile", "Method":"Middle"}
varying = "Size"

df_sampled = df_agg_flat.loc[ np.all([df_agg[arg]==val for arg, val in criteria.items()], axis=0) ]
fig = px.bar(df_sampled, 
             x="mmHg",y="wd_rel.mean", error_y="wd_rel.sem", #error_y_minus=[0]*18, 
             color=varying, pattern_shape=varying, 
             color_discrete_sequence=px.colors.qualitative.Safe, pattern_shape_sequence=["/", "+", "\\"], 
             barmode="group", #text=[".1%<br><br> " for a in range(18)],
             hover_data=["Size","Material","Method"],
             title=f"Varying {varying} with " + criteria_to_str(criteria), 
             category_orders=category_orders, labels=labels, template="simple_white", 
             )
#fig.update_traces(hovertemplate="""%{x}""") #
config = customize_figure(fig, width=1100, height=300)

fig.show(config=config)
save_figure(fig, file_name=f"Across {varying}- " + criteria_to_str(criteria) )

<IPython.core.display.Javascript object>

In [None]:
criteria = {"Size":"M", "Method":"Middle"}
varying = "Material"

df_sampled = df_agg_flat.loc[ np.all([df_agg[arg]==val for arg, val in criteria.items()], axis=0) ]
fig = px.bar(df_sampled, 
             x="mmHg",y="wd_rel.mean", error_y="wd_rel.sem", 
             color=varying, pattern_shape=varying, 
             color_discrete_sequence=px.colors.qualitative.Set1, pattern_shape_sequence=["|", "-", "\\"], 
             barmode="group", #text=[".1%<br><br> " for a in range(18)],
             hover_data=["Size","Material","Method"],
             title=f"Varying {varying} with " + criteria_to_str(criteria), 
             category_orders=category_orders, labels=labels, template="simple_white", 
             )

config = customize_figure(fig, width=1100, height=300)

fig.show(config=config)
save_figure(fig, file_name=f"Across {varying}- " + criteria_to_str(criteria) )

<IPython.core.display.Javascript object>

In [206]:
criteria = {"Size":"M", "Material":"Nitrile"}
varying = "Method"

df_sampled = df_agg_flat.loc[ np.all([df_agg[arg]==val for arg, val in criteria.items()], axis=0) ]
fig = px.bar(df_sampled, 
             x="mmHg",y="wd_rel.mean", error_y="wd_rel.sem", 
             color=varying, pattern_shape=varying, 
             color_discrete_sequence=px.colors.qualitative.D3, pattern_shape_sequence=["x", "+", "-"], 
             barmode="group", #text=[".1%<br><br> " for a in range(18)],
             hover_data=["Size","Material","Method","wd_rel.amin","wd_rel.median","wd_rel.amax"],
             title=f"Varying {varying} with " + criteria_to_str(criteria), 
             category_orders=category_orders, labels=labels, template="simple_white", 
             )

config = customize_figure(fig, width=1100, height=300)

fig.show(config=config)
save_figure(fig, file_name=f"Across {varying}- " + criteria_to_str(criteria) )

<IPython.core.display.Javascript object>

### Plot condoms

In [281]:
#criteria = {"Material Type":"Condom"}
criteria = {"Material Type":["Condom","None"]}
varying = "Material"

#colors = px.colors.qualitative.Plotly.copy() #[0:4]+["black"]
#colors = ['black']*10
#colors[8] = "black"
#colors = ['#636EFA', '#EF553B',"Black", '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
#colors={"Trojan":"red","gray":"blue","None":"green","Lifestyle":"green","Skyn":"blue"}
#df_sampled = df_agg_flat.loc[ np.all([df_agg[arg]==val for arg, val in criteria.items()], axis=0) ]
df_sampled = df_agg_flat.loc[ np.all([ (type(val)!=list and df_agg[arg]==val ) or np.in1d(df_agg[arg],val)  for arg, val in criteria.items()], axis=0) ]
df_sampled["color"] = df_sampled["Material"].copy().replace(colors)
colors = px.colors.qualitative.Safe #[0:4]+["black"]
fig = px.bar(df_sampled, 
             x="mmHg",y="wd_rel.mean", error_y="wd_rel.sem", 
             color=varying, pattern_shape=varying, 
             color_discrete_sequence= px.colors.qualitative.Safe, pattern_shape_sequence=["|", "-", "\\"], 
             barmode="group", #text=[".1%<br><br> " for a in range(18)],
             hover_data=["Size","Material","Method"],
             title=f"Varying {varying} with " + criteria_to_str(criteria), 
             category_orders=category_orders, labels=labels, template="simple_white", 
             )

config = customize_figure(fig, width=1100, height=300)

for idx, trace in enumerate(fig["data"]):
     trace["name"] = trace["name"].split()[-1]

fig.show(config=config)
#save_figure(fig, file_name=f"Across {varying}- " + criteria_to_str(criteria) )

<IPython.core.display.Javascript object>



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Plot vertical heights

In [175]:
criteria = {"mmHg":[0,1], "Spec Ang":[3,5]}
varying = "Material"

df_sampled = df_agg_flat.loc[ np.all([ (type(val)!=list and df_agg[arg]==val ) or np.in1d(df_agg[arg],val)  for arg, val in criteria.items()], axis=0) ]
df_sampled = df_sampled.sort_values(["Vertical Height.mean"]).reset_index()
df_sampled["Spec Ang"] = df_sampled["Spec Ang"].astype(str)  # makes discrete color plotting and string concatenation easier
df_sampled["name"] = df_sampled["Size"] + "-" + df_sampled["Material"] + "-"  + df_sampled["Material Type"] + "-"  + df_sampled["Method"] + "-"  + df_sampled["Spec Ang"]

extra_trials = speculum_df_raw.loc[speculum_df_raw["Filename"]=="None"].copy()
extra_trials = extra_trials.drop(extra_trials[extra_trials["Spec Ang"] == 4].index)
extra_trials["Vertical Height.mean"] = extra_trials["Vertical Height"]
extra_trials["Vertical Height.sem"] = None
with_extra = pd.concat([df_sampled,extra_trials])
with_extra = with_extra.drop(columns=[col for col in with_extra if col not in df_sampled.columns])

df_sampled = with_extra
df_sampled["Spec Ang"] = df_sampled["Spec Ang"].astype(str)  # makes discrete color plotting and string concatenation easier
df_sampled["name"] = df_sampled["Size"] + "-" + df_sampled["Material"] + "-"  + df_sampled["Material Type"] + "-"  + df_sampled["Method"] #+ "-"  + df_sampled["Spec Ang"]

names={
    "None-None-None-None-3": "None", #"None<br>(3 clicks)",
    "None-None-None-None-5": "None", #"None<br>(5 clicks)",
    "Unspecified-Durex-Condom-Precut-3": "<i>Durex</i><br>Condom",
    "Unspecified-Lifestyle-Condom-Precut-3": "<i>Lifestyle</i><br>Condom",
    "Unspecified-Skyn-Condom-Precut-3": "<i>Skyn</i><br>Condom",
    "Unspecified-Trojan-Condom-Precut-3": "<i>Trojan</i><br>Condom",
    "M-Vinyl-Glove-Middle-3": "Medium<br><i>Vinyl</i><br>Glove",
    "L-Nitrile-Glove-Middle-5": "<i>Large</i><br>Nitrile<br>Glove",
    "M-Nitrile-Glove-Middle-5": "Medium<br>Nitrile<br>Glove",
    "M-Nitrile-Glove-Two-5": "Medium<br>Nitrile<br>Glove,<br><i>Two-fingers</i>",
    "S-Nitrile-Glove-Middle-5": "<i>Small</i><br>Nitrile<br>Glove"
}
names={
    "None-None-None-None": "None", #"None<br>(3 clicks)",
    "Unspecified-Durex-Condom-Precut": "<i>Durex</i><br>Condom",
    "Unspecified-Lifestyle-Condom-Precut": "<i>Lifestyle</i><br>Condom",
    "Unspecified-Skyn-Condom-Precut": "<i>Skyn</i><br>Condom",
    "Unspecified-Trojan-Condom-Precut": "<i>Trojan</i><br>Condom",
    "M-Vinyl-Glove-Middle": "Medium<br><i>Vinyl</i><br>Glove",
    "L-Nitrile-Glove-Middle": "<i>Large</i><br>Nitrile<br>Glove",
    "M-Nitrile-Glove-Middle": "Medium<br>Nitrile<br>Glove",
    "M-Nitrile-Glove-Two": "Medium<br>Nitrile<br>Glove,<br><i>Two-fingers</i>",
    "S-Nitrile-Glove-Middle": "<i>Small</i><br>Nitrile<br>Glove",
    "S-Nitrile-Glove-Palm": "<i>Small</i><br>Nitrile<br>Glove,<br><i>Palm</i>",
    "M-Nitrile-Glove-Palm": "<i>Medium</i><br>Nitrile<br>Glove,<br><i>Palm</i>",
}
df_sampled["name_formatted"] = df_sampled["name"].replace(names, value=None)
#df_sampled["name"] = df_sampled["name_formatted"].replace(names)

fig = px.bar(df_sampled, 
             #x = np.argsort(df_sampled["Vertical Height.mean"]),
             x = "name_formatted",
             y="Vertical Height.mean", error_y="Vertical Height.sem", 
             category_orders=category_orders, labels=labels, template="simple_white", 
             hover_data=["Size","Material","Method","name"], #color = ["gray","gray","red","gray","gray"]
             color="Spec Ang", facet_col="Spec Ang", # facet_row="Material Type",
             )
fig.update_xaxes(matches=None)
fig.update_traces(texttemplate=""" <br><b>%{y:.2f}<br>cm</b>""", textposition="outside",)

fig.update_xaxes(linewidth=2, #showticklabels=False, nticks=0,
                 title=dict(text="Speculum Combination",font=dict(size=18, family="Arial Black")),
                 )
fig.update_yaxes(ticksuffix="cm", tickformat=".0f", tickwidth=2, range=(0,6),  nticks=6*2+1, ticklabelstep=2,
                mirror=True, linewidth=2,
                title=dict(text="Initial Height of <br>Speculum Opening",font=dict(size=18, family="Arial Black")), 
                showgrid=True, gridcolor="#AAA", 
                showspikes=True, spikemode="across", spikethickness=2, spikedash="solid", # ticklabelposition="inside top",
                )

fig.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')


fig.update_layout(showlegend=False)


config = customize_figure(fig, width=1100, height=500, by_mmHg=False)

fig.show(config=config)
save_figure(fig, file_name=f"Vertical Height Bar Plot" )

<IPython.core.display.Javascript object>

In [None]:

fig = px.bar(df_long.loc[ (df_long["Material"]=="Nitrile") & (df_long["Method"]=="Middle") ], 
             x="mmHg", y="wd_rel", 
             text_auto=".1%", barmode='group', color="Size", 
             title="Speculum View Width", 
             hover_data=["Size","Material","Method","Trial"],
             category_orders={"Size": ["S", "M", "L"],"Material":["Nitrile","Vinyl"],"Method":["Middle","Two"],"Speculum Type":["White","Green"]},
             labels={"wd_rel":"View width","mmHg":"Pressure (mmHg)","Material":"Glove Material"},
             )
fig.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:

fig = px.bar(df_long.loc[ (df_long["Material"]=="Nitrile") & (df_long["Method"]=="Middle") ],
             x="mmHg", y="wd_rel", facet_row="Size", text_auto=".1%",
             title="Speculum View Width", 
             hover_data=["Size","Material","Method","Trial"],
             category_orders={"Size": ["S", "M", "L"],"Material":["Nitrile","Vinyl"],"Method":["Middle","Two"],"Speculum Type":["White","Green"]},
             labels={"wd_rel":"View width","mmHg":"Pressure (mmHg)","Material":"Glove Material"},
             )
fig.show()