# Resistance Measurements of 2nd Batch of Sponge-Wedge Chips

## Summary Graphs

### Data
Electrical resistances from a total of n=3 sections of chips processed with Protocol 1 were measured. 

Figures from this notebook are in a summary slide deck named `2024-07-07_clearance_2nd_batch_v3.pptx`.

In [6]:
# Name of the workbook holding the resistance measurements.
# NOTE: despite the "csv" in the variable name, the file is an Excel workbook (.xlsx).
data_csv_filename = "2nd_batch_resistances_py.xlsx"

In [7]:
def _get_src_dir() -> str:
    """Return the parent directory of the current working directory.

    ``'__file__'`` is passed as a plain string (not the module attribute), so
    ``os.path.abspath`` resolves it relative to the current working directory.
    Stripping that placeholder name yields the working directory, and stripping
    one more component yields its parent.
    """
    placeholder_path = os.path.abspath('__file__')
    working_dir = os.path.dirname(placeholder_path)
    return os.path.dirname(working_dir)

In [8]:
# `os` is imported here although _get_src_dir() is defined in the previous cell;
# that works because the function only looks up `os` when it is called below.
import os
from pathlib import Path
# Change the working directory to the value returned by _get_src_dir() BEFORE the
# `src` import below; presumably this is what makes the `src` package importable
# from the notebook — TODO confirm. Do not reorder these two lines.
os.chdir(_get_src_dir())
from src.utility import _Utility
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

In [9]:
# Full path of the workbook inside the user data directory reported by _Utility.
data_path = os.path.join(
    _Utility.get_user_data_dir_path(),
    data_csv_filename,
)

In [22]:
print(data_path)

/home/mkrause/data/biofuel-cell/2nd_batch_resistances_py.xlsx


#### Read all sheets of the Excel file

In [23]:
# Load every sheet of the workbook; with sheet_name=None pandas returns a dict
# that maps each sheet name to its DataFrame.
try:
    xlsx = pd.read_excel(data_path, sheet_name=None)
except FileNotFoundError as err:
    # Report the error and narrow down which part of the path is missing.
    # `xlsx` stays undefined in this case, so the cells below cannot run.
    print(f'Error: {err}')
    path = Path(data_path)
    if not _Utility.path_exists(path):
        print(f"path {path} doesn't exist")
    # NOTE(review): dir_exists() receives the full file path rather than its
    # parent directory — confirm this check reports what the message says.
    if not _Utility.dir_exists(path):
        print("directory 'data' doesn't exist")
    if not _Utility.file_exists(path):
        print(f"file {data_csv_filename} doesn't exist")
else:
    # Show the sheet names that were found.
    print(xlsx.keys())

Error: [Errno 2] No such file or directory: '/home/mkrause/data/biofuel-cell/2nd_batch_resistances_py.xlsx'
path /home/mkrause/data/biofuel-cell/2nd_batch_resistances_py.xlsx doesn't exist
directory 'data' doesn't exist
file 2nd_batch_resistances_py.xlsx doesn't exist


Add a 'Sheet' column to each dataframe. This is necessary because the sheet names are not a column in the Excel file.

In [None]:
# Stamp each sheet's rows with the sheet name so the origin of a row is still
# known after several sheets are concatenated.
for sheet_name in xlsx:
    xlsx[sheet_name]['Sheet'] = sheet_name

Concatenate sheets of positive controls into one DataFrame

In [None]:
# Stack all sheets whose name contains 'positive' into one DataFrame.
df = pd.concat(
    sheet for name, sheet in xlsx.items() if 'positive' in name
)

In [None]:
df

## Plot all data

### Plot each average of 100 samples with its corresponding standard deviation.

In [None]:
# Axis labels shared by the plotting functions below.
x_label = 'Chip Section Index'
# Raw strings keep the backslash of the mathtext command \Omega literal; in a
# normal string '\O' is an invalid escape sequence and triggers a warning.
y_label1 = r'Average resistance of 100 observations ($\Omega$)'
y_label2 = r'Resistance ($\Omega$)'

In [None]:
def avg_with_stdev_for_each_channel(data=None, filename_suffix=''):
    """Plot each row's average resistance as a bar with its standard deviation.

    One bar is drawn per row, in row order, colored by the row's ``chip_id``,
    on a logarithmic y-axis. The figure is saved as a PNG in the directory
    returned by ``_Utility.get_user_data_dir_path()`` and then displayed.

    Args:
        data: DataFrame with the columns ``'R_avg [Ohm]'``, ``'R_stdev [Ohm]'``
            and ``'chip_id'``. Defaults to the notebook-level ``df``, so
            existing zero-argument calls behave exactly as before.
        filename_suffix: Text inserted before ``.png`` in the saved file name.
            With the default, the name depends only on ``data_csv_filename``,
            so plotting a second dataset replaces the first figure; pass a
            suffix (e.g. ``'_negative'``) to keep both.
    """
    # Fall back to the notebook-level DataFrame when no data is passed in.
    frame = df if data is None else data

    # Get the data and errors from the DataFrame.
    averages = frame['R_avg [Ohm]']
    std_devs = frame['R_stdev [Ohm]']
    row_chip_ids = frame['chip_id']

    # Positions of the bars on the x-axis: one per row.
    x_pos = np.arange(len(averages))

    # One color per distinct chip id, spread evenly over the rainbow colormap.
    chip_ids: np.ndarray = row_chip_ids.unique()
    fractions = np.linspace(0, 1, len(chip_ids))
    colors: dict = {
        chip_id: plt.cm.rainbow(fraction)
        for chip_id, fraction in zip(chip_ids, fractions)
    }

    # Draw the bars one at a time so each can take its chip's color.
    for x, average, std_dev, chip_id in zip(x_pos, averages, std_devs, row_chip_ids):
        plt.bar(x, average, yerr=std_dev, align='center', alpha=0.7, ecolor='black',
                capsize=10, color=colors[chip_id])

    # Set the y-axis to be logarithmic.
    plt.yscale('log')

    # Customize the plot.
    plt.title('Average Values with Standard Deviation (total is n = 3 chips)')
    plt.ylabel(y_label1)
    plt.xlabel(x_label)
    plt.xticks(x_pos, row_chip_ids)
    plt.tight_layout()

    # Save figure to disk.
    stem = _Utility.get_filename_only(filename=data_csv_filename)
    fig_filename = os.path.join(_Utility.get_user_data_dir_path(),
                                f'{stem}{filename_suffix}.png')
    plt.savefig(fig_filename)

    # Display the plot.
    plt.show()

In [None]:
avg_with_stdev_for_each_channel()

### Create average of each chip
Average the mean values of the microchannel measurements within each chip, which results in a mean of means. Then compute the standard error of that mean.

In [None]:
def avg_section(data=None, filename_suffix=''):
    """Plot the per-chip mean of ``'R_avg [Ohm]'`` with its standard error.

    Rows are grouped by ``'chip_id'``; each group's mean is drawn as one bar
    with ``scipy.stats.sem`` of the group as the error bar, on a logarithmic
    y-axis. The figure is saved as a PNG in the directory returned by
    ``_Utility.get_user_data_dir_path()`` and then displayed.

    Args:
        data: DataFrame with the columns ``'chip_id'`` and ``'R_avg [Ohm]'``.
            Defaults to the notebook-level ``df``, so existing zero-argument
            calls behave exactly as before.
        filename_suffix: Text inserted before ``.png`` in the saved file name.
            With the default, the name depends only on ``data_csv_filename``,
            so plotting a second dataset replaces the first figure.
    """
    # Fall back to the notebook-level DataFrame when no data is passed in.
    frame = df if data is None else data

    # Group once and derive both statistics from the same grouping.
    per_chip = frame.groupby('chip_id')['R_avg [Ohm]']
    means = per_chip.mean()
    stderr = per_chip.apply(stats.sem)
    groups = means.index

    # One color per group, spread evenly over the rainbow colormap.
    fractions = np.linspace(0, 1, len(groups))
    colors: dict = {
        group: plt.cm.rainbow(fraction)
        for group, fraction in zip(groups, fractions)
    }

    # Create a bar plot: one bar per chip, in index order.
    for position, (group, mean, error) in enumerate(zip(groups, means, stderr)):
        plt.bar(position, mean, yerr=error, color=colors[group], capsize=5)

    # Set x-ticks to the group names.
    plt.xticks(np.arange(len(groups)), groups)

    # Set the y-axis to be logarithmic.
    plt.yscale('log')

    # Customize the plot.
    plt.title('Per-chip Average w/ standard err. (total is n = 3 chips)')
    plt.ylabel(y_label2)
    plt.xlabel(x_label)
    plt.tight_layout()

    # Save figure to disk.
    stem = _Utility.get_filename_only(filename=data_csv_filename)
    fig_filename = os.path.join(_Utility.get_user_data_dir_path(),
                                f'{stem}_per_chip{filename_suffix}.png')
    plt.savefig(fig_filename)

    # Display the plot.
    plt.show()

In [None]:
avg_section()

### Detecting (and removing) outliers

Check whether any values are outliers, possibly because the channels the data originated from were not conducting (something I didn't pay enough attention to when I did the measurement). See the docstring of `_Utility.remove_outliers` for how outliers are defined.

In [None]:
# Apply _Utility.remove_outliers to each chip's 'R_avg [Ohm]' values, then turn
# the group labels back into ordinary columns.
df_without_outliers = (
    df.groupby('chip_id')['R_avg [Ohm]']
    .apply(_Utility.remove_outliers, dataset_name='chip-section')
    .reset_index()
)

In [None]:
def avg_section_no_outliers(data=None, filename_suffix=''):
    """Plot the per-chip mean of ``'R_avg [Ohm]'`` after outlier removal.

    Rows are grouped by ``'chip_id'``; each group's mean is drawn as one bar
    with ``scipy.stats.sem`` of the group as the error bar, on a logarithmic
    y-axis. The figure is saved as a PNG in the directory returned by
    ``_Utility.get_user_data_dir_path()`` and then displayed.

    Args:
        data: DataFrame with the columns ``'chip_id'`` and ``'R_avg [Ohm]'``.
            Defaults to the notebook-level ``df_without_outliers``, so existing
            zero-argument calls behave exactly as before.
        filename_suffix: Text inserted before ``.png`` in the saved file name.
            With the default, the name depends only on ``data_csv_filename``,
            so plotting a second dataset replaces the first figure.
    """
    # Fall back to the notebook-level, outlier-free DataFrame when no data is passed in.
    frame = df_without_outliers if data is None else data

    # Group once and derive both statistics from the same grouping.
    per_chip = frame.groupby('chip_id')['R_avg [Ohm]']
    means = per_chip.mean()
    stderr = per_chip.apply(stats.sem)
    groups = means.index

    # One color per group, spread evenly over the rainbow colormap.
    fractions = np.linspace(0, 1, len(groups))
    colors: dict = {
        group: plt.cm.rainbow(fraction)
        for group, fraction in zip(groups, fractions)
    }

    # Create a bar plot: one bar per chip, in index order.
    for position, (group, mean, error) in enumerate(zip(groups, means, stderr)):
        plt.bar(position, mean, yerr=error, color=colors[group], capsize=5)

    # Set x-ticks to the group names.
    plt.xticks(np.arange(len(groups)), groups)

    # Set the y-axis to be logarithmic.
    plt.yscale('log')

    # Customize the plot.
    plt.title('Per-chip Average without outliers (total is n = 3 chips)')
    plt.ylabel(y_label2)
    plt.xlabel(x_label)
    plt.tight_layout()

    # Save figure to disk.
    stem = _Utility.get_filename_only(filename=data_csv_filename)
    fig_filename = os.path.join(_Utility.get_user_data_dir_path(),
                                f'{stem}_per_chip_no_outliers{filename_suffix}.png')
    plt.savefig(fig_filename)

    # Display the plot.
    plt.show()

In [None]:
avg_section_no_outliers()

#### Get a few negative controls

In [None]:
# Rebind `df` to the negative controls: stack all sheets whose name contains
# 'negative' and discard every row that has a missing value.
df = pd.concat(
    sheet for name, sheet in xlsx.items() if 'negative' in name
).dropna()

In [None]:
df

In [None]:
print(xlsx.keys())

In [None]:
# First sheet whose name contains both '01' and 'negative'
# (raises IndexError if there is no such sheet).
sheet_val = [name for name in xlsx if '01' in name and 'negative' in name][0]
new_col_name = 'chip_id'
# Create the column with a placeholder value; the next cell overwrites it.
df[new_col_name] = 0
df

In [None]:
print(sheet_val)
# Fill 'chip_id' row by row (axis=1) via the project helper.
# NOTE(review): presumably rows whose 'Sheet' equals sheet_val get 1 and all
# other rows get 3 — verify against _Utility.put_value_in_row.
df[new_col_name] = df.apply(
    _Utility.put_value_in_row,
    axis=1,
    condition_column_name='Sheet',
    condition=sheet_val,
    value=1,
    alt_value=3,
)

In [None]:
df

In [None]:
# Re-run the per-row plot. The function reads the notebook-level `df`, which was
# rebound to the negative controls above.
# NOTE(review): the saved file name is built from data_csv_filename only, so this
# call appears to replace the PNG written for the positive controls — confirm
# that this is intended.
avg_with_stdev_for_each_channel()

In [None]:
# Re-run the per-chip plot. The function reads the notebook-level `df`, which was
# rebound to the negative controls above.
# NOTE(review): the saved file name is built from data_csv_filename only, so this
# call appears to replace the per-chip PNG written for the positive controls —
# confirm that this is intended.
avg_section()

In [None]:
# Recompute the outlier-free data from the current `df` (the negative controls)
# before plotting, because the plotting function reads `df_without_outliers`.
df_without_outliers = (
    df.groupby('chip_id')['R_avg [Ohm]']
    .apply(_Utility.remove_outliers, dataset_name='chip-section')
    .reset_index()
)
avg_section_no_outliers()