# **IFM Tag Calibration Script**

This Jupyter notebook was created to calculate and deliver metrics about the thermal characteristics of IFM's T-sensing Fibers, for the purposes of calibrating and testing their performance prior to delivery.

#### **How to use:**

In the Cell below, fill in the capitalized variables. TAGS is a list of last-four-digit EPCs to process, and DATA_DIREC is the folder where data is located. The folder named in DATA_DIREC should be filled with individual folders containing two files, one being the tags.csv and the other being the RSSI.csv.

Then, run every cell, and the output files will be in a folder in the same directory as this notebook, titled by the variable OUTPUT_NAME. 

In [None]:
# List of strings of LAST FOUR DIGITS of tag EPCs to be used in calculations.
# Tags are matched by comparing these strings against the last four characters of each EPC.
TAGS = [
    # '2B42', '3943', '3242'
    '3734', '3842', '0B5D'
]

# Name of the folder where outputted files will be placed.
OUTPUT_NAME = 'Tag Results'

# Name of the folder where the input data is located. It should contain one
# sub-folder per data run, each holding a tags*.csv and an RSSI*.csv file.
DATA_DIREC = 'DATA/05032023'

In [None]:
# NECESSARY IMPORTS FOR SCRIPT TO RUN
# If error occurs in this cell, open Terminal/Command Prompt and pip install "package name"
import numpy as np
import matplotlib.pyplot as plt
import csv, datetime, os
import matplotlib.dates as mdates
import pandas as pd
from sklearn.metrics import r2_score
import pandas as pd
from tqdm import tqdm


# HELPER FUNCTIONS FOR LOADING DATA
# These should not cause errors--if they do, let me know.

def MakeDataFromText(filepath:str):
    """
    Helper function to parse text files in IFM proprietary format.

    The file is read row by row with ``csv.reader`` and only the first token
    of every row is interpreted:

    * a 24-character token starts a new EPC entry;
    * ``[`` opens an array and ``]`` closes it. The first non-empty array
      closed after an EPC is stored as that EPC's timestamps, the next one
      as its readings;
    * a token containing exactly two ``:`` is parsed as ``hours:minutes:seconds``;
    * a single space is ignored;
    * anything else is parsed as a float reading; tokens that are not
      numbers are skipped.

    Timestamps carry no date, so they are placed on 1970-02-01. Once hour 23
    has been seen anywhere in the file, hours before noon are placed on
    1970-02-02 instead.

    Parameters
    ----------
    filepath : str
        Path of the text/CSV file to parse.

    Returns
    -------
    dict
        Maps each EPC string to ``[timestamps, readings]`` (two lists).

    Raises
    ------
    ValueError
        If a timestamp token has non-integer or out-of-range fields.
    KeyError
        If a non-empty array is closed before any EPC line has been read.
    """
    db = dict()                          # EPC -> [timestamps, readings]
    recent_EPC = ''                      # The most recent EPC that was read; arrays are assigned to it
    expecting_times = True               # True: next array is timestamps, False: next array is readings
    reached_24 = False                   # NOTE: deliberately shared by all EPCs in the file
    data = []                            # Bound up front so tokens seen before the first '[' cannot fail

    # newline='' is what the csv module recommends for files handed to csv.reader.
    with open(filepath, 'r', newline='') as file:
        for row in csv.reader(file):     # Iterate over entire text file
            if not row:                  # Completely blank line: csv yields an empty row
                continue
            line = row[0]                # Each line only has one token, so we select it

            if len(line) == 24:          # If it is an EPC
                db[line] = [[], []]      # Create new k,v pair in database
                recent_EPC = line        # Current EPC to add to is selected
                expecting_times = True   # A new tag always starts with its timestamps,
                                         # even if the previous tag had an empty array

            elif line == '[':            # If start bracket, beginning to collect an array of data
                data = []

            elif line == ']':            # End bracket means end of data array
                if data:
                    db[recent_EPC][0 if expecting_times else 1] = data
                    expecting_times = not expecting_times
                data = []

            elif line.count(":") == 2:
                h, m, s = line.split(':')    # Parse timestamp into hours, minutes, seconds
                hour = int(h)

                if hour == 23:
                    reached_24 = True        # If end of day reached, must move to Day 2

                day = 2 if (reached_24 and hour < 12) else 1
                data.append(datetime.datetime(1970, 2, day, hour, int(m), int(s)))

            elif line == ' ':            # If blank space, then we skip to parse next line
                continue

            else:                        # If it is a data point
                try:
                    data.append(float(line))
                except ValueError:
                    pass                 # Non-numeric token: skip it instead of crashing the whole load
    return db

def MakeDict(data:list):
    """
    Merge parsed tag dictionaries into one ``{EPC: {timestamp: value}}`` table.

    ``data`` is a list of dictionaries shaped like the output of
    MakeDataFromText, i.e. ``{EPC: [timestamps, readings]}``. Readings are
    grouped per EPC and per timestamp, and every group is then replaced by
    its arithmetic mean.

    NOTE: the first dictionary that mentions an EPC is handled differently
    from later ones. Within that first dictionary a repeated timestamp keeps
    only its last reading; readings from later dictionaries are accumulated
    and averaged with it.
    """
    grouped = {}
    for parsed in data:
        for epc, series in parsed.items():
            if epc not in grouped:
                # First sighting of this tag: one single-item list per timestamp (last one wins)
                first_pass = {}
                for pos in range(len(series[0])):
                    first_pass[series[0][pos]] = [series[1][pos]]
                grouped[epc] = first_pass
                continue

            # Tag already known: pile readings up under their timestamp
            per_stamp = grouped[epc]
            for pos, stamp in enumerate(series[0]):
                per_stamp.setdefault(stamp, []).append(series[1][pos])

    # Collapse every list of readings into its mean
    return {
        epc: {stamp: sum(readings) / len(readings) for stamp, readings in per_stamp.items()}
        for epc, per_stamp in grouped.items()
    }

In [None]:
############################################################################################################################
# DO NOT CHANGE CODE IN THIS SECTION, FOLLOW ERROR MESSAGE #################################################################
############################################################################################################################

# Create the output folder and its 'Plots' sub-folder. Folders that already exist are
# left alone; genuine failures (e.g. no write permission) are reported here instead of
# being silently ignored and resurfacing later when files are written.
os.makedirs(os.path.join(OUTPUT_NAME, 'Plots'), exist_ok=True)

# One entry per data folder, kept in lockstep: folder name, temperature table, RSSI table.
DATA_NAMES, TEMP_LIST, RSSI_LIST = [], [], []
for D__ in sorted(os.listdir(DATA_DIREC)):     # sorted -> same row/plot order on every machine
    path = os.path.join(DATA_DIREC, D__)
    if not os.path.isdir(path):
        continue                               # Stray files (e.g. .DS_Store) are not data folders

    DATA_NAMES.append(D__)                     # List of all data folder names in DATA_DIREC

    for prefix in sorted(os.listdir(path)):
        filepath = os.path.join(path, prefix)
        if   prefix[:4]=='tags':
            TEMP_LIST.append( MakeDict( [MakeDataFromText(filepath)] ) ) # Create a temperature dictionary (hash table)
        elif prefix[:4]=='RSSI':
            RSSI_LIST.append( MakeDict( [MakeDataFromText(filepath)] ) ) # Create an RSSI dictionary (hash table)

    # Every folder must contribute exactly one tags file and one RSSI file, otherwise the
    # three lists drift out of step and later cells would pair the wrong data together.
    if not (len(TEMP_LIST)==len(RSSI_LIST)==len(DATA_NAMES)):
        raise ValueError("Format Error: {} does not contain a tags_NUM.csv and RSSI_NUM.csv".format(path))

# Later cells index these as NumPy arrays (TEMP_LIST / RSSI_LIST become arrays of dict objects).
TEMP_LIST, RSSI_LIST, DATA_NAMES = np.array(TEMP_LIST), np.array(RSSI_LIST), np.array(DATA_NAMES)
############################################################################################################################

### **Excel Code**

This section of the notebook creates an Excel workbook called "tags_coefficients.xlsx" in the directory OUTPUT_NAME chosen at the start of the notebook. The workbook has three sheets: one for RSSI<=20, one for RSSI>20, and one for all RSSIs. It also includes data for when RSSI is exactly equal to ten. Here is a list of the action items it fulfills:

<ul>
    <li>Coefficients for Linear Fit on Temp. vs RSSI</li>
    <li>Coefficients for Quadratic Fit on Temp. vs RSSI</li>
    <li>Average and Standard Deviation of Coefficients</li>
    <li>Average Temp. and Calculated from Linear Fit Temp. at RSSI=10</li>
    <li>Difference between Avg. & Calc. Temp @ RSSI=10 and Ground Truth Water Temp.</li>
</ul>

In [None]:
# ---------------------------------------------------------------------------------------
# Builds OUTPUT_NAME/tags_coefficients.xlsx with one sheet per RSSI range.
#
# For every tag in TAGS and every data folder, temperatures are grouped by RSSI level,
# the per-level mean temperature is fitted against RSSI with a linear and a quadratic
# polynomial, and the rounded coefficients and R2 are written out. On the sheets that
# include RSSI=10, the measured and the linearly predicted temperature at RSSI=10 are
# also compared with the water temperature encoded in the data folder name.
# ---------------------------------------------------------------------------------------

# (sheet name, which RSSI levels feed the fits, whether the RSSI=10 columns are filled in)
SHEET_SPECS = [
    ('RSSI<21',  lambda level: level < 21, True),
    ('RSSI>20',  lambda level: level > 20, False),
    ('All RSSI', lambda level: True,       True),
]
FIT_COLUMNS = ('R2', 'x2', 'x', 'c')
RSSI10_COLUMNS = ('Avg RSSI=10', 'Calc RSSI=10', 'Water Minus Avg', 'Water Minus Calc')
NOT_FOUND = 'Tag Not Found'


def _water_temp_from_name(name):
    """Decode the water temperature from a data folder name, e.g. '25p3...' -> 25.3."""
    whole, marker, fraction = str(name).partition('p')
    if not marker:
        raise ValueError(
            "Data folder name {!r} must look like '<degrees>p<tenths>', e.g. '25p3'".format(str(name))
        )
    # Two leading characters are the whole degrees, the first character after 'p' the tenths.
    return int(whole[:2]) + float(fraction[:1]) / 10.0


def _mean_std_label(values):
    """Return 'mean, std' of a coefficient column as text, or an error marker if not numeric."""
    try:
        return str(round(np.mean(values), 3)) + ", " + str(round(np.std(values), 6))
    except Exception:
        # Happens when the column holds text such as 'Tag Not Found'.
        return 'error in Avg/STD'


if not TAGS:
    # Fail before the workbook is opened, so no empty/broken file is left behind.
    raise ValueError("TAGS is empty: list at least one four-character EPC suffix.")

with pd.ExcelWriter(OUTPUT_NAME + "/tags_coefficients.xlsx", engine='xlsxwriter') as writer:
    for sheet_name, keep_level, fill_rssi10 in SHEET_SPECS:
        list_dfs = []
        for TAG in TAGS:
            linear    = { column: [] for column in FIT_COLUMNS }     # One row per data folder
            quadratic = { column: [] for column in FIT_COLUMNS }
            at_ten    = { column: [] for column in RSSI10_COLUMNS }

            for u in range(len(DATA_NAMES)):
                found = False
                # NOTE: every EPC ending in TAG adds a row; more than one match in a single
                # data folder makes the columns longer than the index and DataFrame() fails.
                for epc in TEMP_LIST[u]:
                    if epc[-4:] != TAG:
                        continue
                    found = True

                    # Temperature and RSSI readings, each ordered by timestamp, paired by position.
                    dt = [ reading for _, reading in sorted( TEMP_LIST[u][epc].items() ) ]
                    dr = [ reading for _, reading in sorted( RSSI_LIST[u][epc].items() ) ]

                    # Data Temp Calculation
                    TEMP_VAL = _water_temp_from_name(DATA_NAMES[u])

                    # Collect Data for each RSSI (levels 0..25; anything else raises KeyError)
                    c1 = { level: [] for level in range(26) }
                    for i, temperature in enumerate(dt):
                        c1[dr[i]].append(temperature)

                    rssi  = [ level for level, temps in c1.items() if temps and keep_level(level) ]
                    means = [ sum(c1[level]) / len(c1[level]) for level in rssi ]

                    # Making Best Fits, Linear & Quadratic. All-or-nothing: if either fit
                    # fails (e.g. no usable RSSI levels), both rows are NaN.
                    try:
                        model1 = np.poly1d( np.polyfit(rssi, means, 1) )
                        model2 = np.poly1d( np.polyfit(rssi, means, 2) )
                        linear_row = {
                            'R2': round(r2_score(means, model1(rssi)), 3),
                            'x2': np.nan,                      # A line has no x^2 term
                            'x':  round(model1[1], 4),
                            'c':  round(model1[0], 4),
                        }
                        quadratic_row = {
                            'R2': round(r2_score(means, model2(rssi)), 3),
                            'x2': round(model2[2], 4),
                            'x':  round(model2[1], 4),
                            'c':  round(model2[0], 4),
                        }
                    except Exception:
                        linear_row    = dict.fromkeys(FIT_COLUMNS, np.nan)
                        quadratic_row = dict.fromkeys(FIT_COLUMNS, np.nan)

                    for column in FIT_COLUMNS:
                        linear[column].append(linear_row[column])
                        quadratic[column].append(quadratic_row[column])

                    # RSSI=10 Calculations
                    ten_row = dict.fromkeys(RSSI10_COLUMNS, np.nan)
                    if fill_rssi10:
                        if c1[10]:                             # Measured mean at RSSI=10, if any readings
                            measured = np.mean(c1[10])
                            ten_row['Avg RSSI=10']     = round(measured, 3)
                            ten_row['Water Minus Avg'] = round(TEMP_VAL - measured, 3)
                        try:                                   # Linear-fit prediction at RSSI=10
                            predicted = np.poly1d( np.polyfit(rssi, means, 1) )(10.0)
                            ten_row['Calc RSSI=10']     = round(predicted, 3)
                            ten_row['Water Minus Calc'] = round(TEMP_VAL - predicted, 3)
                        except Exception:
                            pass                               # Leave both as NaN when no fit is possible

                    for column in RSSI10_COLUMNS:
                        at_ten[column].append(ten_row[column])

                if not found:
                    for column in FIT_COLUMNS:
                        linear[column].append(NOT_FOUND)
                        quadratic[column].append(NOT_FOUND)
                    for column in RSSI10_COLUMNS:
                        at_ten[column].append(NOT_FOUND)

            # End of Iterating over Data; Excel Creation/Formatting Section

            index = [ DATA_NAMES[o] + " Linear" for o in range(len(DATA_NAMES)) ] +\
                    [ 'Linear Average, Std' ] +\
                    [ DATA_NAMES[o] + " Quadratic" for o in range(len(DATA_NAMES)) ] +\
                    [ 'Quadratic Average, Std' ]

            # Summary row: mean/std for the x2 and x coefficients, 'n/a' for R2 and c.
            for block in (linear, quadratic):
                block['x2'].append( _mean_std_label(block['x2']) )
                block['x'].append( _mean_std_label(block['x']) )
                block['R2'].append('n/a')
                block['c'].append('n/a')

            table = { 'index': index }
            for column in FIT_COLUMNS:
                table[TAG + ', ' + column] = linear[column] + quadratic[column]
            for column in RSSI10_COLUMNS:
                # RSSI=10 values only exist for the per-folder rows; pad the rest of the rows.
                table[TAG + ', ' + column] = at_ten[column] + [ 'n/a' ] * (len(index) - len(at_ten[column]))

            # 'index' stays a regular column here because the merge below joins on it.
            list_dfs.append( pd.DataFrame(data=table, columns=list(table)) )

        df = list_dfs[0]
        for t__ in list_dfs[1:]:
            df = pd.merge(df, t__, on='index')
        df = df.set_index('index')

        df.to_excel(writer, sheet_name=sheet_name)

### **Plotting Code**

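<ul>
    <li>Plots of mean temperature versus RSSI, with standard-deviation error bars, for each data folder, tag, and RSSI range</li>
    <li>Figures are saved as PNG files in the "Plots" folder inside OUTPUT_NAME</li>
</ul>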

In [None]:
%%capture

# ---------------------------------------------------------------------------------------
# For every data folder and RSSI range, draws one subplot per tag showing the mean
# temperature at each RSSI level with its standard deviation as an error bar, and saves
# the figure to OUTPUT_NAME/Plots each time a tag has been drawn.
# ---------------------------------------------------------------------------------------

RSSI_LABEL = [ 'All RSSIs', 'RSSI less than or equal to 20', 'RSSI greater than 20' ]

# Parallel to RSSI_LABEL: which RSSI levels belong to the range, and the x ticks to show.
# The filter must be applied to the RSSI level itself, not to the range number.
RSSI_FILTER = [ lambda level: True, lambda level: level <= 20, lambda level: level > 20 ]
RSSI_TICKS  = [ range(27), range(21), range(21,27) ]

for a in range(len(DATA_NAMES)):  # Iterate over all data files
    for RSSI_VAL in range(3):     # 0 = All RSSI, 1 = RSSI<=20, 2 = RSSI>20
        in_range = RSSI_FILTER[RSSI_VAL]

        # squeeze=False always returns a 2-D array of axes, so a single tag works too.
        fig, axs = plt.subplots(len(TAGS), 1, figsize=(len(TAGS)*8, 9), sharex=True, squeeze=False)
        axs = axs[:, 0]

        for b in range(len(TAGS)):
            ax = axs[b]
            ax.set_title(TAGS[b] + ", " + DATA_NAMES[a] + ", " + RSSI_LABEL[RSSI_VAL])
            ax.set_xlabel("RSSI (dBm)")
            ax.set_ylabel("Temperature (C)")

            for epc in TEMP_LIST[a]:
                if epc[-4:] != TAGS[b]:
                    continue

                # Temperature and RSSI readings, each ordered by timestamp, paired by position.
                dt = [ reading for _, reading in sorted( TEMP_LIST[a][epc].items() ) ]
                dr = [ reading for _, reading in sorted( RSSI_LIST[a][epc].items() ) ]

                # Group temperatures by RSSI level (levels 0..25; anything else raises KeyError)
                c1 = { level: [] for level in range(26) }
                for i in range(len(dt)):
                    c1[dr[i]].append(dt[i])

                # Making Standard Deviation and Error Bars
                for level, temps in c1.items():
                    if len(temps) <= 2 or not in_range(level):
                        continue               # Need at least three readings inside the range
                    mean = sum(temps)/len(temps)
                    std = np.std(temps)
                    ax.scatter(level, mean, marker='o',color='b')
                    ax.errorbar(level, mean, yerr=std, color='b')
                    ax.text(
                        level, mean+std, str(round(std, 3)), fontsize=10, color="b", horizontalalignment='center', verticalalignment='bottom'
                    )

                ax.set_xticks(RSSI_TICKS[RSSI_VAL])
                ax.xaxis.set_visible(True)     # sharex hides tick labels on upper subplots

                fig.subplots_adjust(hspace=0.4)

                # Saved once per tag: the file contains every subplot drawn so far.
                fig.savefig(OUTPUT_NAME + "/Plots/" + DATA_NAMES[a] + ", " + TAGS[b] + ", " + RSSI_LABEL[RSSI_VAL] + ".png")

        # Release the figure; otherwise one figure per (data folder, range) stays in memory.
        plt.close(fig)