In [None]:
# Extract data from the uploaded .raw files and plot the resulting graphs

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from numpy_ext import rolling_apply
import datetime
import time
import ipywidgets as widgets
from ipywidgets import widgets, Layout
from IPython.display import display, clear_output
from matplotlib.pyplot import figure
import voila
import io

def _int_input(upper_bound):
    """Build a 45px-wide bounded integer box starting at 10, limited to [0, upper_bound]."""
    return widgets.BoundedIntText(
        value=10,
        min=0,
        max=upper_bound,
        step=1,
        layout=Layout(width='45px'),
        disabled=False,
    )


def _captioned(control, caption):
    """Return (label, HBox) with a 500px-wide caption placed after *control*."""
    caption_label = widgets.Label(caption, layout=Layout(width='500px'))
    return caption_label, widgets.HBox([control, caption_label])


# Build every widget first; constructing a widget produces no cell output,
# so only the print/display sequence further down determines what is shown.
uploader = widgets.FileUpload(accept='.raw', multiple=False)
uploader2 = widgets.FileUpload(accept='.raw', multiple=False)
run_button = widgets.Button(description='Run analysis')
out = widgets.Output()

# Threshold inputs: gradient numerator (nT), gradient denominator (min) and
# the allowed variation from the chord (nT), each paired with its caption.
gradient_numerator = _int_input(25)
label1, box1 = _captioned(gradient_numerator, 'nT: gradient threshold (_/min)')

gradient_denominator = _int_input(10)
label2, box2 = _captioned(gradient_denominator, 'min: gradient threshold (nT/_)')

variation_from_chord = _int_input(10)
label3, box3 = _captioned(variation_from_chord, 'nT: threshold for variation from 600s chord')

# Render the form: first upload, the three thresholds, second upload, then
# the run button together with the output area that run() writes into.
print("Upload first .raw file (Sample A, with gradient/deviation analysis):")
display(uploader)
display(box1)
display(box2)
display(box3)
print("Upload second .raw file (Sample B):")
display(uploader2)
display(run_button, out)

def calc_slope(x):
    """Return the least-squares slope of *x* against its sample index, divided by 6.

    Args:
        x: Sequence of numeric samples (at least two are needed for a
            meaningful straight-line fit).

    Returns:
        The first-degree polynomial coefficient of the fit, scaled by 1/6.
    """
    sample_index = np.arange(len(x))
    # polyfit lists coefficients from the highest degree down, so entry 0 of
    # a degree-1 fit is the slope and entry 1 the intercept.
    linear_coeffs = np.polyfit(sample_index, x, 1)
    return linear_coeffs[0] / 6

def getSec(s):
    """Convert an ``HHMMSS`` clock string into seconds since midnight.

    Args:
        s: Digits of a time of day in hour-minute-second order. Strings
            shorter than six characters are treated as having lost their
            leading zeros (e.g. ``"12345"`` means 01:23:45).

    Returns:
        The number of whole seconds elapsed since 00:00:00.

    Raises:
        ValueError: If the (padded) string is not a valid ``%H%M%S`` time.
    """
    # strptime accepts one-digit fields, so an unpadded "12345" would be read
    # as 12:34:05. Left-padding to six digits fixes the field boundaries.
    moment = datetime.datetime.strptime(s.zfill(6), "%H%M%S")
    return moment.hour * 3600 + moment.minute * 60 + moment.second

def _read_uploaded_text(upload_widget):
    """Return the UTF-8 text of the first file held by *upload_widget*, or None if it is empty."""
    uploaded = upload_widget.value
    # ipywidgets 7 exposes a {filename: info} dict; ipywidgets 8 exposes a
    # tuple of info dicts. Both carry the payload under the 'content' key.
    entries = list(uploaded.values()) if isinstance(uploaded, dict) else list(uploaded)
    if not entries:
        return None
    # bytes() accepts both a bytes payload and a memoryview payload.
    return bytes(entries[0]['content']).decode('utf-8')


def _parse_raw_readings(raw_text):
    """Parse the text of a .raw file into a frame with Time (seconds) and Magnetic_Readings columns."""
    table = pd.read_csv(io.StringIO(raw_text), sep=" ", header=None)
    # Ignore the first four space-separated fields and treat gaps as zero.
    table = table.iloc[:, 4:].fillna(0).astype(str)
    # Drop every row whose column labelled 5 contains a "?" marker.
    table = table[~table[5].str.contains("?", regex=False)]
    table = table.astype(float).astype(int)
    # Blank out everything that is not above 9999, then shift the surviving
    # values of each row to the left so they line up in the leading columns.
    table = table.where(table > 9999, np.nan)
    packed = table.apply(lambda row: pd.Series(row.dropna().to_numpy()), axis=1)
    packed = packed.set_axis(table.columns[:packed.shape[1]], axis=1).reindex(table.columns, axis=1)
    packed.dropna(axis=1, how='all', inplace=True)
    # The second surviving column is stored scaled by ten.
    packed[packed.columns[1]] = packed[packed.columns[1]] / 10
    packed.columns = ["Time", "Magnetic_Readings"]
    # Drop the last two characters of the stringified time (presumably the
    # ".0" left by the float conversion - confirm) and convert to seconds.
    # A vectorised map replaces per-row writes through Series.values, which
    # is read-only when pandas Copy-on-Write is active.
    packed["Time"] = packed["Time"].astype(str).map(lambda stamp: getSec(stamp[:-2])).astype(int)
    return packed


def _scatter_with_threshold(times, values, y_label, title, threshold, threshold_label):
    """Draw a black scatter of *values* over *times* with a red horizontal threshold line."""
    plt.figure(figsize=(20, 4))
    plt.scatter(times, values, 0.25, "black")
    plt.xlabel("Time (sec)")
    plt.ylabel(y_label)
    plt.title(title)
    plt.axhline(y=threshold, color='r', linestyle='-', label=threshold_label)
    plt.legend(loc='upper left')
    plt.show()


def run(_):
    """Handle a click on the run button: parse both uploads and draw the plots.

    Inside the ``out`` output widget this clears previous output, parses the
    two uploaded .raw files, aligns each with the time grid fetched from
    Time.csv, and then plots for Sample A:

    * the absolute rolling gradient against the gradient threshold,
    * the absolute variation from the rolling chord against its threshold,
    * the magnetic readings, with rows exceeding either threshold in red.
      Sample B is overlaid only when no row exceeds a threshold.

    If either upload is missing, a message is printed and nothing is plotted.

    Args:
        _: The clicked button (unused).
    """
    with out:
        clear_output()
        print("Working...")

        text_a = _read_uploaded_text(uploader)
        text_b = _read_uploaded_text(uploader2)
        if text_a is None or text_b is None:
            # Previously this surfaced as a bare IndexError traceback.
            print("Please upload both .raw files before running the analysis.")
            return

        # Read the thresholds once so every plot and label uses the same values.
        window_minutes = gradient_denominator.value
        grad_limit = gradient_numerator.value
        chord_limit = variation_from_chord.value

        readings_a = _parse_raw_readings(text_a)
        df_time = pd.read_csv("https://raw.githubusercontent.com/pc3541/Magnetic-Readings-Data-Visualization/main/Time.csv")
        # Merge before the derived columns are added so df_merged carries
        # only the time grid plus the raw readings.
        df_merged = pd.merge(df_time, readings_a, how='left')

        readings_b = _parse_raw_readings(text_b)
        df_merged2 = pd.merge(df_time, readings_b, how='left')

        # Rolling slope over window_minutes * 10 samples, rescaled by
        # window_minutes * 60 so it is expressed per window.
        rolling_slope = readings_a["Magnetic_Readings"].rolling(window_minutes * 10).apply(calc_slope)
        readings_a["Gradients"] = abs(rolling_slope * (window_minutes * 60))
        df_merged_slopes = pd.merge(df_merged, readings_a, how='left')
        _scatter_with_threshold(
            df_merged_slopes["Time"],
            df_merged_slopes["Gradients"],
            f"Gradient (nT/{window_minutes} min)",
            "Gradients vs. Time",
            grad_limit,
            f"Threshold: {grad_limit} nt/{window_minutes} min",
        )

        # Chord value: mean of the first and last reading of a centred
        # 100-sample window.
        endpoint_sum = readings_a['Magnetic_Readings'].rolling(100, center=True).apply(
            lambda window: window.iloc[0] + window.iloc[-1]
        )
        readings_a["600s Chord"] = abs(endpoint_sum) / 2
        df_merged_chord = pd.merge(df_merged, readings_a, how="left")
        df_merged_chord["Variation From 600s Chord"] = abs(
            df_merged_chord['Magnetic_Readings'] - df_merged_chord["600s Chord"]
        )
        _scatter_with_threshold(
            df_merged_chord["Time"],
            df_merged_chord["Variation From 600s Chord"],
            "Variation From 600s Chord (nT)",
            "Variation From 600s Chord vs. Time",
            chord_limit,
            f"Threshold: {chord_limit} nt",
        )

        # Rows of Sample A that break either threshold. A row breaking both
        # appears twice, which only overplots the same red point.
        chord_exceeds = df_merged_chord["Variation From 600s Chord"] > chord_limit
        slope_exceeds = df_merged_slopes["Gradients"] > grad_limit
        aberrant = pd.concat(
            [df_merged.loc[chord_exceeds, :], df_merged.loc[slope_exceeds, :]],
            ignore_index=True,
        )
        has_aberrant = bool(slope_exceeds.any() or chord_exceeds.any())

        plt.figure(figsize=(20, 4))
        if has_aberrant:
            # NOTE(review): Sample B is not drawn in this branch - confirm
            # that hiding it when thresholds are exceeded is intended.
            plt.scatter(df_merged["Time"], df_merged["Magnetic_Readings"], 0.25, "black")
            plt.scatter(aberrant["Time"], aberrant["Magnetic_Readings"], 0.25, "red")
        else:
            plt.scatter(df_merged["Time"], df_merged["Magnetic_Readings"], 0.25, "black", label="Sample A")
            plt.scatter(df_merged2["Time"], df_merged2["Magnetic_Readings"], 0.25, "grey", label="Sample B")
            plt.legend(loc='upper left')
        plt.xlabel("Time (sec)")
        plt.ylabel("Magnetic Readings (nT)")
        plt.title("Magnetic Readings vs. Time")
        plt.show()

# Register run() as the click handler of the "Run analysis" button.
run_button.on_click(run)