In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from random import randint
import plotly.express as px
import os
import re
import plotly.graph_objects as go

In [None]:
# Directory that is scanned for logger exports (presumably the high-frequency kiln data).
data_path = "/home/lucymartin/Documents/kiln/high_freq_dat/"
# Full path of every directory entry whose name ends in ".CSV".
# The suffix test is case-sensitive and os.listdir returns entries in arbitrary order.
data_files = [
    os.path.join(data_path, entry)
    for entry in os.listdir(data_path)
    if entry.endswith(".CSV")
]

In [None]:
# sort files into the correct order for ease, as data runs over from one file to the next
def atoi(text):
    """Return ``int(text)`` if ``text`` consists only of decimal digits, else ``text`` unchanged.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because ``isdigit`` is also
    true for characters such as superscript digits ("²"), which ``int`` cannot parse.
    """
    return int(text) if text.isdecimal() else text


def natural_keys(text):
    '''
    Sort key for human ("natural") ordering: alist.sort(key=natural_keys).

    ``text`` is split into alternating non-digit / digit chunks and each digit
    chunk is converted to int, so "file2" sorts before "file10".
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]
# Order the paths naturally so that numbered files follow each other (2 before 10).
data_files = sorted(data_files, key=natural_keys)

In [None]:
# Print the list of file paths so the ordering can be checked by eye.
print(data_files)

In [None]:
# Load each CSV into its own DataFrame. Some files are missing one column name
# in their header row; left alone, that shifts every column along and the data
# is read incorrectly, so the header is read separately and patched if needed.
pdlist = []
for file in data_files:
    print(file)
    # nrows=0 parses only the header row, which yields the column names
    column_names = pd.read_csv(file, nrows=0).columns
    if len(column_names) <= 12:
        # short header: re-insert the absent "Model" name at position 7
        column_names = np.insert(np.array(column_names), 7, "Model")
    # skip the file's own header row and apply the (possibly patched) names
    frame = pd.read_csv(file, header=None, index_col=False, skiprows=1, names=column_names)
    pdlist.append(frame)

In [None]:
# Displaying the whole list would dump every DataFrame in full as cell output;
# show one (rows, columns) shape per loaded file instead.
[frame.shape for frame in pdlist]

In [None]:
# Stack the per-file frames into one table with a fresh 0..n-1 row index.
total_df = pd.concat(pdlist, ignore_index=True)

# Readings that cannot be parsed as numbers become NaN.
total_df["Temp C"] = pd.to_numeric(total_df["Temp C"], errors='coerce')

# Render the timestamp fields as zero-padded text
# ("mm" is the month and "mm.1" the minute, as the rename below shows).
for field, pattern in (
    ("mm", "{:02}"),
    ("mm.1", "{:02}"),
    ("hh", "{:02}"),
    ("dd", "{:02}"),
    ("ss", "{:06.3f}"),
):
    total_df[field] = total_df[field].map(pattern.format)

# Compact month-day-hour-minute-second string built from the padded fields.
total_df["date_time_string"] = (
    total_df["mm"].astype("string")
    + total_df["dd"].astype("string")
    + total_df["hh"].astype("string")
    + total_df["mm.1"].astype("string")
    + total_df["ss"].astype("string")
)

# Give the fields the names pd.to_datetime expects, then assemble one timestamp per row.
total_df = total_df.rename(
    columns={'mm': 'month', 'dd': 'day', 'hh': 'hour', 'mm.1': 'minute', 'ss': 'second', 'yyyy': 'year'}
)
total_df["date_time"] = pd.to_datetime(
    total_df[["year", "month", "day", "hour", "minute", "second"]]
)

In [None]:
# Preview the first five rows of the combined table.
total_df.head(5)

In [None]:
# The full table has far too many points to plot; for a first look keep
# one row out of every 10000.
total_df_to_plot = total_df.iloc[::10000]

In [None]:
# Overview of the thinned data: temperature against time.
px.scatter(total_df_to_plot, x="date_time", y="Temp C")

Next, we want to split the data into a separate dataframe for each firing.

In [None]:
# Cut the full record into three firings at two fixed timestamps.
# NOTE(review): every comparison is strict, so a row stamped exactly on a
# cut point ends up in none of the three frames - confirm that is intended.
first_cut = "2020-08-22 17:12:50.517"
second_cut = "2020-08-30 17:12:50.517"
stamps = total_df["date_time"]

firing_1 = total_df.loc[stamps < first_cut]
firing_2 = total_df.loc[(stamps > first_cut) & (stamps < second_cut)]
firing_3 = total_df.loc[stamps > second_cut]

In [None]:
# Renumber the rows of every firing from 0 so that index labels and row
# positions coincide (the crossing indices found from the index are later used
# with .iloc). firing_1 is included for consistency with the other two.
# drop=True discards the old labels instead of storing them in an "index"
# column, which also makes this cell safe to re-run.
firing_1 = firing_1.reset_index(drop=True)
firing_2 = firing_2.reset_index(drop=True)
firing_3 = firing_3.reset_index(drop=True)

In [None]:
# Row position (0-based offset) of the first reading above 1100 in each firing.
# np.flatnonzero returns positions rather than index labels, which is what the
# .iloc slices built from these values need, whatever index the frame carries.
# As before, an IndexError is raised if a firing never exceeds the threshold.
crossing_index = int(np.flatnonzero(firing_1["Temp C"].to_numpy() > 1100)[0])
crossing_index2 = int(np.flatnonzero(firing_2["Temp C"].to_numpy() > 1100)[0])
crossing_index3 = int(np.flatnonzero(firing_3["Temp C"].to_numpy() > 1100)[0])

In [None]:
# For each firing keep the 2200 rows starting at its first crossing of 1100.
# (No thinning is applied: every row in the window is kept.)
above_df_1, above_df_2, above_df_3 = (
    firing.iloc[start:start + 2200]
    for firing, start in (
        (firing_1, crossing_index),
        (firing_2, crossing_index2),
        (firing_3, crossing_index3),
    )
)

In [None]:
# Firing 1: temperature against time for the window that starts at the 1100 crossing.
# NOTE(review): the title says 15 minutes but the window is defined as a row count - confirm they agree.
trace = go.Scatter(x=above_df_1["date_time"], y=above_df_1["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 1, 15 minutes after crossing 1100",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Firing 2: temperature against time for the window that starts at the 1100 crossing.
trace = go.Scatter(x=above_df_2["date_time"], y=above_df_2["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 2, 15 minutes after crossing 1100",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Firing 3: temperature against time for the window that starts at the 1100 crossing.
trace = go.Scatter(x=above_df_3["date_time"], y=above_df_3["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 3, 15 minutes after crossing 1100",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Row position (0-based offset) where each firing first reads above 800.
# np.flatnonzero returns positions rather than index labels, matching the
# .iloc slicing below whatever index the frame carries.
crossing_index_800 = int(np.flatnonzero(firing_1["Temp C"].to_numpy() > 800)[0])
crossing_index2_800 = int(np.flatnonzero(firing_2["Temp C"].to_numpy() > 800)[0])
# Firing 3: take the 2nd reading above 800 - a brief temp blip early on gave
# a value above 800.
# NOTE(review): this skips exactly one reading; a blip lasting more than one
# sample would still be picked up - confirm against the data.
crossing_index3_800 = int(np.flatnonzero(firing_3["Temp C"].to_numpy() > 800)[1])

# Keep the 2200 rows that start at each crossing (no thinning is applied).
above_df_1_800 = firing_1.iloc[crossing_index_800: crossing_index_800 + 2200]
above_df_2_800 = firing_2.iloc[crossing_index2_800: crossing_index2_800 + 2200]
above_df_3_800 = firing_3.iloc[crossing_index3_800: crossing_index3_800 + 2200]

In [None]:
# Firing 1: temperature against time for the window that starts at the 800 crossing.
trace = go.Scatter(x=above_df_1_800["date_time"], y=above_df_1_800["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 1, 15 minutes after crossing 800",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Firing 2: temperature against time for the window that starts at the 800 crossing.
trace = go.Scatter(x=above_df_2_800["date_time"], y=above_df_2_800["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 2, 15 minutes after crossing 800",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Firing 3: temperature against time for the window that starts at the 800 crossing.
trace = go.Scatter(x=above_df_3_800["date_time"], y=above_df_3_800["Temp C"])
fig = go.Figure(data=[trace])
fig.update_layout(
    title="Firing 3, 15 minutes after crossing 800",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Overlay the three 800-crossing windows. No x values are passed, so the
# traces are drawn against sample number and line up from the crossing point;
# the x axis is labelled accordingly and each trace is named for the legend.
fig = go.Figure()
for label, window in (
    ("Firing 1", above_df_1_800),
    ("Firing 2", above_df_2_800),
    ("Firing 3", above_df_3_800),
):
    fig.add_trace(go.Scatter(y=window["Temp C"], name=label))
fig.update_layout(
    title="Firing 1, 2, and 3, 15 minutes after crossing 800",
    xaxis_title="Data point",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Overlay the three 1100-crossing windows. No x values are passed, so the
# traces are drawn against sample number and line up from the crossing point;
# the x axis is labelled accordingly and each trace is named for the legend.
fig = go.Figure()
for label, window in (
    ("Firing 1", above_df_1),
    ("Firing 2", above_df_2),
    ("Firing 3", above_df_3),
):
    fig.add_trace(go.Scatter(y=window["Temp C"], name=label))
fig.update_layout(
    title="Firing 1, 2, and 3, 15 minutes after crossing 1100",
    xaxis_title="Data point",
    yaxis_title="Temperature (°C)",
)
fig.show()

We want a way to look at what's actually going on in these regions; a Fourier transform seems the best choice.

In [None]:
from scipy.fft import fft, ifft, fftfreq

In [None]:
def _window_spectrum(firing, start, n_samples=2200):
    """FFT of the "Temp C" readings in ``firing.iloc[start:start + n_samples]``.

    Defined in this cell so the cell is self-contained.

    Returns ``(samples, n, spacing, yf, xf)``:
      samples - the window's temperatures with NaN readings removed
      n       - number of samples actually transformed (``len(samples)``), so
                the frequency axis and any 2/n scaling match the FFT length
      spacing - sample spacing in seconds, taken as the median gap between
                consecutive timestamps so one irregular step does not skew it
      yf      - ``fft(samples)``
      xf      - the first ``n // 2`` frequencies from ``fftfreq(n, spacing)``
    """
    window = firing.iloc[start:start + n_samples]
    samples = window["Temp C"].dropna().to_numpy()
    n = len(samples)
    spacing = float(window["date_time"].diff().dt.total_seconds().median())
    yf = fft(samples)
    xf = fftfreq(n, spacing)[:n // 2]
    return samples, n, spacing, yf, xf


# Spectrum of the 2200-row window after each firing first exceeds 1100.
(firing_1_above_1100, N_1_above_1100, T_1_above_1100,
 yf_1_above_1100, xf_1_above_1100) = _window_spectrum(firing_1, crossing_index)

(firing_2_above_1100, N_2_above_1100, T_2_above_1100,
 yf_2_above_1100, xf_2_above_1100) = _window_spectrum(firing_2, crossing_index2)

(firing_3_above_1100, N_3_above_1100, T_3_above_1100,
 yf_3_above_1100, xf_3_above_1100) = _window_spectrum(firing_3, crossing_index3)

In [None]:
# One-sided amplitude spectrum of each 1100-crossing window: the first n//2
# FFT bins scaled by 2/n, one named trace per firing.
# NOTE(review): the 0 Hz bin carries the mean temperature and will dominate a
# linear axis - consider subtracting the mean before the FFT.
fig = go.Figure()
for label, xf, yf, n in (
    ("Firing 1", xf_1_above_1100, yf_1_above_1100, N_1_above_1100),
    ("Firing 2", xf_2_above_1100, yf_2_above_1100, N_2_above_1100),
    ("Firing 3", xf_3_above_1100, yf_3_above_1100, N_3_above_1100),
):
    fig.add_trace(go.Scatter(x=xf, y=2.0 / n * np.abs(yf[0:n // 2]), name=label))
fig.update_layout(
    title="FFT of the window after crossing 1100",
    # Hz assumes the sample spacing passed to fftfreq was in seconds
    xaxis_title="Frequency (Hz)",
    yaxis_title="Amplitude (°C)",
)
fig.show()

In [None]:
no_samples = 2200


def _window_spectrum(firing, start, n_samples=2200):
    """FFT of the "Temp C" readings in ``firing.iloc[start:start + n_samples]``.

    Defined in this cell so the cell is self-contained.

    Returns ``(samples, n, spacing, yf, xf)``:
      samples - the window's temperatures with NaN readings removed
      n       - number of samples actually transformed (``len(samples)``), so
                the frequency axis and any 2/n scaling match the FFT length
      spacing - sample spacing in seconds, taken as the median gap between
                consecutive timestamps so one irregular step does not skew it
      yf      - ``fft(samples)``
      xf      - the first ``n // 2`` frequencies from ``fftfreq(n, spacing)``
    """
    window = firing.iloc[start:start + n_samples]
    samples = window["Temp C"].dropna().to_numpy()
    n = len(samples)
    spacing = float(window["date_time"].diff().dt.total_seconds().median())
    yf = fft(samples)
    xf = fftfreq(n, spacing)[:n // 2]
    return samples, n, spacing, yf, xf


# Spectrum of the no_samples-row window after each firing crosses 800.
(firing_1_above_800, N_1_above_800, T_1_above_800,
 yf_1_above_800, xf_1_above_800) = _window_spectrum(firing_1, crossing_index_800, no_samples)

(firing_2_above_800, N_2_above_800, T_2_above_800,
 yf_2_above_800, xf_2_above_800) = _window_spectrum(firing_2, crossing_index2_800, no_samples)

(firing_3_above_800, N_3_above_800, T_3_above_800,
 yf_3_above_800, xf_3_above_800) = _window_spectrum(firing_3, crossing_index3_800, no_samples)

In [None]:
# One-sided amplitude spectrum of each 800-crossing window: the first n//2
# FFT bins scaled by 2/n, one named trace per firing.
# NOTE(review): the 0 Hz bin carries the mean temperature and will dominate a
# linear axis - consider subtracting the mean before the FFT.
fig = go.Figure()
for label, xf, yf, n in (
    ("Firing 1", xf_1_above_800, yf_1_above_800, N_1_above_800),
    ("Firing 2", xf_2_above_800, yf_2_above_800, N_2_above_800),
    ("Firing 3", xf_3_above_800, yf_3_above_800, N_3_above_800),
):
    fig.add_trace(go.Scatter(x=xf, y=2.0 / n * np.abs(yf[0:n // 2]), name=label))
fig.update_layout(
    title="FFT of the window after crossing 800",
    # Hz assumes the sample spacing passed to fftfreq was in seconds
    xaxis_title="Frequency (Hz)",
    yaxis_title="Amplitude (°C)",
)
fig.show()

In [None]:
# Whole-firing comparison against sample number; each trace is named so the
# legend says which firing is which.
fig = go.Figure()
fig.add_trace(go.Scatter(y=firing_2["Temp C"], name="Firing 2"))
fig.add_trace(go.Scatter(y=firing_3["Temp C"], name="Firing 3"))
fig.update_layout(
    title="Comparison of firings 2 and 3",
    xaxis_title="Data point",
    yaxis_title="Temperature (°C)",
)
fig.show()

In [None]:
# Firing 2 against its timestamps; the x axis shows date_time, so it is
# labelled as a time axis rather than as a data-point count.
fig = go.Figure()
fig.add_trace(go.Scatter(x=firing_2["date_time"], y=firing_2["Temp C"]))
fig.update_layout(
    title="Firing 2 with date_time",
    xaxis_title="Date and time",
    yaxis_title="Temperature (°C)",
)
fig.show()