This notebook explains how the data is explored and analysed for anomaly detection and for estimating the time remaining before an anomaly occurs.

1. Data compilation and new feature creation

In [None]:
import pandas as pd
import numpy as np
import os

##Path to obtain file
def create_compiled(path):
    """Compile all run files found in *path* into two CSV tables.

    Every entry of *path* is read as a CSV file, tagged with an integer
    ``Date_key`` and extended with derived cutter / film features: the
    row-to-row change of position and of speed, the ratio of position change
    to actual speed, and the position relative to the first row of the run.

    Two files are written into the ``compiled_df`` directory (created if
    missing, relative to the current working directory):

    * ``One year compiled.csv`` -- all runs stacked, including the new columns.
    * ``date_data.csv`` -- one row per input file with the fields sliced out
      of its file name, keyed by ``date_key``.

    Args:
        path: Directory containing the run files. File names are sliced at
            fixed character positions and must be at least 22 characters
            long (presumably ``MM-DDTHHMMSS_NNN_modeX.csv`` -- TODO confirm
            against the data set).

    Raises:
        IndexError: If a file name is too short to be sliced.
    """
    cut_position = "pCut::CTRL_Position_controller::Actual_position"
    cut_speed = "pCut::CTRL_Position_controller::Actual_speed"
    film_position = "pSvolFilm::CTRL_Position_controller::Actual_position"
    film_speed = "pSvolFilm::CTRL_Position_controller::Actual_speed"

    ## Derived columns whose first row has no previous row to compare with.
    first_row_zero_columns = [
        "Cutter_Change_of_position",
        "SvolFilm_Change_of_position",
        "Cutter_Change_of_speed",
        "SvolFilm_Change_of_speed",
        "Cutter_position_to_speed_ratio",
        "SvolFilm_position_to_speed_ratio",
    ]

    ## os.listdir returns entries in arbitrary order; sorting keeps the date
    ## keys and the row order reproducible between runs and machines.
    files = sorted(os.listdir(path))

    ## Per-file results are collected and combined once after the loop.
    ## DataFrame.append was removed in pandas 2.0 and copied the whole frame
    ## on every call.
    frames = []
    date_records = []

    for count, file in enumerate(files):
        file_directory = pd.read_csv(os.path.join(path, file))

        ## Key into the date table, so the file name fields are stored once
        ## per file instead of once per row.
        file_directory["Date_key"] = count

        ## Change of position from the previous row.
        file_directory["Cutter_Change_of_position"] = file_directory[cut_position].diff()
        file_directory["SvolFilm_Change_of_position"] = file_directory[film_position].diff()

        ## Change of speed from the previous row.
        file_directory["Cutter_Change_of_speed"] = file_directory[cut_speed].diff()
        file_directory["SvolFilm_Change_of_speed"] = file_directory[film_speed].diff()

        ## Change of position divided by the actual speed of the same row.
        ## NOTE: rows with an actual speed of 0 are not special-cased.
        file_directory["Cutter_position_to_speed_ratio"] = (
            file_directory["Cutter_Change_of_position"] / file_directory[cut_speed]
        )
        file_directory["SvolFilm_position_to_speed_ratio"] = (
            file_directory["SvolFilm_Change_of_position"] / file_directory[film_speed]
        )

        ## The first row has no previous row, so its differences are set to 0.
        for column in first_row_zero_columns:
            file_directory.loc[0, column] = 0

        ## Position relative to the start of the run, which removes the large
        ## absolute offset the position already has when the run begins.
        file_directory["Cutter_cumulative_position"] = (
            file_directory[cut_position] - file_directory.loc[0, cut_position]
        )
        file_directory["Svolfilm_cumulative_position"] = (
            file_directory[film_position] - file_directory.loc[0, film_position]
        )

        frames.append(file_directory)

        ## Fields taken from fixed positions of the file name.
        date_records.append(
            [count, file[:2], file[3:5], file[6:8], file[13:16], file[21]]
        )

    ## pd.concat raises on an empty list, so an empty directory yields an
    ## empty frame instead.
    mainframe = pd.concat(frames) if frames else pd.DataFrame()
    date_frame = pd.DataFrame(
        date_records,
        columns=["date_key", 'month', 'day', 'hour', 'sample_Number', 'mode'],
    )

    ## Create the output directory if it does not exist yet.
    os.makedirs("compiled_df", exist_ok=True)

    mainframe.to_csv("compiled_df/One year compiled.csv")
    date_frame.to_csv("compiled_df/date_data.csv")

## Build the compiled CSV files from the raw data folder (runs as soon as this cell is executed)
create_compiled("one-year-industrial-component-degradation")

The following code loads the compiled data and prints summary statistics for it.

In [None]:
import os
import sklearn
import pandas as pd
import scipy as sp
import numpy as np
import pandasql

## Load the two tables written by the compilation step
Main_data=pd.read_csv("compiled_df/One year compiled.csv")
Date_data=pd.read_csv("compiled_df/date_data.csv")

## pandasql query that joins the main table to the date table on the date key,
## so that every selected measurement row also carries the month of its file
Query='''
SELECT  Main_data.timestamp,
        Main_data."pCut::Motor_Torque",
        Main_data."pCut::CTRL_Position_controller::Lag_error",
        Main_data."pCut::CTRL_Position_controller::Actual_position",
        Main_data."pCut::CTRL_Position_controller::Actual_speed",
        Main_data."pSvolFilm::CTRL_Position_controller::Actual_position",
        Main_data."pSvolFilm::CTRL_Position_controller::Actual_speed",
        Main_data."pSvolFilm::CTRL_Position_controller::Lag_error",
        Main_data."pSpintor::VAX_speed",
        Date_data.month 
        FROM Main_data 
        INNER JOIN Date_data ON Main_data.Date_key=Date_data.date_key 
        
'''
## Run the query; the table names used in the SQL (Main_data, Date_data) are
## looked up among the variables passed in through locals()
QUery_ans=pandasql.sqldf(Query,locals())
## Do not truncate the column list when printing wide frames
pd.set_option('display.max_columns', None)
## Print summary statistics of the joined result
print(QUery_ans.describe())


In [None]:
 timestamp  pCut::Motor_Torque  \
count  1.062912e+06        1.062912e+06   
mean   4.102069e+00       -1.206338e-01   
std    2.364827e+00        6.078708e-01   
min    4.000000e-03       -6.560303e+00   
25%    2.056000e+00       -3.696310e-01   
50%    4.104000e+00       -1.187128e-01   
75%    6.152000e+00        2.546913e-01   
max    8.199999e+00        3.856873e+00   

       pCut::CTRL_Position_controller::Lag_error  \
count                               1.062912e+06   
mean                               -5.472746e-05   
std                                 1.212122e-01   
min                                -1.888258e+00   
25%                                -2.201461e-02   
50%                                 6.456900e-04   
75%                                 2.380830e-02   
max                                 2.021531e+00   

       pCut::CTRL_Position_controller::Actual_position  \
count                                     1.062912e+06   
mean                                      3.371415e+08   
std                                       5.466868e+08   
min                                      -2.039056e+09   
25%                                       8.626082e+07   
50%                                       1.919407e+08   
75%                                       5.691639e+08   
max                                       1.911789e+09   

       pCut::CTRL_Position_controller::Actual_speed  \
count                                  1.062912e+06   
mean                                   1.945795e+03   
std                                    4.873922e+03   
min                                   -9.482574e+03   
25%                                   -1.908875e+03   
50%                                    2.595520e+03   
75%                                    5.441666e+03   
max                                    1.792831e+04   

       pSvolFilm::CTRL_Position_controller::Actual_position  \
count                                       1.062912e+06      
mean                                        1.488169e+08      
std                                         2.711355e+08      
min                                         1.936250e+05      
25%                                         2.136702e+07      
50%                                         7.259557e+07      
75%                                         1.602507e+08      
max                                         1.453671e+09      

       pSvolFilm::CTRL_Position_controller::Actual_speed  \
count                                       1.062912e+06   
mean                                        5.367032e+03   
std                                         3.382194e+03   
min                                        -2.013385e+01   
25%                                         3.032669e+03   
50%                                         4.582979e+03   
75%                                         5.823731e+03   
max                                         1.797202e+04   

       pSvolFilm::CTRL_Position_controller::Lag_error  pSpintor::VAX_speed  \
count                                    1.062912e+06         1.062912e+06   
mean                                     9.847591e-01         1.927328e+03   
std                                      3.437204e-01         6.559047e+02   
min                                     -9.143658e-01         0.000000e+00   
25%                                      8.384416e-01         1.500000e+03   
50%                                      9.711797e-01         1.800000e+03   
75%                                      1.105926e+00         2.280000e+03   
max                                      3.567880e+00         3.600000e+03   

              month  
count  1.062912e+06  
mean   5.271676e+00  
std    3.505212e+00  
min    1.000000e+00  
25%    2.000000e+00  
50%    4.000000e+00  
75%    8.000000e+00  
max    1.200000e+01  


The following code and results explore the data to understand the relationships between the variables.