# Predictive Maintenance of a Lathe Machine

#### Importing the required libraries and modules

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from scipy import signal

#### Importing Vibration Data and converting it to proper Time-series format.

In [2]:
# Experiment summary sheet: the 'Experiment' column identifies each run and the
# remaining columns hold that run's details (copied onto the vibration data below).
exp = pd.read_excel(io="Data/Experiment Summary.xlsx")

In [12]:
dataframes = []  # One cleaned DataFrame per experiment file, in experiment order

for i in range(1, 61):
    file = f"Data/{i}.xlsx"
    raw = pd.read_excel(file)  # Use pd.read_excel() for Excel files

    # Discard completely empty columns/rows, then name the four remaining channels.
    raw = raw.dropna(axis='columns', how='all').dropna(axis='rows', how='all')
    raw.columns = ['Time', 'X', 'Y', 'Z']

    # Exclude the original header row from the data. The slice is copied so that the
    # column assignments below write to an independent frame rather than to a slice
    # of `raw` (which raises SettingWithCopyWarning).
    df = raw.iloc[1:].copy()
    df['Time'] = pd.to_datetime(df['Time'], unit='s').dt.time  # seconds -> time-of-day objects

    # Add experiment details from the exp dataframe: every summary column after
    # 'Experiment' is broadcast as a constant column onto this experiment's samples.
    experiment_row = exp[exp['Experiment'] == i]
    if experiment_row.empty:
        # Previously skipped silently, which left this frame without the detail
        # columns and produced NaNs after concatenation.
        print(f"Warning: experiment {i} not found in the experiment summary; details not added")
    else:
        for column in experiment_row.columns[1:]:
            df[column] = experiment_row[column].values[0]

    dataframes.append(df)
    print(i)  # To keep an eye on progress


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


In [63]:
# Spot-check one loaded experiment: index 9 is the frame built from Data/10.xlsx.
dataframes[9]

Unnamed: 0,Time,X,Y,Z,Rpm,Feed,Depth,Ra
1,00:00:00,-2.261678,-5.450551,1.244353,290,0.14,1.0,5.486
2,00:00:00.000976,0.824355,3.769528,2.168082,290,0.14,1.0,5.486
3,00:00:00.001953,-1.609855,5.30498,5.369853,290,0.14,1.0,5.486
4,00:00:00.002929,1.306778,-0.170362,-0.441178,290,0.14,1.0,5.486
5,00:00:00.003906,-2.342696,-1.256594,-4.986221,290,0.14,1.0,5.486
...,...,...,...,...,...,...,...,...
62972,00:01:01.495000,-0.961714,0.720716,0.508313,290,0.14,1.0,5.486
62973,00:01:01.496000,-1.628268,0.591841,-2.395362,290,0.14,1.0,5.486
62974,00:01:01.497000,-3.325955,-3.786224,-0.573665,290,0.14,1.0,5.486
62975,00:01:01.498000,-1.075875,1.007923,1.336358,290,0.14,1.0,5.486


In [20]:
# Stack all per-experiment frames row-wise into one table with a fresh 0..n-1 index.
merged_df = pd.concat(objs=dataframes, axis=0, ignore_index=True)

In [58]:
# Make the three axis columns numeric; values that cannot be parsed become NaN.
axis_columns = ['X', 'Y', 'Z']
merged_df[axis_columns] = merged_df[axis_columns].apply(pd.to_numeric, errors='coerce')

In [59]:
# Resultant vibration per sample: Euclidean norm of the X, Y and Z readings.
merged_df['Magnitude'] = np.sqrt(
    np.square(merged_df['X']) + np.square(merged_df['Y']) + np.square(merged_df['Z'])
)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

# Step 1: Prepare the Data
# Features and target are derived from merged_df inside this cell, so it runs on a
# fresh kernel (it used to depend on X / y created by a cell further down) and can be
# re-run safely (it used to overwrite X['Time'] in place, so a second run failed on
# the already-converted floats).
# Rows containing missing values are dropped first: pd.to_numeric(errors='coerce')
# can leave NaNs in X/Y/Z, and NaN inputs would make the loss NaN.
model_data = merged_df.dropna()
y = model_data["Ra"]  # Target
X = model_data.drop(columns="Ra")

# Convert 'Time' (time-of-day objects) to seconds as a float. assign() returns a new
# frame, leaving merged_df untouched.
X = X.assign(
    Time=[t.hour * 3600 + t.minute * 60 + t.second + t.microsecond / 1e6 for t in X['Time']]
)

# Split the data into training and testing sets
# NOTE(review): the split is row-wise and random, so samples of the same experiment
# (which all carry the same Ra) end up in both sets; consider splitting by
# experiment to get an honest generalisation estimate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: fit on the training split only, then apply to the test split
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Conv1D expects a trailing channel axis: (samples, features) -> (samples, features, 1)
X_train_cnn = np.expand_dims(X_train_scaled, axis=-1)
X_test_cnn = np.expand_dims(X_test_scaled, axis=-1)

# Step 2: Define the CNN Model
tf.random.set_seed(42)  # seed weight initialisation so repeated runs are comparable
model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='linear'),  # single linear output for regression
])

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Step 3: Train the CNN Model
model.fit(X_train_cnn, y_train, epochs=10, batch_size=32)

# Step 4: Evaluate the CNN Model
loss, mae = model.evaluate(X_test_cnn, y_test)
print('Mean Squared Error:', loss)
print('Mean Absolute Error:', mae)

# Make predictions
predictions = model.predict(X_test_cnn)


In [None]:
from sklearn.model_selection import train_test_split

# Target: the 'Ra' column. Features: every other column of merged_df.
y = merged_df["Ra"]
X = merged_df.drop(columns="Ra")

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [40]:
# Inspect the training features produced by the split ('Ra' has been removed).
X_train


Unnamed: 0,Time,X,Y,Z,Rpm,Feed,Depth
1784024,00:00:04.730500,0.610763,-1.197679,-1.122014,290,0.570,0.6
1030137,00:00:16.499000,9.10288,10.732455,-5.869468,700,0.140,0.4
1819355,00:00:03.735400,2.010158,3.625924,-2.329119,290,0.570,1.0
685038,00:00:21.988000,-0.685518,6.483265,3.938257,290,0.140,0.8
740965,00:00:12.604000,21.844737,28.797035,-7.330507,290,0.140,1.0
...,...,...,...,...,...,...,...
259178,00:00:56.604000,-3.174967,-1.26764,0.199177,190,0.140,0.6
1414414,00:00:16.270000,20.058667,-0.280826,-5.070866,290,0.285,1.0
131932,00:00:32.340000,-2.040721,2.6207,0.375826,190,0.140,0.4
671155,00:00:08.430700,-0.464561,4.852077,3.80209,290,0.140,0.8


In [None]:
dataframes = []  # To store the imported data from each file

# Rebuilds `dataframes` with only the Time/X/Y/Z columns (no experiment details).
for i in range(1, 61):
    file = f"Data/{i}.xlsx"
    df = pd.read_excel(file)  # Use pd.read_excel() for Excel files
    df = df.dropna(axis='columns', how='all')
    df = df.dropna(axis='rows', how='all')
    df.columns = ['Time', 'X', 'Y', 'Z']
    # Exclude the original header row from the data. Copy the slice so the 'Time'
    # assignment below (and the later in-place column updates on these frames)
    # write to an independent frame instead of raising SettingWithCopyWarning.
    df = df.iloc[1:].copy()
    df['Time'] = pd.to_datetime(df['Time'], unit='s').dt.time  # seconds -> time-of-day objects
    dataframes.append(df)
    print(i)  # to keep an eye on progress


In [None]:
# Make the axis columns of every experiment frame numeric; values that cannot be
# parsed become NaN. The loop variable must be `d`: the original header bound `i`
# while the body used `d`, which raised NameError on a fresh kernel (or silently
# kept converting a stale `d` left over from another cell).
for d in dataframes:
    d['X'] = pd.to_numeric(d['X'], errors='coerce')
    d['Y'] = pd.to_numeric(d['Y'], errors='coerce')
    d['Z'] = pd.to_numeric(d['Z'], errors='coerce')
    

#### Feature Extraction

In [None]:
# Add the resultant vibration (Euclidean norm of X, Y, Z) to every experiment frame.
for d in dataframes:
    squared_sum = np.square(d['X']) + np.square(d['Y']) + np.square(d['Z'])
    d['Magnitude'] = np.sqrt(squared_sum)

In [None]:
# Quick look at the imported data: list position, first five rows and shape of each frame.

for i in range(60):
    frame = dataframes[i]
    print(i)
    print(frame.head(5), frame.shape)

In [None]:
# Time_domain plot


def time_domain(num):
    """Plot the 'Magnitude' signal of one experiment against 'Time'.

    Parameters
    ----------
    num : int
        Position of the experiment's frame in the notebook-level ``dataframes`` list.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    layout_options = dict(height=600, showlegend=True, paper_bgcolor='darkgrey')

    fig = px.line(
        dataframes[num],
        x='Time',
        y=['Magnitude'],
        title='Vibration Sensor Data',
        labels={'value': 'acceleration'},
    )
    fig.update_layout(**layout_options)
    fig.show()
    

    
# Calculating PSD

def calculate_psd(dataframe, fs=1000):
    """Estimate the power spectral density of the 'Magnitude' signal.

    The whole record is multiplied by a Hanning window and passed to
    ``scipy.signal.periodogram``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'Magnitude' column. A 'Time' column is no longer required.
    fs : float, optional
        Sampling frequency handed to the periodogram (default 1000).

    Returns
    -------
    f : ndarray
        Sample frequencies.
    psd_x : ndarray
        Periodogram of the windowed magnitude signal.
    """
    x = dataframe['Magnitude']

    # Only the sample count is needed to build the window. The per-row conversion of
    # 'Time' to seconds that used to run here was never used and cost a Python-level
    # loop over every sample.
    window = np.hanning(len(x))
    x_windowed = x * window

    # NOTE(review): the window is applied by hand and periodogram is called with its
    # default window, so the result is presumably not corrected for the window's
    # power loss — confirm if absolute PSD levels matter.
    f, psd_x = signal.periodogram(x_windowed, fs)

    return f, psd_x



In [None]:
#Plotting PSD


def freq_domain(frequencies, psd_x):
    """Show a single PSD curve on a logarithmic y-axis.

    Parameters
    ----------
    frequencies : array-like
        Values for the x-axis.
    psd_x : array-like
        PSD values; their absolute value is plotted.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Absolute value of the PSD samples (the plotted quantity)
    psd_x_mag = np.abs(psd_x)

    # The y-range endpoints are passed as log10 values: from log10(1) up to the
    # log10 of the largest PSD value.
    y_axis = dict(
        title='acceleration',
        type='log',
        range=[np.log10(1), np.log10(max(psd_x_mag))],
    )

    # One line trace for the magnitude spectrum
    fig = go.Figure()
    magnitude_trace = go.Scatter(x=frequencies, y=psd_x_mag, mode='lines', name='Magnitude')
    fig.add_trace(magnitude_trace)

    fig.update_layout(
        title='Power Spectral Density',
        xaxis=dict(title='Frequency'),
        yaxis=y_axis,
        showlegend=True,
        paper_bgcolor='darkgrey',
        height=600,
    )

    fig.show()




In [None]:

# Calculating PSD

from scipy import signal
from scipy.signal import welch

def calculate_psd(dataframe, fs=1000, w=1):
    """Estimate the PSD of the X, Y and Z signals with Welch's method.

    Each axis is multiplied by a Hanning window spanning the whole record and then
    passed to ``scipy.signal.welch`` with a segment length of ``len(dataframe) / w``
    samples (truncated to an integer).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain 'X', 'Y' and 'Z' columns. A 'Time' column is no longer required.
    fs : float, optional
        Sampling frequency handed to ``welch`` (default 1000).
    w : int or float, optional
        Divisor of the record length that sets the Welch segment length
        (default 1, i.e. one segment covering the whole record).

    Returns
    -------
    f : ndarray
        Sample frequencies.
    psd_x, psd_y, psd_z : ndarray
        Welch PSD estimates for the X, Y and Z axes.
    """
    n_samples = len(dataframe)

    # Segment length must be a whole number of samples; the plain division used
    # before produced a float. int() truncates towards zero.
    nperseg = int(n_samples / w)

    # Hanning window over the full record. Only the sample count is needed, so the
    # unused per-row conversion of 'Time' to seconds has been removed.
    # NOTE(review): welch presumably applies its own window to each segment as well,
    # so the signal ends up windowed twice — confirm this is intended.
    window = np.hanning(n_samples)

    f, psd_x = welch(dataframe['X'] * window, fs, nperseg=nperseg)
    _, psd_y = welch(dataframe['Y'] * window, fs, nperseg=nperseg)
    _, psd_z = welch(dataframe['Z'] * window, fs, nperseg=nperseg)

    return f, psd_x, psd_y, psd_z



In [None]:
# Time_domain plot


def time_domain(num):
    """Plot the X, Y and Z signals of one experiment against 'Time'.

    Parameters
    ----------
    num : int
        Position of the experiment's frame in the notebook-level ``dataframes`` list.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    axis_columns = ['X', 'Y', 'Z']

    # One line per axis, all sharing the same time axis
    fig = px.line(
        dataframes[num],
        x='Time',
        y=axis_columns,
        title='Vibration Sensor Data',
        labels={'value': 'acceleration'},
    )

    # Figure styling, then display
    layout_options = dict(height=600, showlegend=True, paper_bgcolor='darkgrey')
    fig.update_layout(**layout_options)
    fig.show()
    


def freq_domain(frequencies, psd_x, psd_y, psd_z):
    """Show the PSD curves of the X, Y and Z axes in one figure.

    Parameters
    ----------
    frequencies : array-like
        Values for the shared x-axis.
    psd_x, psd_y, psd_z : array-like
        PSD values per axis; their absolute values are plotted.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    spectra = (('X', psd_x), ('Y', psd_y), ('Z', psd_z))

    fig = go.Figure()
    for trace_name, psd in spectra:
        # Plot the absolute value of each PSD, one line trace per axis
        fig.add_trace(
            go.Scatter(x=frequencies, y=np.abs(psd), mode='lines', name=trace_name)
        )

    fig.update_layout(
        title='Power Spectral Density',
        xaxis=dict(title='Frequency'),
        yaxis=dict(title='acceleration'),
        showlegend=True,
        paper_bgcolor='darkgrey',
        height=600,
    )

    fig.show()

### Time Domain, Frequency Domain and time vs frequency charts:

##### RPM: 190

In [None]:
from scipy.signal import find_peaks

# Plot the time-domain signal and the PSD for a selection of experiments.
# Positions visited: j, j + 20 and j + 40 for j = 0..4.
for j in range(5):
    for i in range(j, 60, 20):
        print(exp.iloc[i])
        time_domain(i)  # displays the figure; returns nothing worth keeping

        # The PSD results get their own names. They were previously unpacked into
        # x / y / z, which overwrote the notebook-level regression target `y`.
        # NOTE(review): the displayed timestamps advance by roughly 1 ms per sample,
        # so confirm that fs=500 matches the sensor's actual sampling rate.
        frequencies, psd_x, psd_y, psd_z = calculate_psd(dataframes[i], fs=500, w=32)
        freq_domain(frequencies, psd_x, psd_y, psd_z)

        print()
    