# Importing packages

In [None]:
from scipy.stats import kurtosis
from scipy.signal import lfilter
#from nitime.algorithms.autoregressive import AR_est_YW
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
sys.path.append('../utils/')
from utils.EDA import*

In [None]:
# Reset to Matplotlib's 'default' style, then apply the notebook-wide overrides
# one group at a time.
plt.style.use('default')

# Fonts
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 20,
})

# Axes frame, titles, labels and grid
plt.rcParams.update({
    'axes.titlesize': 20,
    'axes.labelsize': 18,
    'axes.linewidth': 2,
    'axes.edgecolor': 'black',
    'axes.grid': True,
})

# Lines and markers
plt.rcParams.update({
    'lines.linewidth': 1.5,
    'lines.markersize': 6,
})

# Figure size and tick labels
plt.rcParams.update({
    'figure.figsize': (20, 8),
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
})

# Legend appearance
plt.rcParams.update({
    'legend.fontsize': 13,
    'legend.frameon': True,
    'legend.fancybox': True,
    'legend.shadow': False,
    'legend.framealpha': 0.8,
    'legend.edgecolor': 'black',
})

In [None]:
# Relative path of the Parquet file holding the feature table
path_to_dataset = "Datasets/M01_OP6_Features.parquet"

# Load the whole file into a DataFrame
df = pd.read_parquet(path=path_to_dataset)

In [None]:
# Display the column labels of the loaded DataFrame
df.columns

# Visualizing the data after the feature engineering (FE) step

### 1. Rolling Mean

In [None]:
# Rolling-mean column of each axis
columns_to_plot = [
    'X_Rolling Mean',
    'Y_Rolling Mean',
    'Z_Rolling Mean',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 2. Rolling Variance

In [None]:
# Rolling-variance column of each axis
columns_to_plot = [
    'X_Rolling Variance',
    'Y_Rolling Variance',
    'Z_Rolling Variance',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

In [None]:
# Plot the Y-axis rolling variance with the project's by-code-index helper
# (what the grouping looks like is defined in utils.EDA, not visible here)
plot_by_code_index_matplotlib(
    df,
    machine='M01',
    process='OP06',
    axis='Y_Rolling Variance',
    decimation_factor=1,
)

### 3. Rolling Median

In [None]:
# Rolling-median column of each axis
columns_to_plot = [
    'X_Rolling Median',
    'Y_Rolling Median',
    'Z_Rolling Median',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 4. Rolling Std

In [None]:
# Rolling-standard-deviation column of each axis
columns_to_plot = [
    'X_Rolling Std',
    'Y_Rolling Std',
    'Z_Rolling Std',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 5. Rolling RMS

In [None]:
# Rolling-RMS column of each axis
columns_to_plot = [
    'X_Rolling RMS',
    'Y_Rolling RMS',
    'Z_Rolling RMS',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 6. Rolling Impulse Factor

In [None]:
# Rolling-impulse-factor column of each axis
columns_to_plot = [
    'X_Rolling Impulse Factor',
    'Y_Rolling Impulse Factor',
    'Z_Rolling Impulse Factor',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 7. Rolling Margin Factor

In [None]:
# Rolling-margin-factor column of each axis
columns_to_plot = [
    'X_Rolling Margin Factor',
    'Y_Rolling Margin Factor',
    'Z_Rolling Margin Factor',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 8. Rolling Skewness

In [None]:
# Rolling-skewness column of each axis
columns_to_plot = [
    'X_Rolling Skewness',
    'Y_Rolling Skewness',
    'Z_Rolling Skewness',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 9. Wavelet

In [None]:
# 'D1' wavelet column of each axis
# (presumably the level-1 detail coefficients; confirm against the FE step)
columns_to_plot = [
    'X_D1',
    'Y_D1',
    'Z_D1',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

### 10. Jerk

In [None]:
# Jerk column of each axis
columns_to_plot = [
    'X_Jerk',
    'Y_Jerk',
    'Z_Jerk',
]

# Visualize these columns for machine M01 / process OP06
plot_selected_columns(
    df,
    machine='M01',
    process='OP06',
    columns_to_plot=columns_to_plot,
    decimation_factor=1,
)

In [None]:
# Visually selected features - free-style
# General columns come first, followed by the columns of each axis in X, Y, Z order.
selected_columns = [
    # General columns
    'Time', 'Month', 'Year', 'Machine', 'Process', 'Label', 'Unique_Code', 'Period',
    # X axis
    'X_X_axis',
    'X_Rolling Mean', 'X_Rolling Median', 'X_Rolling Variance', 'X_Rolling Skewness',
    'X_Rolling Impulse Factor', 'X_Rolling Margin Factor',
    # Y axis
    'Y_Y_axis',
    'Y_Rolling Mean', 'Y_Rolling Median', 'Y_Rolling Variance', 'Y_Rolling Skewness',
    'Y_Rolling Impulse Factor', 'Y_Rolling Margin Factor',
    # Z axis
    'Z_Z_axis',
    'Z_Rolling Mean', 'Z_Rolling Median', 'Z_Rolling Variance', 'Z_Rolling Skewness',
    'Z_Rolling Impulse Factor', 'Z_Rolling Margin Factor',
]


In [None]:
import plotly.express as px

def plot_scatter_matrix(df, machine, process, cols, sample_frac=0.05, random_state=0):
    """
    Show a Plotly scatter matrix of the chosen columns for one machine/process pair,
    with points coloured by their 'Unique_Code' value.

    Parameters:
    - df: DataFrame containing the data; the 'Machine', 'Process' and 'Unique_Code'
      columns are read from it.
    - machine: Value the 'Machine' column must equal for a row to be kept.
    - process: Value the 'Process' column must equal for a row to be kept.
    - cols: List of column names to use as scatter-matrix dimensions. Names that
      are not columns of the DataFrame are silently skipped.
    - sample_frac: Fraction of the filtered rows that is randomly sampled for
      plotting (default 0.05).
    - random_state: Seed for the random sampling (default 0).

    The figure is displayed with ``fig.show()``; nothing is returned.
    """

    # Keep only the rows belonging to the requested machine and process
    is_machine = df['Machine'] == machine
    is_process = df['Process'] == process
    subset = df[is_machine & is_process]

    # Retain the requested dimensions that actually exist as columns
    dimensions = [name for name in cols if name in subset.columns]

    # Category order for the colouring is taken from the full (unsampled) subset
    code_order = list(subset['Unique_Code'].unique())

    # Only a random fraction of the filtered rows is plotted
    sampled = subset.sample(frac=sample_frac, random_state=random_state)

    # Build the scatter matrix, coloured by 'Unique_Code'
    fig = px.scatter_matrix(
        sampled,
        dimensions=dimensions,
        color='Unique_Code',
        category_orders={'Unique_Code': code_order},
    )

    # Fixed figure size and legend-title font size
    fig.update_layout(width=1200, height=800, legend_title_font_size=22)

    # Marker size 5; hide the diagonal and the upper half of the matrix
    fig.update_traces(marker={'size': 5}, diagonal_visible=False, showupperhalf=False)

    # Display the figure
    fig.show()

In [None]:
# Columns used as scatter-matrix dimensions.
# NOTE(review): the feature list earlier in this notebook names the axis columns
# 'X_X_axis', 'Y_Y_axis', 'Z_Z_axis'; plot_scatter_matrix silently drops names
# that are not in df, so confirm that these three exist.
selected_columns = [
    'X_axis',
    'Y_axis',
    'Z_axis',
]

# Scatter matrix for machine M01 / process OP06 on a 20% random sample (seed 42)
plot_scatter_matrix(
    df,
    machine='M01',
    process='OP06',
    cols=selected_columns,
    sample_frac=0.2,
    random_state=42,
)