# OPC

In [None]:

# Install compatible versions
!pip install numpy==1.24.4  # Critical for CatBoost compatibility
!pip install catboost==1.2.7
!pip install tensorflow==2.12.0  # Optional (if you need TF)

# Force restart the runtime (essential!)
import os
os.kill(os.getpid(), 9)  # Or manually restart via Colab's UI

Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pymc 5.21.2 requires numpy>=1.25.0, but you have numpy 1.24.4 which is incompatible.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.24.4 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.4 which is incompatible.
jax 0.5.2 requi

Collecting catboost==1.2.7
  Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7
Collecting tensorflow==2.12.0
  Downloading tensorflow-2.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.12.0)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.13,>=2.12.0 (from tensorflow==2.12.0)
  Downloading keras-2.12.0-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting numpy<1.24,>=1.22 (from tensorflow==2.12.0)
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)


In [None]:
from catboost import CatBoostClassifier


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam
from sklearn.decomposition import PCA


In [None]:
# Load the OPC workbook; every column except the target is used as a feature.
df = pd.read_excel('CRACK DETECTION TRAIL FEB 2025.xlsx')

y = df['Conductance At FAILURE'].values
X = df.drop(columns=['Conductance At FAILURE']).values

# Standardise the features, then keep enough principal components to explain
# 95% of the variance.
# NOTE(review): both transformers are fitted on ALL rows before the train/test
# split performed in a later cell, so test rows influence the preprocessing.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X)


In [None]:
# Turn the continuous target into a binary label: rows whose target exceeds
# (mean + one standard deviation) are labelled 1, everything else 0.
initial_threshold = 0.7  # NOTE(review): not read by any visible cell; kept for compatibility
Target_mean = df['Conductance At FAILURE'].mean()
Target_std = df['Conductance At FAILURE'].std()
# The mean label used to read " At FAILURE Mean" (truncated); it now matches
# the deviation label printed below.
print(f"Conductance At FAILURE Mean: {Target_mean}")
print(f"Conductance At FAILURE Deviation: {Target_std}")
adjusted_threshold = Target_mean + Target_std

y_binary = np.where(y > adjusted_threshold, 1, 0)

# Fixed random_state keeps the 80/20 split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_binary, test_size=0.2, random_state=42)

In [None]:
# Autoencoder sizes: the input width follows the PCA output, the bottleneck is fixed.
# NOTE(review): if PCA keeps fewer than 16 components the bottleneck is wider
# than the input, so the autoencoder would expand rather than compress -- confirm.
encoding_dim = 16
_, input_dim = X_train.shape

In [None]:
# Encoder: Dense -> BatchNorm -> LeakyReLU stages of width 64 and 32, with a
# 20% dropout after the first stage only, then a linear bottleneck.
input_layer = Input(shape=(input_dim,))
hidden = input_layer
for units, dropout_rate in ((64, 0.2), (32, None)):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
    if dropout_rate is not None:
        hidden = Dropout(dropout_rate)(hidden)
encoded = Dense(encoding_dim, activation="linear")(hidden)  # Bottleneck layer

# Decoder: mirrors the encoder widths (32 then 64, no dropout) and projects
# back to the input width with a linear output.
hidden = encoded
for units in (32, 64):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
decoded = Dense(input_dim, activation="linear")(hidden)

In [None]:
# Two models sharing the same layers: the full reconstruction network used for
# training, and the encoder half used afterwards to extract bottleneck features.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

In [None]:
# Train the autoencoder to reconstruct its own input (MSE loss).
# The held-out test split is passed as validation data for monitoring.
reconstruction_optimizer = Adam(learning_rate=0.0005)
autoencoder.compile(optimizer=reconstruction_optimizer, loss="mse")
autoencoder.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, X_test),
)

In [None]:
# Project both splits through the trained encoder (train first, then test).
X_train_encoded, X_test_encoded = (
    encoder.predict(split) for split in (X_train, X_test)
)

In [None]:
# Gradient-boosted classifier trained on the encoder's bottleneck features;
# verbose=100 logs progress every 100 iterations.
catboost_params = dict(
    iterations=1000,
    depth=6,
    learning_rate=0.03,
    loss_function='Logloss',
    verbose=100,
)
catboost_model = CatBoostClassifier(**catboost_params)
catboost_model.fit(X_train_encoded, y_train)

In [None]:
# Evaluate on the held-out split: per-class precision/recall/F1 table followed
# by the raw confusion matrix.

from sklearn.metrics import classification_report, confusion_matrix

y_pred = catboost_model.predict(X_test_encoded)

for summary in (classification_report(y_test, y_pred),
                confusion_matrix(y_test, y_pred)):
    print(summary)


In [None]:
# ROC curve for the positive class on the held-out split.

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Column 1 of predict_proba is the probability of class 1.
y_pred_proba = catboost_model.predict_proba(X_test_encoded)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, lw=2, color='darkorange',
         label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier, for reference.
plt.plot([0, 1], [0, 1], lw=2, color='navy', linestyle='--')
roc_axes = plt.gca()
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])
roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver Operating Characteristic')
roc_axes.legend(loc="lower right")
plt.show()


In [None]:

import matplotlib.pyplot as plt

# Overlay the true and predicted binary labels, sample by sample.
plt.figure(figsize=(10, 6))
for series, series_label, series_marker in (
    (y_test, 'Actual Values', 'o'),
    (y_pred, 'Predicted Values', 'x'),
):
    plt.plot(series, label=series_label, marker=series_marker)
comparison_axes = plt.gca()
comparison_axes.set_xlabel('Sample Index')
comparison_axes.set_ylabel('Target Variable')
comparison_axes.set_title('Actual vs. Predicted Values')
comparison_axes.legend()
comparison_axes.grid(True)
plt.show()


In [None]:
# Classify one hand-written sample with the full trained pipeline.

import numpy as np

# One row of raw feature values. It must have as many columns as the data the
# scaler was fitted on.
# NOTE(review): 11 features are assumed here -- confirm against the workbook,
# and replace these placeholder values with a real measurement.
cracked_value = np.array([[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1]])

# Same preprocessing chain as the training data: scale -> PCA -> encoder.
cracked_value_scaled = scaler.transform(cracked_value)
cracked_value_pca = pca.transform(cracked_value_scaled)
cracked_value_encoded = encoder.predict(cracked_value_pca)

# Class predicted by the trained CatBoost model for that single row.
prediction = catboost_model.predict(cracked_value_encoded)

# Label 1 is reported as a crack, anything else as no crack.
print("The model predicts a crack." if prediction[0] == 1
      else "The model predicts no crack.")

In [None]:
# Annotated confusion-matrix heat map for the held-out split.

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

# Display names for label 0 and label 1, in that order.
class_names = ['No Crack', 'Crack']

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
fig.colorbar(im, ax=ax)

# One tick per matrix row/column, labelled with the class names.
ax.set_xticks(np.arange(cm.shape[1]))
ax.set_yticks(np.arange(cm.shape[0]))
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')

# Tilt the x tick labels so they stay readable.
for tick_label in ax.get_xticklabels():
    tick_label.set(rotation=45, ha="right", rotation_mode="anchor")

# Write each count into its cell; use white text on the darker cells.
fmt = 'd'
thresh = cm.max() / 2.
for i, j in np.ndindex(*cm.shape):
    text_color = "white" if cm[i, j] > thresh else "black"
    ax.text(j, i, format(cm[i, j], fmt),
            ha="center", va="center", color=text_color)
fig.tight_layout()
plt.show()


# PPC

In [None]:
# Load the PPC workbook; every column except the target is used as a feature.
df = pd.read_excel('PPC CRACK DETECTION 2025.xlsx')

y = df['FAILURE'].values
X = df.drop(columns=['FAILURE']).values

# Standardise the features, then keep enough principal components to explain
# 95% of the variance.
# NOTE(review): both transformers are fitted on ALL rows before the train/test
# split performed in a later cell, so test rows influence the preprocessing.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X)


In [None]:
# Turn the continuous target into a binary label: rows whose target exceeds
# (mean + one standard deviation) are labelled 1, everything else 0.
initial_threshold = 0.7  # NOTE(review): not read by any visible cell; kept for compatibility
Target_mean = df['FAILURE'].mean()
Target_std = df['FAILURE'].std()
# The mean label used to read " At FAILURE Mean" (truncated); it now matches
# the deviation label printed below.
print(f"Conductance At FAILURE Mean: {Target_mean}")
print(f"Conductance At FAILURE Deviation: {Target_std}")
adjusted_threshold = Target_mean + Target_std

y_binary = np.where(y > adjusted_threshold, 1, 0)

# Fixed random_state keeps the 80/20 split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_binary, test_size=0.2, random_state=42)

In [None]:
# Autoencoder sizes: the input width follows the PCA output, the bottleneck is fixed.
# NOTE(review): if PCA keeps fewer than 16 components the bottleneck is wider
# than the input, so the autoencoder would expand rather than compress -- confirm.
encoding_dim = 16
_, input_dim = X_train.shape

In [None]:
# Encoder: Dense -> BatchNorm -> LeakyReLU stages of width 64 and 32, with a
# 20% dropout after the first stage only, then a linear bottleneck.
input_layer = Input(shape=(input_dim,))
hidden = input_layer
for units, dropout_rate in ((64, 0.2), (32, None)):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
    if dropout_rate is not None:
        hidden = Dropout(dropout_rate)(hidden)
encoded = Dense(encoding_dim, activation="linear")(hidden)  # Bottleneck layer

# Decoder: mirrors the encoder widths (32 then 64, no dropout) and projects
# back to the input width with a linear output.
hidden = encoded
for units in (32, 64):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
decoded = Dense(input_dim, activation="linear")(hidden)

In [None]:
# Two models sharing the same layers: the full reconstruction network used for
# training, and the encoder half used afterwards to extract bottleneck features.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

In [None]:
# Train the autoencoder to reconstruct its own input (MSE loss).
# The held-out test split is passed as validation data for monitoring.
reconstruction_optimizer = Adam(learning_rate=0.0005)
autoencoder.compile(optimizer=reconstruction_optimizer, loss="mse")
autoencoder.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, X_test),
)

In [None]:
# Project both splits through the trained encoder (train first, then test).
X_train_encoded, X_test_encoded = (
    encoder.predict(split) for split in (X_train, X_test)
)

In [None]:
# Gradient-boosted classifier trained on the encoder's bottleneck features;
# verbose=100 logs progress every 100 iterations.
catboost_params = dict(
    iterations=1000,
    depth=6,
    learning_rate=0.03,
    loss_function='Logloss',
    verbose=100,
)
catboost_model = CatBoostClassifier(**catboost_params)
catboost_model.fit(X_train_encoded, y_train)

In [None]:
# Evaluate on the held-out split: per-class precision/recall/F1 table followed
# by the raw confusion matrix.

from sklearn.metrics import classification_report, confusion_matrix

y_pred = catboost_model.predict(X_test_encoded)

for summary in (classification_report(y_test, y_pred),
                confusion_matrix(y_test, y_pred)):
    print(summary)


In [None]:
# ROC curve for the positive class on the held-out split.

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Column 1 of predict_proba is the probability of class 1.
y_pred_proba = catboost_model.predict_proba(X_test_encoded)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, lw=2, color='darkorange',
         label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier, for reference.
plt.plot([0, 1], [0, 1], lw=2, color='navy', linestyle='--')
roc_axes = plt.gca()
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])
roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver Operating Characteristic')
roc_axes.legend(loc="lower right")
plt.show()


In [None]:

import matplotlib.pyplot as plt

# Overlay the true and predicted binary labels, sample by sample.
plt.figure(figsize=(10, 6))
for series, series_label, series_marker in (
    (y_test, 'Actual Values', 'o'),
    (y_pred, 'Predicted Values', 'x'),
):
    plt.plot(series, label=series_label, marker=series_marker)
comparison_axes = plt.gca()
comparison_axes.set_xlabel('Sample Index')
comparison_axes.set_ylabel('Target Variable')
comparison_axes.set_title('Actual vs. Predicted Values')
comparison_axes.legend()
comparison_axes.grid(True)
plt.show()


In [None]:
# Classify one hand-written sample with the full trained pipeline.

import numpy as np

# One row of raw feature values. It must have as many columns as the data the
# scaler was fitted on.
# NOTE(review): 11 features are assumed here -- confirm against the workbook,
# and replace these placeholder values with a real measurement.
cracked_value = np.array([[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 7.8, 0.9, 1.0, 1.1]])

# Same preprocessing chain as the training data: scale -> PCA -> encoder.
cracked_value_scaled = scaler.transform(cracked_value)
cracked_value_pca = pca.transform(cracked_value_scaled)
cracked_value_encoded = encoder.predict(cracked_value_pca)

# Class predicted by the trained CatBoost model for that single row.
prediction = catboost_model.predict(cracked_value_encoded)

# Label 1 is reported as a crack, anything else as no crack.
print("The model predicts a crack." if prediction[0] == 1
      else "The model predicts no crack.")

In [None]:
# Annotated confusion-matrix heat map for the held-out split.

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

# Display names for label 0 and label 1, in that order.
class_names = ['No Crack', 'Crack']

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
fig.colorbar(im, ax=ax)

# One tick per matrix row/column, labelled with the class names.
ax.set_xticks(np.arange(cm.shape[1]))
ax.set_yticks(np.arange(cm.shape[0]))
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')

# Tilt the x tick labels so they stay readable.
for tick_label in ax.get_xticklabels():
    tick_label.set(rotation=45, ha="right", rotation_mode="anchor")

# Write each count into its cell; use white text on the darker cells.
fmt = 'd'
thresh = cm.max() / 2.
for i, j in np.ndindex(*cm.shape):
    text_color = "white" if cm[i, j] > thresh else "black"
    ax.text(j, i, format(cm[i, j], fmt),
            ha="center", va="center", color=text_color)
fig.tight_layout()
plt.show()


# FRC

In [None]:
# Load the workbook for this section into df1.
# NOTE(review): this section is headed "FRC" but reads the PPC workbook
# ('PPC CRACK DETECTION 2025.xlsx') -- confirm whether an FRC file was intended.
df1 = pd.read_excel('PPC CRACK DETECTION 2025.xlsx')

# Features and target are taken from df1, the frame loaded just above.
# Previously they were read from `df`, a variable left over from an earlier
# section, which fails (or silently uses other data) on a fresh kernel.
X = df1.drop(columns=['FAILURE']).values
y = df1['FAILURE'].values

# Standardise the features, then keep enough principal components to explain
# 95% of the variance.
# NOTE(review): both transformers are fitted on ALL rows before the train/test
# split performed in a later cell, so test rows influence the preprocessing.
scaler = StandardScaler()
X = scaler.fit_transform(X)

pca = PCA(n_components=0.95)

X_pca = pca.fit_transform(X)


In [None]:
# Turn the continuous target into a binary label: rows whose target exceeds
# (mean + one standard deviation) are labelled 1, everything else 0.
initial_threshold = 0.7  # NOTE(review): not read by any visible cell; kept for compatibility
Target_mean = df1['FAILURE'].mean()
Target_std = df1['FAILURE'].std()
# The mean label used to read " At FAILURE Mean" (truncated); it now matches
# the deviation label printed below.
print(f"Conductance At FAILURE Mean: {Target_mean}")
print(f"Conductance At FAILURE Deviation: {Target_std}")
adjusted_threshold = Target_mean + Target_std

y_binary = np.where(y > adjusted_threshold, 1, 0)

# Fixed random_state keeps the 80/20 split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_binary, test_size=0.2, random_state=42)

In [None]:
# Autoencoder sizes: the input width follows the PCA output, the bottleneck is fixed.
# NOTE(review): if PCA keeps fewer than 16 components the bottleneck is wider
# than the input, so the autoencoder would expand rather than compress -- confirm.
encoding_dim = 16
_, input_dim = X_train.shape

In [None]:
# Encoder: Dense -> BatchNorm -> LeakyReLU stages of width 64 and 32, with a
# 20% dropout after the first stage only, then a linear bottleneck.
input_layer = Input(shape=(input_dim,))
hidden = input_layer
for units, dropout_rate in ((64, 0.2), (32, None)):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
    if dropout_rate is not None:
        hidden = Dropout(dropout_rate)(hidden)
encoded = Dense(encoding_dim, activation="linear")(hidden)  # Bottleneck layer

# Decoder: mirrors the encoder widths (32 then 64, no dropout) and projects
# back to the input width with a linear output.
hidden = encoded
for units in (32, 64):
    hidden = Dense(units)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = LeakyReLU()(hidden)
decoded = Dense(input_dim, activation="linear")(hidden)

In [None]:
# Two models sharing the same layers: the full reconstruction network used for
# training, and the encoder half used afterwards to extract bottleneck features.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

In [None]:
# Train the autoencoder to reconstruct its own input (MSE loss).
# The held-out test split is passed as validation data for monitoring.
reconstruction_optimizer = Adam(learning_rate=0.0005)
autoencoder.compile(optimizer=reconstruction_optimizer, loss="mse")
autoencoder.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, X_test),
)

In [None]:
# Project both splits through the trained encoder (train first, then test).
X_train_encoded, X_test_encoded = (
    encoder.predict(split) for split in (X_train, X_test)
)

In [None]:
# Gradient-boosted classifier trained on the encoder's bottleneck features;
# verbose=100 logs progress every 100 iterations.
catboost_params = dict(
    iterations=1000,
    depth=6,
    learning_rate=0.03,
    loss_function='Logloss',
    verbose=100,
)
catboost_model = CatBoostClassifier(**catboost_params)
catboost_model.fit(X_train_encoded, y_train)

In [None]:
# Evaluate on the held-out split: per-class precision/recall/F1 table followed
# by the raw confusion matrix.

from sklearn.metrics import classification_report, confusion_matrix

y_pred = catboost_model.predict(X_test_encoded)

for summary in (classification_report(y_test, y_pred),
                confusion_matrix(y_test, y_pred)):
    print(summary)


In [None]:
# ROC curve for the positive class on the held-out split.

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Column 1 of predict_proba is the probability of class 1.
y_pred_proba = catboost_model.predict_proba(X_test_encoded)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, lw=2, color='darkorange',
         label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier, for reference.
plt.plot([0, 1], [0, 1], lw=2, color='navy', linestyle='--')
roc_axes = plt.gca()
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])
roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver Operating Characteristic')
roc_axes.legend(loc="lower right")
plt.show()


In [None]:

import matplotlib.pyplot as plt

# Overlay the true and predicted binary labels, sample by sample.
plt.figure(figsize=(10, 6))
for series, series_label, series_marker in (
    (y_test, 'Actual Values', 'o'),
    (y_pred, 'Predicted Values', 'x'),
):
    plt.plot(series, label=series_label, marker=series_marker)
comparison_axes = plt.gca()
comparison_axes.set_xlabel('Sample Index')
comparison_axes.set_ylabel('Target Variable')
comparison_axes.set_title('Actual vs. Predicted Values')
comparison_axes.legend()
comparison_axes.grid(True)
plt.show()


In [None]:
# Classify one randomly generated sample with the full trained pipeline.

import numpy as np

# One row of uniform random values in [0, 1). It must have as many columns as
# the data the scaler was fitted on.
# NOTE(review): 11 features are assumed and no seed is set, so the outcome
# differs between runs -- replace with a real measurement.
cracked_value = np.random.rand(1, 11)

# Same preprocessing chain as the training data: scale -> PCA -> encoder.
cracked_value_scaled = scaler.transform(cracked_value)
cracked_value_pca = pca.transform(cracked_value_scaled)
cracked_value_encoded = encoder.predict(cracked_value_pca)

# Class predicted by the trained CatBoost model for that single row.
prediction = catboost_model.predict(cracked_value_encoded)

# Label 1 is reported as a crack, anything else as no crack.
print("The model predicts a crack." if prediction[0] == 1
      else "The model predicts no crack.")


In [None]:
# Grouped bar chart comparing how many held-out samples are cracked / not
# cracked according to the true labels versus the model's predictions.

import matplotlib.pyplot as plt


def autolabel(rects):
    """Write each bar's height just above it on the module-level `ax`."""
    for bar in rects:
        bar_height = bar.get_height()
        bar_center = bar.get_x() + bar.get_width() / 2
        ax.annotate('{}'.format(bar_height),
                    xy=(bar_center, bar_height),
                    xytext=(0, 3),  # nudge the text 3 points upwards
                    textcoords="offset points",
                    ha='center', va='bottom')


# Labels are 0/1, so summing them counts the cracked samples.
cracked_actual = sum(y_test)
cracked_predicted = sum(y_pred)
non_cracked_actual = len(y_test) - cracked_actual
non_cracked_predicted = len(y_pred) - cracked_predicted

categories = ['Cracked', 'Non-Cracked']
actual_values = [cracked_actual, non_cracked_actual]
predicted_values = [cracked_predicted, non_cracked_predicted]

width = 0.35  # the width of the bars
x = range(len(categories))
predicted_positions = [pos + width for pos in x]
tick_positions = [pos + width / 2 for pos in x]

# "Predicted" bars sit one bar-width to the right of the "Actual" bars; the
# tick for each category is centred between the pair.
fig, ax = plt.subplots()
rects1 = ax.bar(x, actual_values, width, label='Actual')
rects2 = ax.bar(predicted_positions, predicted_values, width, label='Predicted')

ax.set_ylabel('Count')
ax.set_title('Actual vs. Predicted Crack Counts')
ax.set_xticks(tick_positions)
ax.set_xticklabels(categories)
ax.legend()

for bar_group in (rects1, rects2):
    autolabel(bar_group)
fig.tight_layout()
plt.show()
