In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import eli5
import warnings
warnings.filterwarnings('ignore')

In [34]:
df = pd.read_excel("data.xlsx")

In [35]:
df.head()

Unnamed: 0,S. No,Date,Time Interval (s),Solid Content,Slurry Bulk Density,Slurry Viscosity,Slurry Temperature,Degassing Time\n(Vacuum),Room Temperature,Humidity of Room,...,Bubbles,Inclusions & Spot,Blisters,Abrasion mark,Scratch,Chip,Ridges,Crack,Line mark,Wave mark
0,1,2023-07-10 00:00:00,11:12:30,60.52,1.7,675.0,25,15.0,22.3,60.5,...,,,,,,,,,,
1,2,,11:13:00,60.52,1.7,675.0,25,15.0,22.3,60.5,...,,,,,,,,,1.0,
2,3,,11:13:30,60.52,1.7,675.0,25,15.0,22.3,60.5,...,3.0,,,,,,,,,1.0
3,4,,11:14:00,60.52,1.7,675.0,25,15.0,22.3,60.5,...,,,,,,,,,,
4,5,,11:14:30,60.52,1.7,675.0,25,15.0,22.3,60.5,...,,,,,,,,,1.0,


In [36]:
df.keys()

Index(['S. No', 'Date', 'Time Interval (s)', 'Solid Content',
       'Slurry Bulk Density', 'Slurry Viscosity', 'Slurry Temperature',
       'Degassing Time\n(Vacuum)', 'Room Temperature', 'Humidity of Room',
       'Air Flow', '% Exhaust Open', 'Blade Gap', 'Casting Speed',
       'Casting Width', 'Slurry Height in casting Vessel',
       'Zone -1 Temperature', 'Zone -2 Temperature', 'Zone -3 Temperature',
       'Zone -4 Temperature', 'Weight of De-winding roll',
       'Weight of winding roll', 'LASER reading of Wet Tape Thickness',
       'Dried Green Tape Thickness', 'Unnamed: 24', 'Unnamed: 25',
       'Unnamed: 26', 'Thickness - average', 'Thickness Variation',
       'Thick & Thin Variation', 'Defect \n(0-4)', 'GBD of Tape',
       'Solid Content of Tape', 'Burr', 'Pits', 'Bubbles', 'Inclusions & Spot',
       'Blisters', 'Abrasion mark', 'Scratch', 'Chip', 'Ridges', 'Crack',
       'Line mark', 'Wave mark'],
      dtype='object')

In [37]:
x_cols = ['Solid Content','Slurry Bulk Density', 'Slurry Viscosity', 'Slurry Temperature','Degassing Time\n(Vacuum)', 'Room Temperature', 'Humidity of Room','Air Flow', '% Exhaust Open', 'Blade Gap', 'Casting Speed','Casting Width', 'Slurry Height in casting Vessel','Zone -1 Temperature', 'Zone -2 Temperature', 'Zone -3 Temperature','Zone -4 Temperature', 'Weight of De-winding roll','Weight of winding roll']
x = df[x_cols]

# BUBBLES

In [38]:
y_cols = ['Bubbles']
y = df[y_cols].notnull().astype('int')
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_smote = sc.fit_transform(X_train_smote)

In [39]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

In [40]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.6379310344827587
Train R^2 score:  0.7522935779816514
precision: 0.0
recall: 0.0
f1 score: 0.0
roc auc score: 0.5
confusion matrix [[37  0]
 [21  0]]


In [41]:
bubbles = eli5.show_weights(logreg , feature_names = x_cols)
bubbles

Weight?,Feature
0.647,Zone -1 Temperature
0.355,Room Temperature
0.202,Humidity of Room
0.151,Weight of winding roll
0.14,Blade Gap
0.135,Slurry Bulk Density
0.058,Air Flow
0.053,Slurry Viscosity
0.049,Slurry Temperature
0.019,Casting Width


# PITS

In [42]:
y_cols = ['Pits']
y = df[y_cols].notnull().astype('int')
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
sc = StandardScaler()
X_train_smote = sc.fit_transform(X_train_smote)

In [43]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

In [44]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1_score)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.7413793103448276
Train R^2 score:  0.8191489361702128
precision: 0.0
recall: 0.0
f1 score: <function f1_score at 0x147a59ea0>
roc auc score: 0.5
confusion matrix [[43  0]
 [15  0]]


In [45]:
pits = eli5.show_weights(logreg , feature_names = x_cols)

In [46]:
pits

Weight?,Feature
1.238,Slurry Height in casting Vessel
1.209,Slurry Bulk Density
0.897,Room Temperature
0.526,Weight of winding roll
0.473,Slurry Viscosity
0.416,Zone -1 Temperature
0.334,Zone -2 Temperature
0.316,Zone -3 Temperature
0.293,Zone -4 Temperature
0.237,Air Flow


# Inclusion and spots

In [47]:
y_cols = ['Inclusions & Spot']
y = df[y_cols].notnull().astype('int')
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
sc = StandardScaler()
X_train_smote = sc.fit_transform(X_train_smote)

In [48]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

In [49]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1_score)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.8620689655172413
Train R^2 score:  0.9589552238805971
precision: 0.8620689655172413
recall: 1.0
f1 score: <function f1_score at 0x147a59ea0>
roc auc score: 0.5
confusion matrix [[ 0  8]
 [ 0 50]]


In [50]:
inclusionAndSpots = eli5.show_weights(logreg , feature_names = x_cols)

In [51]:
inclusionAndSpots

Weight?,Feature
1.51,Room Temperature
1.441,Weight of winding roll
1.044,<BIAS>
0.917,Weight of De-winding roll
0.908,Slurry Bulk Density
0.498,Slurry Temperature
0.432,Zone -1 Temperature
0.305,Casting Speed
0.259,Zone -2 Temperature
0.149,Blade Gap


# Line Marks

In [52]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

In [53]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1_score)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.8620689655172413
Train R^2 score:  0.9589552238805971
precision: 0.8620689655172413
recall: 1.0
f1 score: <function f1_score at 0x147a59ea0>
roc auc score: 0.5
confusion matrix [[ 0  8]
 [ 0 50]]


In [None]:
Linemarks= eli5.show_weights(logreg , feature_names = x_cols)
Linemarks

# Wave Marks

In [23]:
y_cols = ['Wave mark']
y = df[y_cols].notnull().astype('int')
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
sc = StandardScaler()
X_train_smote = sc.fit_transform(X_train_smote)

In [24]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

  y = column_or_1d(y, warn=True)


In [25]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1_score)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.5862068965517241
Train R^2 score:  0.8076923076923077
precision: 0.5862068965517241
recall: 1.0
f1 score: <function f1_score at 0x147a59ea0>
roc auc score: 0.5
confusion matrix [[ 0 24]
 [ 0 34]]




In [32]:
WaveMarks= eli5.show_weights(logreg , feature_names = x_cols)
WaveMarks


Weight?,Feature
2.012,Slurry Viscosity
1.348,Weight of winding roll
1.219,Degassing Time (Vacuum)
1.128,Slurry Bulk Density
0.795,Room Temperature
0.629,Weight of De-winding roll
0.144,Slurry Height in casting Vessel
0.114,Casting Width
0.031,Zone -2 Temperature
-0.019,Zone -1 Temperature


# Ridges

In [27]:
y_cols = ['Ridges']
y = df[y_cols].notnull().astype('int')
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
sc = StandardScaler()
X_train_smote = sc.fit_transform(X_train_smote)

In [28]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression(random_state=16, max_iter=10000)
logreg.fit(X_train_smote, y_train_smote)

  y = column_or_1d(y, warn=True)


In [29]:
print("Test R^2 score: ", logreg.score(X_test, y_test))
print("Train R^2 score: ", logreg.score(X_train_smote, y_train_smote))

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Predictions
y_pred = logreg.predict(X_test)

# Precision
precision = precision_score(y_test, y_pred)
print("precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("recall:", recall)

# F1-score
f1 = f1_score(y_test, y_pred)
print("f1 score:", f1_score)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("roc auc score:", roc_auc)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("confusion matrix", conf_matrix)

Test R^2 score:  0.29310344827586204
Train R^2 score:  0.8561643835616438
precision: 0.29310344827586204
recall: 1.0
f1 score: <function f1_score at 0x147a59ea0>
roc auc score: 0.5
confusion matrix [[ 0 41]
 [ 0 17]]




In [30]:
Ridges= eli5.show_weights(logreg , feature_names = x_cols)
Ridges


Weight?,Feature
2.012,Slurry Viscosity
1.348,Weight of winding roll
1.219,Degassing Time (Vacuum)
1.128,Slurry Bulk Density
0.795,Room Temperature
0.629,Weight of De-winding roll
0.144,Slurry Height in casting Vessel
0.114,Casting Width
0.031,Zone -2 Temperature
-0.019,Zone -1 Temperature


In [31]:
x_cols

['Solid Content',
 'Slurry Bulk Density',
 'Slurry Viscosity',
 'Slurry Temperature',
 'Degassing Time\n(Vacuum)',
 'Room Temperature',
 'Humidity of Room',
 'Air Flow',
 '% Exhaust Open',
 'Blade Gap',
 'Casting Speed',
 'Casting Width',
 'Slurry Height in casting Vessel',
 'Zone -1 Temperature',
 'Zone -2 Temperature',
 'Zone -3 Temperature',
 'Zone -4 Temperature',
 'Weight of De-winding roll',
 'Weight of winding roll']