In [1]:
import pandas as pd

In [2]:
df_A = pd.read_csv(r"C:\Users\priya\results\df_A.csv")
df_B = pd.read_csv(r"C:\Users\priya\results\df_B.csv")
df_C = pd.read_csv(r"C:\Users\priya\results\df_C.csv")
df_D = pd.read_csv(r"C:\Users\priya\results\df_D.csv")
df_A['particle_type_human'] = 'A'
df_B['particle_type_human'] = 'B'
df_C['particle_type_human'] = 'C'
df_D['particle_type_human'] = 'D'

In [3]:
df_all = pd.concat([df_A, df_B, df_C, df_D], ignore_index=True)

In [4]:
def classify_particle_soft_majority(row):
    conditions_A = [
        0 <= row['Area'] < 1000,
        0 <= row['AverageThickness'] < 10,
        0 <= row['ConvexArea'] < 1000,
        0 <= row['Ellipse.Radius1'] < 20,
        0 <= row['GeodesicDiameter'] < 30,
        0 <= row['InscrDisc.Radius'] < 10,
        0 <= row['MaxFeretDiam'] < 30,
        0 <= row['OBox.Length'] < 40,
        0 <= row['OBox.Width'] < 30,
        0 <= row['Perimeter'] < 100
    ]

    conditions_B = [
        1000 <= row['Area'] < 2000,
        10 <= row['AverageThickness'] < 20,
        1000 <= row['ConvexArea'] < 2500,
        20 <= row['Ellipse.Radius1'] < 30,
        30 <= row['GeodesicDiameter'] < 70,
        10 <= row['InscrDisc.Radius'] < 15,
        30 <= row['MaxFeretDiam'] < 60,
        40 <= row['OBox.Length'] < 60,
        30 <= row['OBox.Width'] < 40,
        100 <= row['Perimeter'] < 200
    ]

    conditions_C = [
        2000 <= row['Area'] < 6000,
        20 <= row['AverageThickness'] < 40,
        2500 <= row['ConvexArea'] < 5000,
        30 <= row['Ellipse.Radius1'] < 40,
        70 <= row['GeodesicDiameter'] < 100,
        15 <= row['InscrDisc.Radius'] < 30,
        60 <= row['MaxFeretDiam'] < 100,
        60 <= row['OBox.Length'] < 120,
        40 <= row['OBox.Width'] < 70,
        200 <= row['Perimeter'] < 300
    ]

    conditions_D = [
        row['Area'] >= 6000,
        row['Perimeter'] >= 300
    ]

    # Count how many conditions are True per group
    scores = {
        'A': sum(conditions_A),
        'B': sum(conditions_B),
        'C': sum(conditions_C),
        'D': sum(conditions_D)
    }

    # Assign class with ≥ 5 matching conditions
    for cls, score in scores.items():
        if score >= 5:
            return cls

    return 'Unclassified'


In [5]:
def is_class_D(row):
    conditions_D = [
        row['Area'] >= 6000,
        row['Perimeter'] >= 300
    ]

    return int(sum(conditions_D) >= 1)  # 1 = Class D, 0 = Not D


In [6]:
df_all['logic_class'] = df_all.apply(classify_particle_soft_majority, axis=1)
df_all['is_D_predicted'] = df_all.apply(is_class_D, axis=1)


In [10]:
# Count how many logic classifications match human labels
total_correct = (df_all['logic_class'] == df_all['particle_type_human']).sum()

# Compute accuracy
accuracy = total_correct / len(df_all)

print("Logic classification accuracy:", round(accuracy, 4))


Logic classification accuracy: 0.429


In [8]:
df_all

Unnamed: 0,Label,PixelCount,Area,Perimeter,Circularity,EulerNumber,Box.X.Min,Box.X.Max,Box.Y.Min,Box.Y.Max,...,Tortuosity,InscrDisc.Center.X,InscrDisc.Center.Y,InscrDisc.Radius,AverageThickness,GeodesicElongation,Solidity,particle_type_human,logic_class,is_D_predicted
0,377,375.0,375,98.907,0.482,1,122,149,453,486,...,0.997,132,462,7,7.270,2.787,0.866051,A,A,0
1,370,281.0,281,89.427,0.442,1,97,128,440,458,...,1.025,110,445,6,5.638,2.735,0.753351,A,A,0
2,289,805.0,805,177.378,0.322,1,64,110,314,349,...,1.017,88,329,10,8.062,2.341,0.746753,A,B,0
3,314,336.0,336,93.869,0.479,1,98,119,353,386,...,1.014,106,359,7,6.837,2.544,0.724138,A,A,0
4,324,572.0,572,114.441,0.549,1,138,172,374,402,...,0.990,151,386,10,9.677,1.841,0.856287,A,A,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,2,780.0,780,104.500,0.898,1,185,216,5,41,...,1.024,200,21,15,17.464,1.267,0.972569,D,B,0
327,4,599.0,599,101.589,0.729,0,229,257,32,64,...,0.998,243,46,12,12.593,1.442,0.922958,D,A,0
328,17,15048.0,15048,588.635,0.546,-3,123,289,62,223,...,0.999,182,169,49,39.339,1.943,0.887735,D,Unclassified,0
329,52,6254.0,6254,313.366,0.800,-5,270,361,237,326,...,0.984,316,282,42,27.466,1.126,0.978870,D,Unclassified,0


In [9]:
df_all['is_D_actual'] = (df_all['particle_type_human'] == 'D').astype(int)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

accuracy = accuracy_score(df_all['is_D_actual'], df_all['is_D_predicted'])
print(f"✅ Accuracy of D vs not-D logic: {accuracy:.2f}")

print("\n📊 Confusion Matrix:")
print(confusion_matrix(df_all['is_D_actual'], df_all['is_D_predicted']))

print("\n📝 Classification Report:")
print(classification_report(df_all['is_D_actual'], df_all['is_D_predicted'], target_names=['Not D', 'D']))

✅ Accuracy of D vs not-D logic: 0.94

📊 Confusion Matrix:
[[310   0]
 [ 21   0]]

📝 Classification Report:
              precision    recall  f1-score   support

       Not D       0.94      1.00      0.97       310
           D       0.00      0.00      0.00        21

    accuracy                           0.94       331
   macro avg       0.47      0.50      0.48       331
weighted avg       0.88      0.94      0.91       331



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
