In [1]:
import pandas as pd
from sklearn.utils import resample
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, confusion_matrix

In [7]:
df = pd.read_excel(r'C:\Users\pawvaibh\Downloads\With_ProductId.xlsx')

In [8]:
print(df['CategoryName'].value_counts())

CategoryName
Instrument        5501
Power System       943
Rigid Scope        845
Flexible Scope     274
Video              103
Ultrasound          74
Ophthalmic          24
Endocam             13
Name: count, dtype: int64


In [10]:

instrument_df = df[df['CategoryName'] == 'Instrument']
instrument_downsampled = resample(instrument_df, replace=False, n_samples=3500, random_state=42)

video_df = df[df['CategoryName'] == 'Video']
video_upsampled = resample(video_df, replace=True, n_samples=200, random_state=42)

ultrasound_df = df[df['CategoryName'] == 'Ultrasound']
ultrasound_upsampled = resample(ultrasound_df, replace=True, n_samples=200, random_state=42)

ophthalmic_df = df[df['CategoryName'] == 'Ophthalmic']
ophthalmic_upsampled = resample(ophthalmic_df, replace=True, n_samples=200, random_state=42)

endocam_df = df[df['CategoryName'] == 'Endocam']
endocam_upsampled = resample(endocam_df, replace=True, n_samples=200, random_state=42)

# Concatenate the upsampled/downsampled categories with the original dataframe
df = pd.concat([instrument_downsampled, df[df['CategoryName'] != 'Instrument'], video_upsampled, ultrasound_upsampled, ophthalmic_upsampled, endocam_upsampled])

# Verify the counts after upsampling
print(df['CategoryName'].value_counts())

CategoryName
Instrument        3500
Power System       943
Rigid Scope        845
Video              303
Flexible Scope     274
Ultrasound         274
Ophthalmic         224
Endocam            213
Name: count, dtype: int64


In [19]:
df = pd.read_excel(r'C:\Users\pawvaibh\Downloads\New_2.xlsx')

In [20]:
print(df['CategoryName'].value_counts())

CategoryName
Instrument        3000
Power System       799
Rigid Scope        699
Video              245
Ultrasound         221
Flexible Scope     219
Ophthalmic         171
Endocam            171
Name: count, dtype: int64


In [21]:
# Combine categories except "Instrument" into one category called "Others"
def combine_categories(df):
    df.loc[~df['CategoryName'].isin(['Instrument']), 'CategoryName'] = 'Others'
    return df

def segregate(df):
    X = df.drop(columns=['CategoryName'])
    y = df['CategoryName']
    return X, y
    
# Train and evaluate RandomForestClassifier
def train_and_evaluate_model(X, y):
    model = RandomForestClassifier(random_state=42)
    
    model.fit(X, y)
    
    # Predict on the training and testing data
    predictions = model.predict(X)
    
    return model

# Preprocess the data
df1 = df.copy()
df2 = df.copy()
df3 = df.copy()

# Model 1: Classify Instrument vs. Others
df1 = combine_categories(df1)

X1, y1 = segregate(df1)

model_first = train_and_evaluate_model(X1, y1)

test_df = pd.read_excel(r'C:\Users\pawvaibh\Downloads\TestCases_Sampled - Copy.xlsx')

# Iterate over each row in the test data
for index, row in test_df.iterrows():
    # Extract the input features from the row
    new_input = row[['SterilizationMethodId', 'MachineTypeId', 'ItemTypeId', 'SpecialtyId']].values.reshape(1, -1)
    # Make predictions using the trained models
    predicted_category = model_first.predict(new_input)

    if predicted_category == 'Others':
        # Model 2: Classify Power System, Rigid Scope vs. Others
        df2 = df2[~df2['CategoryName'].isin(['Instrument'])]
        df2.loc[~df2['CategoryName'].isin(['Power System', 'Rigid Scope']), 'CategoryName'] = 'Others'

        X2, y2 = segregate(df2)

        model_sec = train_and_evaluate_model(X2, y2)
    
        predicted_category = model_sec.predict(new_input)
    
        if predicted_category == 'Others':
            # Model 3: Classify Flexible Scope, Video Vs. Others
            df3 = df3[df3['CategoryName'].isin(['Video', 'Flexible Scope', 'Ultrasound', 'Opthamlmic', 'Endocam'])]

            X3, y3 = segregate(df3)
            
            model_third = train_and_evaluate_model(X3, y3)
        
            predicted_category = model_third.predict(new_input)
            
            test_df.at[index, 'Predicted_Values'] = predicted_category

        else:
            test_df.at[index, 'Predicted_Values'] = predicted_category
    else:
        test_df.at[index, 'Predicted_Values'] = predicted_category

# Save the updated DataFrame back to the Excel file
test_df.to_excel(r'C:\Users\pawvaibh\Downloads\Predicted_Output_3.xlsx', index=False)

  test_df.at[index, 'Predicted_Values'] = predicted_category


In [22]:

# Read the predicted values and true values from the Excel file
val_df = pd.read_excel(r'C:\Users\pawvaibh\Downloads\Predicted_Output_3.xlsx')

# Extract true and predicted values
true_values = val_df['True_Vaues']
predicted_values = val_df['Predicted_Values']

# Get unique classes from both true and predicted values
unique_classes_true = true_values.unique()
unique_classes_predicted = predicted_values.unique()

# Calculate evaluation metrics
accuracy = accuracy_score(true_values, predicted_values)
precision = precision_score(true_values, predicted_values, average='weighted')
recall = recall_score(true_values, predicted_values, average='weighted')
f1 = f1_score(true_values, predicted_values, average='weighted')
conf_matrix = confusion_matrix(true_values, predicted_values)

# Print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Print the unique classes
print("Unique classes in true labels:", unique_classes_true)
print("Unique classes in predicted labels:" , unique_classes_predicted)

print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.5984776403425309
Precision: 0.5450480123395642
Recall: 0.5984776403425309
F1 Score: 0.5560282762474162
Unique classes in true labels: ['Instrument' 'Power System' 'Rigid Scope' 'Video' 'Flexible Scope'
 'Ultrasound' 'Ophthalmic' 'Endocam']
Unique classes in predicted labels: ['Instrument' 'Flexible Scope' 'Rigid Scope' 'Power System' 'Video'
 'Ultrasound' 'Endocam']
Confusion Matrix:
[[ 31   0   9   0   0   0   0   2]
 [  1   6  23   0   9  12   3   1]
 [  0  10 448   0  22  11   4   5]
 [  0  16  21   0   0   0   4  12]
 [  2   1  77   0  30  20   3  11]
 [  6   6  32   0   7  45  12  38]
 [  4   1   3   0   0   2  38   5]
 [  4   2  14   0   0   4   3  31]]


  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Generate classification report
report = classification_report(true_values, predicted_values)

# Print classification report
print(report)

                precision    recall  f1-score   support

       Endocam       0.65      0.74      0.69        42
Flexible Scope       0.14      0.11      0.12        55
    Instrument       0.71      0.90      0.80       500
    Ophthalmic       0.00      0.00      0.00        53
  Power System       0.44      0.21      0.28       144
   Rigid Scope       0.48      0.31      0.38       146
    Ultrasound       0.57      0.72      0.63        53
         Video       0.30      0.53      0.38        58

      accuracy                           0.60      1051
     macro avg       0.41      0.44      0.41      1051
  weighted avg       0.55      0.60      0.56      1051



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
