In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, round, concat_ws, lit, regexp_extract
from pyspark.sql.types import DoubleType, StringType
from pyspark.ml.feature import MinMaxScaler, VectorAssembler
from pyspark.ml import Pipeline
from pyspark.sql.functions import col, round, to_timestamp
from pyspark.sql.types import DoubleType

In [2]:
# Initializing Spark Session
spark = SparkSession.builder \
    .appName("DataPreprocessing") \
    .getOrCreate()

24/11/24 20:03:21 WARN Utils: Your hostname, gmtejar resolves to a loopback address: 127.0.1.1; using 192.168.1.63 instead (on interface wlo1)
24/11/24 20:03:21 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/11/24 20:03:21 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/11/24 20:03:22 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
24/11/24 20:03:22 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.


In [3]:
# Load the dataset
file_path = "data/dataset.csv"
data_df = spark.read.csv(file_path, header=True, inferSchema=True)

# Step 1: Rename columns
For better understanding, we are going to rename the column names.

In [4]:
renamed_columns = {
    "Num": "Number",
    "Timestamp": "Time_Stamp",
    "C0": "Current_Joint0 (mA)",
    "T0": "Temperature_Joint0 (°C)",
    "C1": "Current_Joint1 (mA)",
    "T1": "Temperature_Joint1 (°C)",
    "C2": "Current_Joint2 (mA)",
    "T2": "Temperature_Joint2 (°C)",
    "C3": "Current_Joint3 (mA)",
    "T3": "Temperature_Joint3 (°C)",
    "C4": "Current_Joint4 (mA)",
    "T4": "Temperature_Joint4 (°C)",
    "C5": "Current_Joint5 (mA)",
    "T5": "Temperature_Joint5 (°C)",
    "S0": "Speed_Joint0 (m/s)",
    "S1": "Speed_Joint1 (m/s)",
    "S2": "Speed_Joint2 (m/s)",
    "S3": "Speed_Joint3 (m/s)",
    "S4": "Speed_Joint4 (m/s)",
    "S5": "Speed_Joint5 (m/s)",
    "Tool_current": "Tool_Current (mA)",
    "cycle": "Cycle",
    "Robot_ProtectiveStop": "Robot_Protective_Stop",
    "grip_lost": "Grip_Lost"
}
data_df = data_df.select([col(c).alias(renamed_columns.get(c, c)) for c in data_df.columns])

# Step 2: Data Normalization
In this step, we are removing the units and rounding up the data to numerical values for better use.

In [5]:
columns_with_units = [
    'Current_Joint0 (mA)', 'Current_Joint1 (mA)', 'Current_Joint2 (mA)', 'Current_Joint3 (mA)',
    'Current_Joint4 (mA)', 'Current_Joint5 (mA)', 'Temperature_Joint0 (°C)', 'Temperature_Joint1 (°C)',
    'Temperature_Joint2 (°C)', 'Temperature_Joint3 (°C)','Temperature_Joint4 (°C)','Temperature_Joint5 (°C)',
    'Speed_Joint0 (m/s)', 'Speed_Joint1 (m/s)','Speed_Joint2 (m/s)', 'Speed_Joint3 (m/s)',
    'Speed_Joint4 (m/s)', 'Speed_Joint5 (m/s)', 'Tool_Current (mA)'
]
for column in columns_with_units:
    data_df = data_df.withColumn(
        column,regexp_extract(col(column), r'([-+]?[0-9]*\.?[0-9]+)', 0).cast("double")
    )

# Step 3: Converting result Boolean columns to Binary
For the result column (Grip_Lost), instead of boolean value, we have replaced it with Binary values (0/1)

In [6]:
data_df = data_df.withColumn(
    "Grip_Lost",
    when(col("Grip_Lost").isin("TRUE", "T", "t", "true"), 1).otherwise(0)
)

# Step 4: : Basic Data Cleaning Steps using RDD

We have removed the unwanted columns

In [7]:
from pyspark.sql.window import Window
from pyspark.sql.functions import avg, col
from pyspark.sql.types import TimestampType, DoubleType

# Remove Unwanted Columns
columns_to_keep = [col_name for col_name in data_df.columns if col_name != "Number"]
cleaned_rdd = data_df.rdd.map(lambda row: [row[c] for c in columns_to_keep])
data_df = cleaned_rdd.toDF(columns_to_keep)

                                                                                

# Step 5: Handle Missing Values and Duplicate values

In [8]:
data_df = data_df.na.drop()
distinct_rdd = data_df.rdd.distinct()
data_df = distinct_rdd.toDF(data_df.columns)

                                                                                

# Step6: Formatting the timestamp and rounding up the decimal values

In [9]:
data_df = data_df.withColumn("Time_Stamp", to_timestamp(col("Time_Stamp"), "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"))
columns = [
    'Current_Joint0 (mA)', 'Current_Joint1 (mA)', 'Current_Joint2 (mA)', 'Current_Joint3 (mA)',
    'Current_Joint4 (mA)', 'Current_Joint5 (mA)', 'Temperature_Joint0 (°C)', 'Temperature_Joint1 (°C)',
    'Temperature_Joint2 (°C)', 'Temperature_Joint3 (°C)', 'Speed_Joint0 (m/s)', 'Speed_Joint1 (m/s)',
    'Speed_Joint2 (m/s)', 'Speed_Joint3 (m/s)', 'Speed_Joint4 (m/s)', 'Speed_Joint5 (m/s)', 'Tool_Current (mA)'
]
# Convert other columns to double
for col_name in columns:
    data_df = data_df.withColumn(col_name, col(col_name).cast(DoubleType()))
    data_df = data_df.withColumn(col_name, round(col(col_name), 3))

data_df.toPandas()

Unnamed: 0,Time_Stamp,Current_Joint0 (mA),Temperature_Joint0 (°C),Current_Joint1 (mA),Temperature_Joint1 (°C),Current_Joint2 (mA),Temperature_Joint2 (°C),Current_Joint3 (mA),Temperature_Joint3 (°C),Current_Joint4 (mA),...,Speed_Joint0 (m/s),Speed_Joint1 (m/s),Speed_Joint2 (m/s),Speed_Joint3 (m/s),Speed_Joint4 (m/s),Speed_Joint5 (m/s),Tool_Current (mA),cycle,Robot_Protective_Stop,Grip_Lost
0,2022-10-26 08:17:21.847,0.110,27.875,-2.025,29.375,-1.531,29.375,-0.999,32.125,-0.063,...,0.296,0.000,0.001,-0.133,-0.007,-0.153,0.083,1,0,0
1,2022-10-26 08:17:22.852,0.596,27.875,-2.278,29.313,-0.867,29.438,-0.206,32.188,-1.063,...,-7.391,0.000,0.002,0.002,-0.001,0.000,0.506,1,0,0
2,2022-10-26 08:17:23.857,-0.229,27.875,-2.800,29.313,-2.304,29.438,-0.351,32.125,-0.669,...,0.137,0.008,-2.536,0.380,0.000,-0.497,0.079,1,0,0
3,2022-10-26 08:17:24.863,0.065,27.875,-3.688,29.313,-1.218,29.438,-1.209,32.125,-0.820,...,-0.090,-0.005,-0.009,-0.384,0.018,0.426,0.083,1,0,0
4,2022-10-26 08:17:25.877,0.884,27.875,-2.939,29.375,-1.794,29.438,-2.356,32.188,-0.966,...,0.127,0.006,0.001,-0.353,0.015,0.181,0.086,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7203,2022-10-26 15:36:02.555,-0.109,37.188,-2.262,40.313,-1.083,40.688,-0.495,43.375,-0.019,...,0.000,0.000,0.000,0.000,0.000,0.000,0.080,264,0,0
7204,2022-10-26 15:36:03.562,-0.099,37.188,-2.252,40.313,-1.094,40.688,-0.516,43.375,-0.009,...,0.000,0.000,0.000,0.000,0.000,0.000,0.079,264,0,0
7205,2022-10-26 15:36:04.571,-0.121,37.188,-2.281,40.250,-1.121,40.688,-0.502,43.375,-0.003,...,0.000,0.000,0.000,0.000,0.000,0.000,0.091,264,0,0
7206,2022-10-26 15:36:05.572,-0.129,37.188,-2.269,40.313,-1.092,40.625,-0.491,43.375,-0.001,...,0.000,0.000,0.000,0.000,0.000,0.000,0.090,264,0,0


# Step 7: Combine and Decompose columns

In [10]:
# Combine Robot_Protective_Stop and Grip_Lost columns into a single string column
data_df = data_df.withColumn(
    "Combined_Status",
    concat_ws("_", col("Robot_Protective_Stop").cast(StringType()), col("Grip_Lost").cast(StringType()))
)
data_df = data_df.withColumn(
    "Combined_Status",
    when(col("Combined_Status") == "0_0", 0)
    .when(col("Combined_Status") == "1_0", 1)
    .when(col("Combined_Status") == "0_1", 2)
    .when(col("Combined_Status") == "1_1", 3)
    .otherwise(4)  # For unexpected values
)
# Print unique values in Combined_Status
data_df.select("Combined_Status").distinct().show()

+---------------+
|Combined_Status|
+---------------+
|              1|
|              3|
|              2|
|              0|
+---------------+



In [11]:
for col_name in [
    "Current_Joint0 (mA)", "Current_Joint1 (mA)", "Current_Joint2 (mA)", "Current_Joint3 (mA)",
    "Current_Joint4 (mA)", "Current_Joint5 (mA)", "Tool_Current (mA)" ,"Speed_Joint0 (m/s)", "Speed_Joint1 (m/s)",
    "Speed_Joint2 (m/s)", "Speed_Joint3 (m/s)", "Speed_Joint4 (m/s)", "Speed_Joint5 (m/s)"
]:
    data_df = data_df.withColumn(f"{col_name}_Positive", when(col(col_name) > 0, col(col_name)).otherwise(0))
    data_df = data_df.withColumn(f"{col_name}_Negative", when(col(col_name) < 0, -col(col_name)).otherwise(0))
    data_df = data_df.drop(col_name)

## Removing Outliers

In [12]:
from pyspark.sql.functions import col, expr
lower_percentile = 0.025
upper_percentile = 0.975
numeric_columns = [field.name for field in data_df.schema.fields if field.dataType.typeName() in ("int", "double")]
quantile_bounds = {}
for column in numeric_columns:
    bounds = data_df.approxQuantile(column, [lower_percentile, upper_percentile], 0.001)
    quantile_bounds[column] = bounds
    
def is_within_bounds(row):
    for col in numeric_columns:
        lower, upper = quantile_bounds[col]
        if not (lower <= row[col] <= upper):
            return False
    return True

filtered_rdd = data_df.rdd.filter(is_within_bounds)
data_df = spark.createDataFrame(filtered_rdd, data_df.schema)

In [13]:
data_df.toPandas()

24/11/24 20:03:36 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


Unnamed: 0,Time_Stamp,Temperature_Joint0 (°C),Temperature_Joint1 (°C),Temperature_Joint2 (°C),Temperature_Joint3 (°C),Temperature_Joint4 (°C),Temperature_Joint5 (°C),cycle,Robot_Protective_Stop,Grip_Lost,...,Speed_Joint1 (m/s)_Positive,Speed_Joint1 (m/s)_Negative,Speed_Joint2 (m/s)_Positive,Speed_Joint2 (m/s)_Negative,Speed_Joint3 (m/s)_Positive,Speed_Joint3 (m/s)_Negative,Speed_Joint4 (m/s)_Positive,Speed_Joint4 (m/s)_Negative,Speed_Joint5 (m/s)_Positive,Speed_Joint5 (m/s)_Negative
0,2022-10-26 08:21:37.159,28.688,30.375,30.563,33.438,33.8750,33.5000,9,0,0,...,0.000,0.045,0.057,0.000,0.000,0.315,0.017,0.000,0.252,0.000
1,2022-10-26 08:21:41.185,28.688,30.375,30.563,33.500,33.8750,33.5000,9,0,0,...,0.003,0.000,0.000,0.021,0.296,0.000,0.000,0.010,0.000,0.150
2,2022-10-26 08:21:45.197,28.688,30.375,30.563,33.500,33.9375,33.5000,9,0,0,...,0.138,0.000,0.000,2.556,0.391,0.000,0.000,0.000,0.000,0.491
3,2022-10-26 08:21:47.205,28.750,30.375,30.563,33.500,33.9375,33.5000,9,0,0,...,0.000,0.062,0.077,0.000,0.000,0.784,0.033,0.000,0.462,0.000
4,2022-10-26 08:21:49.210,28.688,30.438,30.563,33.563,33.9375,33.5625,9,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,4.004,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3784,2022-10-26 15:36:02.555,37.188,40.313,40.688,43.375,45.2500,44.5625,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3785,2022-10-26 15:36:03.562,37.188,40.313,40.688,43.375,45.2500,44.5000,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3786,2022-10-26 15:36:04.571,37.188,40.250,40.688,43.375,45.2500,44.5000,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3787,2022-10-26 15:36:05.572,37.188,40.313,40.625,43.375,45.2500,44.5625,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [14]:
data_df.toPandas().to_csv("cleaned_data.csv",index=None)

In [15]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier, LinearSVC, NaiveBayes, GBTClassifier,OneVsRest
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.mllib.evaluation import MulticlassMetrics
import matplotlib.pyplot as plt
import time

In [16]:
df = spark.read.csv("cleaned_data.csv", header=True, inferSchema=True)

In [17]:
df.toPandas()

Unnamed: 0,Time_Stamp,Temperature_Joint0 (°C),Temperature_Joint1 (°C),Temperature_Joint2 (°C),Temperature_Joint3 (°C),Temperature_Joint4 (°C),Temperature_Joint5 (°C),cycle,Robot_Protective_Stop,Grip_Lost,...,Speed_Joint1 (m/s)_Positive,Speed_Joint1 (m/s)_Negative,Speed_Joint2 (m/s)_Positive,Speed_Joint2 (m/s)_Negative,Speed_Joint3 (m/s)_Positive,Speed_Joint3 (m/s)_Negative,Speed_Joint4 (m/s)_Positive,Speed_Joint4 (m/s)_Negative,Speed_Joint5 (m/s)_Positive,Speed_Joint5 (m/s)_Negative
0,2022-10-26 08:21:37.159,28.688,30.375,30.563,33.438,33.8750,33.5000,9,0,0,...,0.000,0.045,0.057,0.000,0.000,0.315,0.017,0.000,0.252,0.000
1,2022-10-26 08:21:41.185,28.688,30.375,30.563,33.500,33.8750,33.5000,9,0,0,...,0.003,0.000,0.000,0.021,0.296,0.000,0.000,0.010,0.000,0.150
2,2022-10-26 08:21:45.197,28.688,30.375,30.563,33.500,33.9375,33.5000,9,0,0,...,0.138,0.000,0.000,2.556,0.391,0.000,0.000,0.000,0.000,0.491
3,2022-10-26 08:21:47.205,28.750,30.375,30.563,33.500,33.9375,33.5000,9,0,0,...,0.000,0.062,0.077,0.000,0.000,0.784,0.033,0.000,0.462,0.000
4,2022-10-26 08:21:49.210,28.688,30.438,30.563,33.563,33.9375,33.5625,9,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,4.004,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3784,2022-10-26 15:36:02.555,37.188,40.313,40.688,43.375,45.2500,44.5625,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3785,2022-10-26 15:36:03.562,37.188,40.313,40.688,43.375,45.2500,44.5000,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3786,2022-10-26 15:36:04.571,37.188,40.250,40.688,43.375,45.2500,44.5000,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3787,2022-10-26 15:36:05.572,37.188,40.313,40.625,43.375,45.2500,44.5625,264,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [18]:
# Drop unwanted columns
df = df.drop('Time_Stamp', 'cycle', 'Grip_Lost', 'Robot_Protective_Stop')
# Define features and label
assembler = VectorAssembler(inputCols=[col for col in df.columns if col != 'Combined_Status'], outputCol='features')
df = assembler.transform(df)
# Prepare the final dataset with features and label (Combined_Status)
data = df.select('features', 'Combined_Status')
# Binary Classification Evaluator
evaluator = MulticlassClassificationEvaluator(labelCol="Combined_Status", predictionCol="prediction", metricName="accuracy")
# Split the data into training and test sets
train_data, test_data = data.randomSplit([0.8, 0.2], seed=42)
results = []

In [19]:
def evaluate_model(predictions):
    evaluator = MulticlassClassificationEvaluator(labelCol="Combined_Status", predictionCol="prediction")
    accuracy = evaluator.evaluate(predictions, {evaluator.metricName: "accuracy"})
    precision = evaluator.evaluate(predictions, {evaluator.metricName: "weightedPrecision"})
    recall = evaluator.evaluate(predictions, {evaluator.metricName: "weightedRecall"})
    f1 = evaluator.evaluate(predictions, {evaluator.metricName: "f1"})
    
    return accuracy, precision, recall, f1

In [20]:
# Logistic Regression Model
lr = LogisticRegression(labelCol='Combined_Status', featuresCol='features')
start_time = time.time()
lr_model = lr.fit(train_data)
end_time = time.time()
lr_predictions = lr_model.transform(test_data)
lr_accuracy, lr_precision, lr_recall, lr_f1 = evaluate_model(lr_predictions)
results.append(['Logistic Regression', lr_accuracy, lr_precision, lr_recall, lr_f1])
print(f"Logistic Regression - Accuracy: {lr_accuracy}, Precision: {lr_precision}, Recall: {lr_recall}, F1 Score: {lr_f1}")
print(f"Time Taken : {end_time-start_time}")

Logistic Regression - Accuracy: 0.9502133712660028, Precision: 0.9358592730898313, Recall: 0.9502133712660029, F1 Score: 0.9397732771579163
Time Taken : 4.3547797203063965


In [21]:
# Decision Tree Model
start_time = time.time()
dt = DecisionTreeClassifier(labelCol="Combined_Status", featuresCol="features")
dt_model = dt.fit(train_data)
end_time = time.time()
dt_predictions = dt_model.transform(test_data)
dt_accuracy, dt_precision, dt_recall, dt_f1 = evaluate_model(dt_predictions)
results.append(['Decision Tree', dt_accuracy, dt_precision, dt_recall, dt_f1])
print(f"Decision Tree - Accuracy: {dt_accuracy}, Precision: {dt_precision}, Recall: {dt_recall}, F1 Score: {dt_f1}")
print(f"Time Taken : {end_time-start_time}")

Decision Tree - Accuracy: 0.9459459459459459, Precision: 0.9412307816382519, Recall: 0.9459459459459459, F1 Score: 0.941619497555127
Time Taken : 1.188058853149414


In [22]:
# Random Forest Model
start_time = time.time()
rf = RandomForestClassifier(labelCol="Combined_Status", featuresCol="features")
rf_model = rf.fit(train_data)
end_time = time.time()
rf_predictions = rf_model.transform(test_data)
rf_accuracy, rf_precision, rf_recall, rf_f1 = evaluate_model(rf_predictions)
results.append(['Random Forest', rf_accuracy, rf_precision, rf_recall, rf_f1])
print(f"Random Forest - Accuracy: {rf_accuracy}, Precision: {rf_precision}, Recall: {rf_recall}, F1 Score: {rf_f1}")
print(f"Time Taken : {end_time-start_time}")

Random Forest - Accuracy: 0.9559032716927454, Precision: 0.9137510648328946, Recall: 0.9559032716927454, F1 Score: 0.9343519979309454
Time Taken : 0.9660627841949463


In [None]:
# Support Vector Machine Model
spark.sparkContext.setLogLevel("ERROR")
start_time = time.time()
svm = LinearSVC(labelCol="Combined_Status", featuresCol="features",regParam=0.1)
ovr = OneVsRest(classifier=svm, labelCol="Combined_Status", featuresCol="features")
ovr_model = ovr.fit(train_data)
end_time = time.time()
svm_predictions = ovr_model.transform(test_data)
svm_accuracy, svm_precision, svm_recall, svm_f1 = evaluate_model(svm_predictions)
results.append(['SVM', svm_accuracy, svm_precision, svm_recall, svm_f1])
print(f"SVM - Accuracy: {svm_accuracy}, Precision: {svm_precision}, Recall: {svm_recall}, F1 Score: {svm_f1}")
print(f"Time Taken : {end_time-start_time}")

In [None]:
# Naive Bayes Model
start_time = time.time()
nb = NaiveBayes(labelCol="Combined_Status", featuresCol="features")
nb_model = nb.fit(train_data)
end_time = time.time()
nb_predictions = nb_model.transform(test_data)
nb_accuracy, nb_precision, nb_recall, nb_f1 = evaluate_model(nb_predictions)
results.append(['Naive Bayes', nb_accuracy, nb_precision, nb_recall, nb_f1])
print(f"Naive Bayes - Accuracy: {nb_accuracy}, Precision: {nb_precision}, Recall: {nb_recall}, F1 Score: {nb_f1}")
print(f"Time Taken : {end_time-start_time}")

In [None]:
# Gradient-Boosted Tree (GBT) Model
start_time = time.time()
gbt = GBTClassifier(labelCol="Combined_Status", featuresCol="features")
ovr = OneVsRest(classifier=gbt, labelCol="Combined_Status", featuresCol="features")
ovr_model = ovr.fit(train_data)
end_time = time.time()
gbt_predictions = ovr_model.transform(test_data)
gbt_accuracy, gbt_precision, gbt_recall, gbt_f1 = evaluate_model(gbt_predictions)
results.append(['Gradient-Boosted Tree (GBT)', gbt_accuracy, gbt_precision, gbt_recall, gbt_f1])
print(f"Gradient-Boosted Tree (GBT): {gbt_accuracy}, Precision: {gbt_precision}, Recall: {gbt_recall}, F1 Score: {gbt_f1}")
print(f"Time Taken : {end_time-start_time}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
results_df = pd.DataFrame(results, columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'])
sns.set(style="whitegrid")
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
sns.barplot(x='Accuracy', y='Model', data=results_df, ax=axes[0, 0], hue='Model', palette="viridis", legend=False)
axes[0, 0].set_title('Model Accuracy')
axes[0, 0].set_xlim(0.7, 1) 
axes[0, 0].set_ylim(-0.5, len(results_df)-0.5)

# Precision Plot
sns.barplot(x='Precision', y='Model', data=results_df, ax=axes[0, 1], hue='Model', palette="viridis", legend=False)
axes[0, 1].set_title('Model Precision')
axes[0, 1].set_xlim(0.8, 1)
axes[0, 1].set_ylim(-0.5, len(results_df)-0.5)

# Recall Plot
sns.barplot(x='Recall', y='Model', data=results_df, ax=axes[1, 0], hue='Model', palette="viridis", legend=False)
axes[1, 0].set_title('Model Recall')
axes[1, 0].set_xlim(0.9, 1)
axes[1, 0].set_ylim(-0.5, len(results_df)-0.5)

# F1 Score Plot
sns.barplot(x='F1 Score', y='Model', data=results_df, ax=axes[1, 1], hue='Model', palette="viridis", legend=False)
axes[1, 1].set_title('Model F1 Score')
axes[1, 1].set_xlim(0.9, 1)
axes[1, 1].set_ylim(-0.5, len(results_df)-0.5)

plt.tight_layout()
plt.show()