Title: Classification Model Performance Metrics

Accuracy, Precision, Recall, F1-Score:

Task 1: Evaluate a binary classifier for spam detection using accuracy, precision, recall and F1-score.

In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Toy spam dataset: ten rows, two numeric features and a 0/1 'Spam' label.
data = pd.DataFrame({
    'WordCount': [200, 150, 300, 120, 180, 250, 190, 160, 310, 220],
    'HasLink': [1, 0, 1, 0, 1, 1, 0, 0, 1, 1],
    'Spam': [1, 0, 1, 0, 1, 0, 0, 0, 1, 0],
})

# Feature matrix and target vector.
X = data[['WordCount', 'HasLink']]
y = data['Spam']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded random forest on the training rows, then predict the held-out rows.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Score the predictions with all four metrics in one pass.
accuracy, precision, recall, f1 = (
    score(y_test, predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report one metric per line.
print("\n".join([
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1-Score: {f1}",
]))

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0



Task 2: Evaluate the performance of a multi-class classifier that recognizes animals.

In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Toy animal dataset: ten rows, two numeric features and a three-class
# integer label (0, 1, 2) in the 'Animal' column.
data = pd.DataFrame({
    'Weight': [50, 70, 30, 100, 200, 10, 40, 15, 180, 75],
    'Height': [1.2, 1.5, 0.5, 1.8, 2.0, 0.4, 1.1, 0.3, 1.9, 1.4],
    'Animal': [0, 1, 2, 1, 1, 2, 0, 2, 1, 0],
})

X = data[['Weight', 'Height']]
y = data['Animal']

# Hold out 20% of the rows (two of the ten) for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

accuracy = accuracy_score(y_test, predictions)

# With only two test rows, a class can appear in the predictions without
# appearing in y_test (or the reverse), which makes its per-class
# precision/recall/F1 a 0/0 term. scikit-learn's default is to score that
# term as 0 and emit an UndefinedMetricWarning. Passing zero_division=0
# states the same fallback explicitly, so the values are unchanged and the
# warning is no longer raised.
precision = precision_score(y_test, predictions, average='weighted', zero_division=0)
recall = recall_score(y_test, predictions, average='weighted', zero_division=0)
f1 = f1_score(y_test, predictions, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")


Accuracy: 0.5
Precision: 1.0
Recall: 0.5
F1-Score: 0.6666666666666666


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Task 3: Analyze classifier performance for predicting disease outbreaks.

In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Toy outbreak dataset: ten rows of temperature and humidity readings with a
# 0/1 'Outbreak' label.
data = pd.DataFrame(
    {
        'Temperature': [30, 35, 28, 40, 25, 33, 27, 37, 32, 29],
        'Humidity': [70, 80, 60, 90, 55, 75, 65, 85, 78, 68],
        'Outbreak': [1, 1, 0, 1, 0, 1, 0, 1, 0, 0],
    }
)

# Features on the left, target on the right.
X, y = data[['Temperature', 'Humidity']], data['Outbreak']

# Seeded 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Train a seeded random forest and predict the held-out rows.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Binary-classification metrics on the held-out rows.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
precision = precision_score(y_true=y_test, y_pred=predictions)
recall = recall_score(y_true=y_test, y_pred=predictions)
f1 = f1_score(y_true=y_test, y_pred=predictions)

# Emit the four results, one per line.
print(
    "\n".join(
        (
            f"Accuracy: {accuracy}",
            f"Precision: {precision}",
            f"Recall: {recall}",
            f"F1-Score: {f1}",
        )
    )
)

Accuracy: 0.5
Precision: 0.5
Recall: 1.0
F1-Score: 0.6666666666666666
