# Applied Modeling: Problem Definition, Evaluation Metrics
In this notebook, we cover:
- Structuring machine learning problems
- Choosing evaluation metrics for classification and regression tasks

In [2]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, mean_absolute_error, mean_squared_error, r2_score

## Classification Problem

In [3]:
# Simulate classification data
# 300 synthetic samples with 5 features; 25% of the rows are held out for
# testing. Both the generator and the split are seeded so reruns match.
X_cls, y_cls = make_classification(n_samples=300, n_features=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X_cls,
    y_cls,
    test_size=0.25,
    random_state=42,
)

# Fit a logistic-regression baseline on the training rows, then predict
# labels for the held-out rows so the next cell can score them.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [4]:
# Evaluate classification model
# Table of (printed label, scoring function); every scorer is called the same
# way, with the held-out labels first and the predictions second.
scalar_metrics = (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 Score:', f1_score),
)
for label, scorer in scalar_metrics:
    print(label, scorer(y_test, y_pred))

# The confusion matrix is printed under its own heading, starting on a new line.
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))

Accuracy: 0.9866666666666667
Precision: 1.0
Recall: 0.9761904761904762
F1 Score: 0.9879518072289156
Confusion Matrix:
 [[33  0]
 [ 1 41]]


## Regression Problem

In [5]:
# Simulate regression data
# 300 synthetic samples with 5 features and noise=10; 25% of the rows are
# held out for testing. Both the generator and the split are seeded.
# NOTE: this cell rebinds X_train, X_test, y_train, y_test and y_pred, which
# the classification cells also assign. Re-run the classification cells
# before re-scoring the classifier.
X_reg, y_reg = make_regression(n_samples=300, n_features=5, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X_reg,
    y_reg,
    test_size=0.25,
    random_state=42,
)

# Fit an ordinary linear-regression model on the training rows, then predict
# targets for the held-out rows so the next cell can score them.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

In [6]:
# Evaluate regression model
# Each metric is computed exactly once from the held-out targets and the
# model's predictions. RMSE is derived from the stored MSE instead of calling
# mean_squared_error a second time.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-squared:', r2)

Mean Absolute Error: 7.359747940435262
Mean Squared Error: 93.2694359258009
Root Mean Squared Error: 9.657610259572547
R-squared: 0.9918814354436036


## Summary
- Clearly define whether the problem is classification or regression
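- Match metrics to the problem type: accuracy, precision, recall, F1, and the confusion matrix for classification; MAE, MSE, RMSE, and R-squared for regression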
- Always validate your models with hold-out or cross-validation techniques