In [26]:
# The Dataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score

In [27]:
data = {
    "Animal": ["Dog", "Cat", "Dog", "Rabbit", "Dog", "Cat", "Dog", "Cat"],
    "Age": [5, 3, 2, 1, 7, 4, 6, 2],       # in years
    "Weight": [20, 4, 15, 2, 25, 5, 22, 3], # in kg
    "Adopted": [1, 1, 0, 1, 0, 1, 1, 0]    # 1 = yes, 0 = no
}

df = pd.DataFrame(data)
df

Unnamed: 0,Animal,Age,Weight,Adopted
0,Dog,5,20,1
1,Cat,3,4,1
2,Dog,2,15,0
3,Rabbit,1,2,1
4,Dog,7,25,0
5,Cat,4,5,1
6,Dog,6,22,1
7,Cat,2,3,0


In [28]:
# 1. Train/Test Split
X = df[["Age", "Weight"]]
y = df["Adopted"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print("Train size:", X_train.shape[0])
print("Test size:", X_test.shape[0])

Train size: 5
Test size: 3


In [29]:
# 2. Pre-processing with Encoders and Scalers
encoder = OneHotEncoder(sparse_output=False)
animal_encoded = encoder.fit_transform(df[["Animal"]])

scaler = StandardScaler()
scaled = scaler.fit_transform(df[["Age", "Weight"]])

print("Encoded animals:\n", animal_encoded)
print("Scaled features:\n", scaled)

Encoded animals:
 [[0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]
Scaled features:
 [[ 0.62994079  0.89723545]
 [-0.37796447 -0.89723545]
 [-0.8819171   0.33646329]
 [-1.38586973 -1.12154431]
 [ 1.63784605  1.4580076 ]
 [ 0.12598816 -0.78508102]
 [ 1.13389342  1.12154431]
 [-0.8819171  -1.00938988]]


In [30]:
# 3. Pipelines
pipe = Pipeline([
    ("scale", StandardScaler()),
    ("model", LogisticRegression())
])

pipe.fit(X, y)
preds = pipe.predict(X)
print("Predictions:", preds)

Predictions: [1 1 1 1 1 1 1 1]


In [31]:
# Model Evaluation with Metrics
y_pred = pipe.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [32]:
# 5. Cross-Validation
scores = cross_val_score(LogisticRegression(), X, y, cv=3)
print("Cross-validation scores:", scores)
print("Average score:", scores.mean())

Cross-validation scores: [0.33333333 0.33333333 0.        ]
Average score: 0.2222222222222222
