In [1]:
import pandas as pd

# Toy dataset: one categorical feature ("color") and a binary target.
# The three-row pattern is repeated twice to give six rows in total.
data = pd.DataFrame(
    {
        "color": ["red", "green", "blue"] * 2,
        "target": [1, 0, 0] * 2,
    }
)
data

Unnamed: 0,color,target
0,red,1
1,green,0
2,blue,0
3,red,1
4,green,0
5,blue,0


In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Label encoding: map every colour to a single integer code. Classes are
# sorted, so here blue=0, green=1, red=2 -- the codes suggest an ordering
# that colours do not actually have.
le = LabelEncoder()
data["color_label"] = le.fit_transform(data["color"])

# One-hot encoding: one 0/1 indicator column per category, no implied order.
ohe = OneHotEncoder(sparse_output=False)
ohe_matrix = ohe.fit_transform(data[["color"]])

# Reuse data's index for the indicator frame: pd.concat(axis=1) aligns rows by
# index label, so a fresh RangeIndex would misalign (and NaN-pad) the result
# whenever `data` has been filtered, shuffled or otherwise re-indexed.
ohe_df = pd.DataFrame(ohe_matrix, columns=ohe.categories_[0], index=data.index)

# Side-by-side view: original columns, integer label, and one-hot indicators.
data_ohe = pd.concat([data, ohe_df], axis=1)
display(data_ohe)

Unnamed: 0,color,target,color_label,blue,green,red
0,red,1,2,0.0,0.0,1.0
1,green,0,1,0.0,1.0,0.0
2,blue,0,0,1.0,0.0,0.0
3,red,1,2,0.0,0.0,1.0
4,green,0,1,0.0,1.0,0.0
5,blue,0,0,1.0,0.0,0.0


In [3]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Ordinal encoding demo: the categories have a natural order, so we state it
# explicitly (small < medium < large) instead of relying on alphabetical order.
size_order = ["small", "medium", "large"]

df_ord = pd.DataFrame(
    {"size": ["small", "medium", "large", "medium", "small"]}
)

# `categories` takes one list per encoded column; codes follow list position.
ord_enc = OrdinalEncoder(categories=[size_order])
size_codes = ord_enc.fit_transform(df_ord[["size"]])
df_ord["size_code"] = size_codes

display(df_ord)

Unnamed: 0,size,size_code
0,small,0.0
1,medium,1.0
2,large,2.0
3,medium,1.0
4,small,0.0


In [4]:
import numpy as np 
from sklearn.preprocessing import StandardScaler

# Two features on very different scales: height (cm) and allowance (won).
X = np.array(
    [
        [150, 1000],
        [160, 20000],
        [170, 50000],
    ]
)

# Standardise each column independently (zero mean, unit variance).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Show raw and scaled values next to each other for comparison.
scale_columns = ["키(cm)", "용돈(원)", "키(스케일링 후)", "용돈(스케일링 후)"]
df_scale = pd.DataFrame(
    np.concatenate([X, X_scaled], axis=1),
    columns=scale_columns,
)
display(df_scale)

Unnamed: 0,키(cm),용돈(원),키(스케일링 후),용돈(스케일링 후)
0,150.0,1000.0,-1.224745,-1.123698
1,160.0,20000.0,0.0,-0.181775
2,170.0,50000.0,1.224745,1.305473


In [5]:
from sklearn.preprocessing import Normalizer

# Three sample vectors; Normalizer works per row (per sample), not per column.
X = np.array(
    [
        [3, 4],
        [1, 2],
        [10, 0],
    ]
)

# Rescale every row to unit length (default norm is L2), e.g. (3, 4) -> (0.6, 0.8).
normalizer = Normalizer()
X_norm = normalizer.fit_transform(X)

# Original values on the left, normalised values on the right.
norm_columns = ["x1", "x2", "x1(정규화 후)", "x2(정규화 후)"]
df_norm = pd.DataFrame(
    np.concatenate([X, X_norm], axis=1),
    columns=norm_columns,
)
display(df_norm)

Unnamed: 0,x1,x2,x1(정규화 후),x2(정규화 후)
0,3.0,4.0,0.6,0.8
1,1.0,2.0,0.447214,0.894427
2,10.0,0.0,1.0,0.0


In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

# Iris features/labels, with 30% of the rows held out for evaluation.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Chain scaling and the classifier so the scaler is fitted on the training
# split only and the same fitted transform is re-applied when scoring.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier(n_neighbors=5)),
]
pipe = Pipeline(pipeline_steps)
pipe.fit(X_train, y_train)

# Mean accuracy on the held-out split.
print("Pipeline Acc: ", pipe.score(X_test, y_test))

Pipeline Acc:  1.0


In [None]:
from sklearn.decomposition import PCA
from sklearn.datasets import load_digits
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Binary task on the digits dataset: is the image a 7 (1) or not (0)?
digits = load_digits()
y = (digits.target == 7).astype(int)

# Split the RAW pixel features first. Fitting PCA before the split would let
# the held-out rows influence the projection (preprocessing data leakage).
# The split depends only on the row count, `y` and `random_state`, so the
# train/test membership is the same as when splitting already-projected data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    digits.data, y, test_size=0.2, stratify=y, random_state=11
)

# Learn the 10 principal components from the training rows only, then apply
# that same fitted projection to the test rows.
pca = PCA(n_components=10, random_state=42)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

# `X` is kept as the 10-component projection of every sample (same shape as
# before), now produced by the train-fitted PCA rather than a full-data fit.
X = pca.transform(digits.data)