In [4]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Load the data.
# The default below is a machine-specific absolute path. Set the ABALONE_CSV
# environment variable to point at your own copy of abalone.csv instead of
# editing this cell; when the variable is unset the original path is used.
file_path = os.environ.get("ABALONE_CSV", "C:/Users/82104/Downloads/abalone.csv")
df = pd.read_csv(file_path)
# Last expression of the cell: shows a preview of the loaded frame.
df


Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...,...
4172,4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [7]:
# Separate the feature matrix from the regression target.
# "Rings" is the value being predicted; "id" appears to be a row identifier
# (it mirrors the index in the preview above), so neither is used as a feature.
X = df.drop(columns=["id", "Rings"])
y = df["Rings"]
# A notebook cell only auto-displays its last expression, so the feature
# preview has to be shown explicitly or it is silently discarded.
# `display` is provided by the IPython/Jupyter kernel without an import.
display(X.head())
y.head()

0    15
1     7
2     9
3    10
4     7
Name: Rings, dtype: int64

In [8]:
# One-hot encode the categorical "Sex" column; every remaining column is
# forwarded to the estimator unchanged (remainder="passthrough").
categorical_features = ["Sex"]
preprocessor = ColumnTransformer(
    [("cat", OneHotEncoder(), categorical_features)],
    remainder="passthrough",
)


In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Linear regression: preprocess, fit on the training split, predict the test
# split, and report the root mean squared error (RMSE).
model = LinearRegression()
pipeline = Pipeline([("preprocessor", preprocessor), ("regressor", model)])
lr_preds = pipeline.fit(X_train, y_train).predict(X_test)  # fit() returns the pipeline itself
print(
    "선형 회귀 평균제곱근오차:",
    np.sqrt(mean_squared_error(y_true=y_test, y_pred=lr_preds)),
)


선형 회귀 평균제곱근오차: 2.2116130871218322


In [11]:
# Decision tree regression: preprocess, fit on the training split, predict the
# test split, and report the root mean squared error (RMSE).
model = DecisionTreeRegressor(random_state=42)
pipeline = Pipeline([("preprocessor", preprocessor), ("regressor", model)])
dt_preds = pipeline.fit(X_train, y_train).predict(X_test)  # fit() returns the pipeline itself
print(
    "결정 트리 회귀 평균제곱근오차:",
    np.sqrt(mean_squared_error(y_true=y_test, y_pred=dt_preds)),
)


결정 트리 회귀 평균제곱근오차: 3.033307920043569


In [12]:
# Random forest regression: preprocess, fit on the training split, predict the
# test split, and report the root mean squared error (RMSE).
model = RandomForestRegressor(random_state=42)
pipeline = Pipeline([("preprocessor", preprocessor), ("regressor", model)])
rf_preds = pipeline.fit(X_train, y_train).predict(X_test)  # fit() returns the pipeline itself
print(
    "랜덤 포레스트 회귀 평균제곱근오차:",
    np.sqrt(mean_squared_error(y_true=y_test, y_pred=rf_preds)),
)


랜덤 포레스트 회귀 평균제곱근오차: 2.2659429842022734


In [13]:
# Support vector regression (linear kernel): preprocess, fit on the training
# split, predict the test split, and report the root mean squared error (RMSE).
model = SVR(kernel="linear")
pipeline = Pipeline([("preprocessor", preprocessor), ("regressor", model)])
svr_preds = pipeline.fit(X_train, y_train).predict(X_test)  # fit() returns the pipeline itself
print(
    "SVR (선형 커널) 평균제곱근오차:",
    np.sqrt(mean_squared_error(y_true=y_test, y_pred=svr_preds)),
)

SVR (선형 커널) 평균제곱근오차: 2.3214779976216087
