In [1]:
# Silence all Python warnings for the rest of the session.
# NOTE(review): a blanket 'ignore' can also hide warnings worth seeing — consider narrowing it.
import warnings
warnings.filterwarnings(action='ignore') 

# NOTE(review): datetime, csv, folium, seaborn, StandardScaler and scipy are not used
# in the cells visible in this section — confirm they are needed elsewhere.
import datetime
import pandas as pd
import numpy as np
import csv
import folium
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import scipy as sp
import statsmodels.formula.api as smf
# Set the default matplotlib font family. The named font must be installed locally
# (presumably chosen so Korean labels render — TODO confirm).
plt.rc('font',family='D2CodingLigature Nerd Font')
# plt.rcParams['axes.unicode_minus']=False  # render the '-' (minus) sign correctly

In [2]:
# Read the chick dataset (relative path) into a DataFrame and preview its first two rows.
w = pd.read_csv(filepath_or_buffer='../../data/chick004.csv')
w.head(n=2)

Unnamed: 0,chick_nm,weight,egg_weight,movement,food
0,a01,140,65,146,14
1,a02,128,62,153,12


In [3]:
# Summarise the frame: column names, non-null counts and dtypes.
w.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   chick_nm    30 non-null     object
 1   weight      30 non-null     int64 
 2   egg_weight  30 non-null     int64 
 3   movement    30 non-null     int64 
 4   food        30 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 1.3+ KB


In [4]:
# Keep only the numeric modelling columns (response `weight` plus the three
# predictors), dropping the `chick_nm` identifier. Columns are selected by name
# rather than by position so the intent is explicit and the selection does not
# silently change if the CSV's column order changes.
w2 = w[["weight", "egg_weight", "movement", "food"]]
w2.head(2)

Unnamed: 0,weight,egg_weight,movement,food
0,140,65,146,14
1,128,62,153,12


## OLS

In [5]:
# Full multiple linear regression: weight explained by all three predictors.
model_mlm = smf.ols(
    formula='weight ~ egg_weight + food + movement',
    data=w2,
)
result_mlm = model_mlm.fit()

# Display the regression table (coefficients, p-values, fit statistics).
result_mlm.summary()

0,1,2,3
Dep. Variable:,weight,R-squared:,0.948
Model:,OLS,Adj. R-squared:,0.942
Method:,Least Squares,F-statistic:,157.7
Date:,"Tue, 09 Dec 2025",Prob (F-statistic):,8.46e-17
Time:,20:08:59,Log-Likelihood:,-56.008
No. Observations:,30,AIC:,120.0
Df Residuals:,26,BIC:,125.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.9748,8.587,0.346,0.732,-14.676,20.626
egg_weight,1.7763,0.195,9.117,0.000,1.376,2.177
food,1.5847,0.405,3.915,0.001,0.753,2.417
movement,-0.0087,0.017,-0.522,0.606,-0.043,0.026

0,1,2,3
Omnibus:,1.993,Durbin-Watson:,2.03
Prob(Omnibus):,0.369,Jarque-Bera (JB):,1.746
Skew:,-0.48,Prob(JB):,0.418
Kurtosis:,2.311,Cond. No.,4310.0


In [6]:
## Drop `movement` in light of its p-value (0.606 in the full model above) and refit.
# NOTE: this rebinds model_mlm / result_mlm, replacing the full-model objects.
model_mlm = smf.ols(
    formula='weight ~ egg_weight + food',
    data=w2,
)
result_mlm = model_mlm.fit()

# Display the regression table for the reduced model.
result_mlm.summary()

0,1,2,3
Dep. Variable:,weight,R-squared:,0.947
Model:,OLS,Adj. R-squared:,0.943
Method:,Least Squares,F-statistic:,243.0
Date:,"Tue, 09 Dec 2025",Prob (F-statistic):,5.44e-18
Time:,20:15:08,Log-Likelihood:,-56.164
No. Observations:,30,AIC:,118.3
Df Residuals:,27,BIC:,122.5
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.6638,8.370,0.438,0.665,-13.510,20.837
egg_weight,1.7453,0.183,9.536,0.000,1.370,2.121
food,1.5955,0.399,4.001,0.000,0.777,2.414

0,1,2,3
Omnibus:,2.302,Durbin-Watson:,2.103
Prob(Omnibus):,0.316,Jarque-Bera (JB):,1.94
Skew:,-0.502,Prob(JB):,0.379
Kurtosis:,2.263,Cond. No.,1840.0


## LinearRegression

In [7]:
# Feature matrix (three predictors) and target vector for scikit-learn.
# NOTE(review): `movement` is kept here although it was dropped from the OLS model above.
x = w2.loc[:, ["egg_weight", "food", "movement"]]
y = w2.loc[:, "weight"]

In [8]:
# Create the estimator, then fit it on the features and target.
# The bare `fit` call is left as the last expression so the estimator is displayed.
model = LinearRegression()
model.fit(X=x, y=y)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [9]:
# Print the fitted parameters. Coefficients are listed in the column order of `x`
# (egg_weight, food, movement).
print("Intercept (절편):", model.intercept_)
print("Coefficients (회귀 계수):", model.coef_)

Intercept (절편): 2.9748303446102113
Coefficients (회귀 계수): [ 1.77634969  1.58472919 -0.0086737 ]


In [13]:
# In-sample R² score: predictions are made on the same rows the model was fitted on.
y_pred = model.predict(X=x)
r2 = r2_score(y_true=y, y_pred=y_pred)
print(f"결정계수 (R²): {r2:.4f}")

결정계수 (R²): 0.9479


In [16]:
## Predict weight for a new observation supplied as a bare NumPy array.
# Values must follow the column order used when fitting: egg_weight, food, movement.
w_new = np.array([[
    60,   # egg_weight
    13,   # food
    150,  # movement
]])
pred_new = model.predict(X=w_new)

print(f"예측 weight : {pred_new[0]}")

예측 weight : 128.85623537888023


In [17]:
## Predict weight for a new observation supplied as a DataFrame,
# whose column names match the features the model was fitted on.
df_new = pd.DataFrame({"egg_weight": [60],
                       "food": [13],
                       "movement": [150]})

pred_new2 = model.predict(df_new)
# Report this cell's own prediction (pred_new2); the original printed the stale
# `pred_new` from the previous cell, so the DataFrame result was never shown.
print(f"예측 weight : {pred_new2[0]}")

예측 weight : 128.85623537888023
