In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
import pickle

In [5]:
# Read the dataset from a CSV file in the working directory.
file = 'Fish.csv'
df = pd.read_csv(file)

# Column groups: each group gets its own preprocessing branch in the pipeline.
categorical_cols = ['Species']
numerical_cols = ['Length1', 'Length2', 'Length3', 'Height', 'Width']

# The model inputs are the categorical column followed by the numeric ones;
# `Weight` is the value being predicted.
features = categorical_cols + numerical_cols
target = 'Weight'

# Split the frame into the feature matrix and the target series.
X = df.loc[:, features]
y = df.loc[:, target]

In [6]:
# Numeric branch: fill missing values with the column mean, then standardize.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. `handle_unknown='ignore'` keeps transform from raising
# on categories that were not present when the encoder was fitted.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its matching branch.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

# Full model: preprocessing followed by an ordinary linear regression.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

In [7]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the preprocessing steps and the regressor together on the training rows.
model.fit(X_train, y_train)

# Persist the whole fitted pipeline (preprocessing + regressor) to disk.
# The handle is named `model_file` rather than `file` so that it does not
# overwrite the notebook-level `file` variable (the CSV path) with a closed
# file object, which would break any later cell that reads from that path.
# NOTE: only unpickle 'model.pkl' from a trusted source; pickle.load can
# execute arbitrary code.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Evaluate on the held-out rows and report the score.
score = model.score(X_test, y_test)
print(f"Model R^2 score: {score:.4f}")

Model R^2 score: 0.9507
