In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Generate a reproducible synthetic dataset.
# NOTE: every column below is sampled independently of the others, so Weight
# has no built-in relationship to Species, Length or Girth.
np.random.seed(42)
data = {
    'Species': np.random.choice(['Salmon', 'Tuna', 'Trout'], 500),
    'Length': np.random.uniform(20, 100, 500),  # Fish length in cm
    'Girth': np.random.uniform(10, 50, 500),   # Fish girth in cm
    'Weight': np.random.uniform(500, 5000, 500)  # Fish weight in grams
}

# Create DataFrame
df = pd.DataFrame(data)

# Encode species as integer codes.
# The category order is spelled out so the mapping is explicit and does not
# depend on which species happen to occur in the sample. This is the same
# (sorted) order that .astype('category') infers: Salmon=0, Trout=1, Tuna=2.
SPECIES_CATEGORIES = ['Salmon', 'Trout', 'Tuna']
df['Species'] = pd.Categorical(df['Species'], categories=SPECIES_CATEGORIES).codes
# NOTE(review): integer codes impose an artificial order on a nominal feature;
# one-hot encoding (e.g. pd.get_dummies) is the usual choice for a linear model.

# Define independent and dependent variables
X = df[['Species', 'Length', 'Girth']]
y = df['Weight']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# partition identical from run to run.
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit an ordinary least-squares model on the training rows
# (fit() returns the estimator itself, so construction and fitting chain).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Report the error metric and the fitted parameters
report = (
    ("Mean Absolute Error:", mae),
    ("Model Coefficients:", model.coef_),
    ("Intercept:", model.intercept_),
)
for label, value in report:
    print(label, value)


Mean Absolute Error: 1039.4452744586706
Model Coefficients: [13.61826178 -2.33323035  1.04849248]
Intercept: 2835.418027202459
