In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Train and evaluate a linear regression that predicts `producao_litros`
# from three numeric columns plus the categorical `tipo_producao` column.
df = pd.read_csv("data/leiteintel_base_ampliada.csv")

# Feature matrix (three numeric columns + one categorical) and target vector.
X = df[["temperatura_media", "chuvas_mm", "preco_litro", "tipo_producao"]]
y = df["producao_litros"]

# One-hot encode the categorical column; the remaining columns are passed
# through unchanged. handle_unknown="ignore" keeps predict() from raising
# when a rare category ends up only in the held-out split (it is then
# encoded as an all-zeros row instead of aborting the evaluation).
preproc = ColumnTransformer([
    ("onehot", OneHotEncoder(handle_unknown="ignore"), ["tipo_producao"])
], remainder="passthrough")

# Bundling preprocessing with the estimator ensures the encoder is fitted on
# the training split only, so no information leaks from the test rows.
modelo = Pipeline([
    ("prep", preproc),
    ("reg", LinearRegression())
])

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
modelo.fit(X_train, y_train)

y_pred = modelo.predict(X_test)

print("R²:", r2_score(y_test, y_pred))
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
# explicitly yields the same RMSE on every scikit-learn version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("RMSE:", rmse)
