In [2]:
import dill

import numpy as np
import pandas as pd

In [3]:
# Read the dataset from the working directory into a DataFrame.
df_loaded = pd.read_csv(filepath_or_buffer="beer.csv")

# Name of the label column the classifier is trained to predict.
target_col = "weekend"

In [4]:
from databricks.automl_runtime.sklearn.column_selector import ColumnSelector
# Columns handed to ColumnSelector, which is the first step of the model pipeline.
supported_cols = [
    "temp_min_c",
    "beer_cons_liters",
    "temp_max_c",
    "temp_median_c",
    "precip_mm",
]
col_selector = ColumnSelector(supported_cols)

In [5]:
# Collects (name, transformer, columns) tuples that are later passed to ColumnTransformer.
transformers = []

In [6]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# Columns routed through the numerical pipeline.
numerical_cols = ["temp_min_c", "beer_cons_liters", "temp_max_c", "temp_median_c", "precip_mm"]

# Coerce every column to a numeric dtype; values that cannot be parsed become NaN
# so that the imputer in the next step can fill them.
to_numeric = FunctionTransformer(lambda df: df.apply(pd.to_numeric, errors="coerce"))

# Fill missing values with the mean of each column.
mean_imputer = SimpleImputer(strategy="mean")

numerical_pipeline = Pipeline(steps=[
    ("converter", to_numeric),
    ("imputer", mean_imputer),
])

# Register the pipeline for the numerical columns with the shared transformer list.
transformers.append(("numerical", numerical_pipeline, numerical_cols))

In [7]:
from sklearn.compose import ColumnTransformer

# Apply the registered per-column transformers; columns not claimed by any
# transformer are passed through unchanged, and sparse_threshold=0 makes the
# combined output always dense.
preprocessor = ColumnTransformer(
    transformers=transformers,
    remainder="passthrough",
    sparse_threshold=0,
)

In [8]:
from sklearn.preprocessing import StandardScaler

# Scaler used as the pipeline step between the preprocessor and the classifier.
standardizer = StandardScaler()

In [9]:
from sklearn.linear_model import LogisticRegression

In [10]:
import sklearn
from sklearn import set_config
from sklearn.pipeline import Pipeline

# Show estimators as diagrams when they are displayed in the notebook.
set_config(display="diagram")

# Elastic-net regularized logistic regression. The "elasticnet" penalty is only
# supported by the "saga" solver; an l1_ratio this close to 0 makes the penalty
# almost purely L2. random_state is fixed so the fit is reproducible.
sklr_classifier = LogisticRegression(
    penalty="elasticnet",
    solver="saga",
    l1_ratio=0.00012381248467975483,
    C=22.5199609553659,
    random_state=714697156,
)

# End-to-end pipeline: select columns -> preprocess -> standardize -> classify.
model = Pipeline(
    steps=[
        ("column_selector", col_selector),
        ("preprocessor", preprocessor),
        ("standardizer", standardizer),
        ("classifier", sklr_classifier),
    ]
)

In [11]:
# Split the frame into features and label by column NAME rather than by
# position, so the split stays correct if the CSV column order changes.
# X keeps every column except the target, in the original column order;
# the pipeline's column selector picks the columns it needs from it.
X = df_loaded.drop(columns=[target_col])
y = df_loaded[target_col]

In [12]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,temp_median_c,temp_min_c,temp_max_c,precip_mm,beer_cons_liters
0,27.3,23.9,32.5,0.0,25.461
1,27.02,24.5,33.5,0.0,28.972
2,24.82,22.4,29.9,0.0,30.814
3,23.98,21.5,28.6,1.2,29.799
4,23.82,21.0,28.3,0.0,28.9


In [13]:
# Preview the first values of the label series.
y.head()

0    0.0
1    0.0
2    1.0
3    1.0
4    0.0
Name: weekend, dtype: float64

In [14]:
# Fit every pipeline step (column selection, preprocessing, scaling, classifier) on X and y.
model.fit(X, y)



In [15]:
# Serialize the fitted pipeline to disk with dill. The pipeline contains a
# lambda-based FunctionTransformer, which the standard pickle module cannot
# serialize. The `with` block guarantees the file is closed even if
# dill.dump raises part-way through.
with open("classification_model.pkl", "wb") as pickle_out:
    dill.dump(model, pickle_out)