In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

In [2]:
df = pd.read_csv("https://gist.githubusercontent.com/slopp/ce3b90b9168f2f921784de84fa445651/raw/4ecf3041f0ed4913e7c230758733948bc561f434/penguins.csv")
df.head(3)

Unnamed: 0,rowid,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,1,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,2,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,3,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007


In [3]:
df.drop(['rowid', 'year'], axis=1, inplace=True)

In [4]:
df = df[df.isna().sum(axis=1) == 0]

In [6]:
X = df.iloc[:,1:]
y = df.iloc[:,:1]

In [8]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, stratify=y, random_state=123)

In [12]:
pipe = Pipeline([('encoder', OneHotEncoder), ('classifier', DecisionTreeClassifier)])

In [20]:
categorical_features = ["sex", "island"]
categorical_transformer = Pipeline(
    steps=[
        ("encoder", OneHotEncoder()),
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [21]:
clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("classifier", DecisionTreeClassifier())]
)

In [23]:
clf.fit(Xtrain, ytrain)

In [26]:
accuracy_score(clf.predict(Xtest), ytest)

0.67

In [30]:
with open("../server/models/penguins-decision-tree.pkl", "wb") as f:
    pickle.dump(clf, f)

In [31]:
with open("../server/models/penguins-decision-tree.pkl", "rb") as f:
    var = pickle.load(f)

In [47]:
var.predict(pd.DataFrame([['Torgersen', 40, 19, 199, 4000, 'male']] ,columns=Xtrain.columns))[0]

'Adelie'

In [2]:
!pip install streamlit requests

Collecting streamlit
  Downloading streamlit-1.29.0-py2.py3-none-any.whl.metadata (8.2 kB)
Collecting requests
  Downloading requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.7.0-py3-none-any.whl.metadata (1.9 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting importlib-metadata<7,>=1.4 (from streamlit)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl.metadata (4.9 kB)
Collecting pillow<11,>=7.1.0 (from streamlit)
  Using cached Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.5 kB)
Collecting protobuf<5,>=3.20 (from streamlit)
  Using cached protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)