In [59]:
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
import category_encoders as ce

In [60]:
df=pd.read_csv(r"https://raw.githubusercontent.com/sydneyproject000/dat-sample/main/iowa_mini.csv")

In [61]:
# fill in missing reservations
df.fillna(0)

# drop missing values from shifts
df = df.dropna()

In [62]:
df[['Id', 'Neighborhood', 'OverallQual']]

Unnamed: 0,Id,Neighborhood,OverallQual
0,1,CollgCr,7
1,2,Veenker,6
2,3,CollgCr,7
3,4,Crawfor,7
4,5,NoRidge,8
...,...,...,...
1455,1456,Gilbert,6
1456,1457,NWAmes,6
1457,1458,Crawfor,7
1458,1459,NAmes,5


In [63]:
X = df[['Id', 'Neighborhood', 'OverallQual']]
y = df['SalePrice']

In [64]:
# let's assume these were our optimized parameters
tree = DecisionTreeRegressor(max_depth = 6, max_features = 0.8, min_samples_leaf = 10)

pipe=make_pipeline(ce.TargetEncoder(), tree)

In [65]:
# fit the tree, and export it
pipe.fit(X, y)

Pipeline(steps=[('targetencoder', TargetEncoder(cols=['Neighborhood'])),
                ('decisiontreeregressor',
                 DecisionTreeRegressor(max_depth=6, max_features=0.8,
                                       min_samples_leaf=10))])

In [66]:
# the pickle module allows you to export saved models
import pickle

# rb -- WRITE the file in BYTES
with open('pipe.pkl', 'wb') as export:
    # this creates an external version of the file that we can now import later on
    pickle.dump(pipe, export)

In [67]:
# we can now import this, and re-use it on new data:  very handy

with open('pipe.pkl', 'rb') as import_:
    # this will import the pickled object again
    pipe2 = pickle.load(import_)

In [68]:
# here it is
pipe2

Pipeline(steps=[('targetencoder', TargetEncoder(cols=['Neighborhood'])),
                ('decisiontreeregressor',
                 DecisionTreeRegressor(max_depth=6, max_features=0.8,
                                       min_samples_leaf=10))])

In [69]:
# and we can use it to make new predictions
pipe2.predict(X)

array([220387.70833333, 182929.43333333, 220387.70833333, ...,
       192364.92405063, 137632.44186047, 124743.28169014])

In [74]:
id_= 9999
OverallQual_=7
Neighborhood_='Veenker'



In [75]:
sample = {'Id':id_,
         'OverallQual': OverallQual_,
         'Neighborhood':Neighborhood_}


sample=pd.DataFrame(sample, index = [0])

In [76]:
pipe2.predict(sample)[0]

314842.22222222225