# 👋 Hello from the Modelbit team! 

Thanks for downloading our notebook! We hope it helps you on your journey to build and deploy a lead scorer. 

For more assistance, or just to say hi, reach out: harry@modelbit.com. 

## 1. Get the leads

In [1]:
# Optional workaround: run this cell only if loading the CSV in the next cell
# fails with an SSL certificate verification error; otherwise skip it.
# NOTE(review): this replaces ssl's default HTTPS context factory with the
# unverified one, so HTTPS requests that rely on that default (e.g. via urllib)
# no longer check certificates for the rest of this kernel session. Keep it to
# throwaway demo environments.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [2]:
import pandas 

# Download the sample leads; the CSV's first column becomes the DataFrame index.
LEADS_CSV_URL = "https://cdn.modelbit.com/blog/2022-09-30-how-to-build-and-deploy-a-lead-scorer/fake_leads.csv"
leads = pandas.read_csv(LEADS_CSV_URL, index_col=0)

leads

Unnamed: 0,lead_source,num_employees,industry,converted_to_customer
0,facebook,501-1000,Other,False
1,direct-mail,11-50,Other,False
2,,,,True
3,direct-mail,1-10,Other,True
4,,51-100,,True
...,...,...,...,...
49995,other,11-50,Retail,True
49996,search-engine,11-50,Legal,False
49997,twitter,51-100,Food & Beverage,True
49998,search-engine,1-10,Marketing Agency,False


## 2. Feature engineering

In [3]:
# Start the feature matrix from the two categorical columns, taken as-is.
categorical_columns = ["lead_source", "industry"]
X = leads[categorical_columns]

def convert_num_employees(num_employees: str) -> int:
    """Translate a company-size range label into an ordinal bucket.

    Args:
        num_employees: Range label such as '11-50' or '1000+'.

    Returns:
        An integer from 1 (for '1-10') up to 7 (for '1000+'), or 0 when the
        value is not one of the known labels (for example a missing value).
    """
    # Labels listed from smallest to largest company size; the bucket number
    # is simply the 1-based position in this ordering.
    size_labels = (
        '1-10',
        '11-50',
        '51-100',
        '101-250',
        '251-500',
        '501-1000',
        '1000+',
    )
    bucket_by_label = {label: rank for rank, label in enumerate(size_labels, start=1)}

    # Anything outside the known labels falls back to bucket 0.
    return bucket_by_label.get(num_employees, 0)

# Map every lead's company-size label to its ordinal bucket, then attach the
# result to the feature matrix as a third column.
employee_buckets = leads['num_employees'].apply(convert_num_employees)
X = X.assign(num_employees_feature=employee_buckets)
X

Unnamed: 0,lead_source,industry,num_employees_feature
0,facebook,Other,6
1,direct-mail,Other,2
2,,,0
3,direct-mail,Other,1
4,,,3
...,...,...,...
49995,other,Retail,2
49996,search-engine,Legal,2
49997,twitter,Food & Beverage,3
49998,search-engine,Marketing Agency,1


In [4]:
# Target vector: the conversion flag cast to integers (False -> 0, True -> 1).
target_column = 'converted_to_customer'
y = leads[target_column].astype(int)
y

0        0
1        0
2        1
3        1
4        1
        ..
49995    1
49996    0
49997    1
49998    0
49999    1
Name: converted_to_customer, Length: 50000, dtype: int64

## 3. Fitting the model

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor

# Two-stage pipeline: one-hot encode the feature columns, then fit an XGBoost
# regressor on the encoded matrix. handle_unknown='ignore' lets the encoder
# accept category values at prediction time that it did not see during fit.
# NOTE(review): the encoder receives every column of X, so the ordinal
# num_employees_feature is one-hot encoded as well; confirm that is intended.
pipeline = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ('regression', XGBRegressor()),
])

# fit() returns the fitted pipeline, which the notebook displays as cell output.
pipeline.fit(X, y)

Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore')),
                ('regression',
                 XGBRegressor(base_score=None, booster=None, callbacks=None,
                              colsample_bylevel=None, colsample_bynode=None,
                              colsample_bytree=None, early_stopping_rounds=None,
                              enable_categorical=False, eval_metric=None,
                              feature_types=None, gamma=None, gpu_id=None,
                              grow_policy=None, importance_type=None,
                              interaction_constraints=None, learning_rate=None,
                              max_bin=None, max_cat_threshold=None,
                              max_cat_to_onehot=None, max_delta_step=None,
                              max_depth=None, max_leaves=None,
                              min_child_weight=None, missing=nan,
                              monotone_constraints=None, n_estimators=100,
                   

## 4. Building the function to deploy

In [6]:
def score_lead(lead_source: str, industry: str, num_employees: str) -> float:
    """Score a single lead with the fitted pipeline.

    Args:
        lead_source: Where the lead came from, e.g. 'facebook'.
        industry: The lead's industry, e.g. 'Entertainment'.
        num_employees: Company-size range label, e.g. '11-50'; converted to
            its ordinal bucket with convert_num_employees before prediction.

    Returns:
        The pipeline's prediction for this lead as a plain Python float.
    """
    # One-row frame whose column names match the ones the pipeline was fit on.
    single_lead = pandas.DataFrame({
        'lead_source': [lead_source],
        'industry': [industry],
        'num_employees_feature': [convert_num_employees(num_employees)],
    })

    predictions = pipeline.predict(single_lead)

    # predict() returns one value per row; unwrap the only one and convert it
    # to a built-in float.
    return float(predictions[0])

# Smoke-test the scorer on one example lead before deploying it.
score_lead(
    lead_source='facebook',
    industry='Entertainment',
    num_employees='11-50',
)

0.7944052815437317

## 5. Deploy! 🚀

In [7]:
import modelbit
# Obtain a Modelbit client for this session.
# NOTE(review): presumably this triggers an interactive authentication step; confirm.
mb = modelbit.login()
# Deploy score_lead. The summary printed below lists what was shipped with it:
# the convert_num_employees helper, the fitted `pipeline` value and the imports.
mb.deploy(score_lead)





Property,Value
Function,"score_lead(lead_source, industry, num_employees)"
Helpers,"convert_num_employees(num_employees, num_employees_map)"
Values,"pipeline: Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore')), ('regression', XGBRegressor(base_score=None, booster=None, callbacks=None, colsample_bylevel=None, colsample_bynode=None, colsample..."
Imports,from typing import * from sklearn.pipeline import Pipeline from sklearn.preprocessing._encoders import OneHotEncoder from xgboost.sklearn import XGBRegressor import pandas as pandas
Python Version,3.9
