# In [10]:
import joblib, pandas as pd
import numpy as np
# Inputs for this scoring run: the persisted full pipeline and the CSV of new leads.
model_path = "models/lead_scoring_pipeline_20250814-174851.joblib"
leads_path = "b2b_leads_new.csv"

# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
pipe = joblib.load(model_path)
df_in = pd.read_csv(leads_path)

def clip_quantile(s, low=0.01, high=0.99):
    """Return a copy of ``s`` with values limited to its own quantile range.

    Args:
        s: Series to clip; it is not modified.
        low: Quantile (0-1) whose value becomes the lower bound.
        high: Quantile (0-1) whose value becomes the upper bound.

    Returns:
        A new Series where values below the ``low`` quantile are raised to it
        and values above the ``high`` quantile are lowered to it.
    """
    # One quantile call yields both bounds, in the order requested.
    lower_bound, upper_bound = s.quantile([low, high]).to_numpy()
    return s.clip(lower=lower_bound, upper=upper_bound)

# --- Feature engineering, scoring and export for the new-leads batch --------
# Expects `df_in` (raw leads frame), `pipe` (fitted object exposing
# predict_proba) and `clip_quantile` to already be defined in this session.

# Calendar year used both as the upper bound for YearFounded and as the
# reference point for CompanyAge.
REFERENCE_YEAR = 2025

# Keep only rows with strictly positive revenue and headcount (rows where
# either value is missing are dropped too, since NaN > 0 is False).
# The explicit .copy() gives an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning on a filtered slice.
df_in = df_in[(df_in["RevenueUSD"] > 0) & (df_in["EmployeesCount"] > 0)].copy()
df_in["YearFounded"] = df_in["YearFounded"].clip(lower=1850, upper=REFERENCE_YEAR)

# Log-transform the two size features.
df_in["RevenueUSD_log"] = np.log1p(df_in["RevenueUSD"])
df_in["Employees_log"] = np.log1p(df_in["EmployeesCount"])

# Clip extreme values to the 0.5%-99.5% quantile range.
# NOTE(review): the quantiles are computed on this inference batch, not on the
# training data -- confirm this matches how the pipeline was trained.
df_in["RevenueUSD_log"] = clip_quantile(df_in["RevenueUSD_log"], 0.005, 0.995)
df_in["Employees_log"] = clip_quantile(df_in["Employees_log"], 0.005, 0.995)

df_in["CompanyAge"] = (REFERENCE_YEAR - df_in["YearFounded"]).clip(lower=0)

# NOTE(review): this applies log1p(x + 1) on top of the already log-transformed
# and clipped columns (a second log transform). Kept as-is because the fitted
# pipeline may have been trained on exactly these values -- verify against the
# training notebook before changing.
df_in["RevenueUSD_log"] = np.log1p(df_in["RevenueUSD_log"] + 1)
df_in["Employees_log"] = np.log1p(df_in["Employees_log"] + 1)

# Binary flags: 1 when the owner contact field is present, 0 when it is missing.
df_in["HasOwnerEmail"] = df_in["OwnerEmail"].notna().astype(int)
df_in["HasOwnerPhone"] = df_in["OwnerPhonesNumber"].notna().astype(int)
df_in["HasOwnerLinkedin"] = df_in["OwnerLinkedin"].notna().astype(int)

# Take the second column of predict_proba (presumably the probability of the
# positive / "converted" class -- confirm against pipe.classes_).
proba = pipe.predict_proba(df_in)[:, 1]
df_in["Converted_Prob"] = proba

# Integer score on a 0-100 scale, then letter buckets with right-inclusive
# bins: C = 0-60, B = 61-80, A = 81-100.
df_in["LeadScore"] = (proba * 100).round().astype(int)
df_in["Bucket"] = pd.cut(
    df_in["LeadScore"],
    bins=[-1, 60, 80, 100],
    labels=["C", "B", "A"],
)

# Persist the scored leads without the DataFrame index.
df_in.to_csv("b2b_leads_scored.csv", index=False)