In [12]:
import joblib
import os
import pandas as pd
import numpy as np
import sklearn

In [14]:
# 1. Define the full path to the downloaded job directory
# NOTE(review): this is an absolute, machine-specific path; the notebook only runs
# where this exact directory exists. Consider making it configurable.
job_dir = "/home/k3s-server-07/federated_learning/workspace/example_project/prod_00/admin@nvidia.com/transfer/07b59842-a8e2-44be-a8e4-d599a5557719"

# 2. Define the relative path to the model file
model_path = os.path.join(job_dir, "workspace/app_server/model_param.joblib")

# 3. Load the saved model parameters
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code.
# Only load artifacts that come from a trusted source.
try:
    param_dict = joblib.load(model_path)
except Exception as e:
    print(f"Error loading the model: {e}")
    # Re-raise instead of continuing: every later cell needs `param_dict`, and
    # swallowing the error here would only surface later as a confusing NameError.
    raise
else:
    print("Successfully loaded the global sklearn model.")
    # You can now inspect or use the loaded object, e.g., print its type:
    print(f"Model type: {type(param_dict)}")

Successfully loaded the global sklearn model.
Model type: <class 'dict'>


In [15]:
# Display the loaded parameter dictionary (it holds the 'coef' and 'intercept' arrays).
param_dict

{'coef': array([[-3.10072697e-01,  7.27916741e-03, -4.63179203e-03,
         -4.12573264e-01,  3.64146395e-03,  7.70010644e-01,
         -3.26065688e-03,  1.89035134e-03,  4.09931495e-02,
          1.75671980e-01,  1.27347000e-03, -6.60943152e-03,
         -5.79892200e-02,  1.42853058e-01, -1.00413259e-02,
         -8.32015762e-03, -7.08854308e-02,  1.91790911e-01,
         -3.06138391e-03,  4.38926438e-03, -6.88427915e-02,
         -1.48053557e-01,  7.17498054e-01,  6.73036636e-01,
          4.97098049e-01, -1.13753159e+00,  1.63746863e+00,
         -3.02185426e+00]]),
 'intercept': array([0.2719594])}

In [45]:
# Take the first row of the 2-D coefficient array to view the weights as a flat vector.
param_dict['coef'][0]

array([-3.10072697e-01,  7.27916741e-03, -4.63179203e-03, -4.12573264e-01,
        3.64146395e-03,  7.70010644e-01, -3.26065688e-03,  1.89035134e-03,
        4.09931495e-02,  1.75671980e-01,  1.27347000e-03, -6.60943152e-03,
       -5.79892200e-02,  1.42853058e-01, -1.00413259e-02, -8.32015762e-03,
       -7.08854308e-02,  1.91790911e-01, -3.06138391e-03,  4.38926438e-03,
       -6.88427915e-02, -1.48053557e-01,  7.17498054e-01,  6.73036636e-01,
        4.97098049e-01, -1.13753159e+00,  1.63746863e+00, -3.02185426e+00])

In [52]:
# Scratch check: evaluate the literal 1e-4 (the value passed as eta0 when the model is rebuilt).
1e-4

0.0001

In [65]:
from sklearn.linear_model import SGDClassifier

# Create an unfitted logistic-regression-style SGD classifier as a container for
# the loaded weights.
# NOTE(review): eta0 / learning_rate are training settings and presumably have no
# effect on prediction — confirm they mirror the training job if that matters.
rebuilt_model = SGDClassifier(
    learning_rate='constant',
    eta0=1e-4,
    fit_intercept=True,
    penalty='l2',
    loss='log_loss',
)

# Inject the fitted-state attributes by hand instead of calling fit():
# binary class labels first, then the loaded intercept and coefficients.
rebuilt_model.classes_ = np.array([0, 1])
rebuilt_model.intercept_ = param_dict['intercept']
rebuilt_model.coef_ = param_dict['coef']

# Show the estimator's configuration.
rebuilt_model

0,1,2
,loss,'log_loss'
,penalty,'l2'
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1


In [54]:
# Column names applied to the CSV, in file order, via `names=`:
# the label first, then the feature columns.
columns = [
    # label
    'y',
    # lepton and missing-energy columns
    'lepton  pT', 'lepton  eta', 'lepton  phi',
    'missing energy magnitude', 'missing energy phi',
    # four jets, each with pt / eta / phi / b-tag
    'jet 1 pt', 'jet 1 eta', 'jet 1 phi', 'jet 1 b-tag',
    'jet 2 pt', 'jet 2 eta', 'jet 2 phi', 'jet 2 b-tag',
    'jet 3 pt', 'jet 3 eta', 'jet 3 phi', 'jet 3 b-tag',
    'jet 4 pt', 'jet 4 eta', 'jet 4 phi', 'jet 4 b-tag',
    # m_* columns
    'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb',
]

# Read the full dataset with the names above and preview the first rows.
df = pd.read_csv("/tmp/nvflare/dataset/HIGGS.csv", names=columns)
df.head()

Unnamed: 0,y,lepton pT,lepton eta,lepton phi,missing energy magnitude,missing energy phi,jet 1 pt,jet 1 eta,jet 1 phi,jet 1 b-tag,...,jet 4 eta,jet 4 phi,jet 4 b-tag,m_jj,m_jjj,m_lv,m_jlv,m_bb,m_wbb,m_wwbb
0,1.0,0.869293,-0.635082,0.22569,0.32747,-0.689993,0.754202,-0.248573,-1.092064,0.0,...,-0.010455,-0.045767,3.101961,1.35376,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678
1,1.0,0.907542,0.329147,0.359412,1.49797,-0.31301,1.095531,-0.557525,-1.58823,2.173076,...,-1.13893,-0.000819,0.0,0.30222,0.833048,0.9857,0.978098,0.779732,0.992356,0.798343
2,1.0,0.798835,1.470639,-1.635975,0.453773,0.425629,1.104875,1.282322,1.381664,0.0,...,1.128848,0.900461,0.0,0.909753,1.10833,0.985692,0.951331,0.803252,0.865924,0.780118
3,0.0,1.344385,-0.876626,0.935913,1.99205,0.882454,1.786066,-1.646778,-0.942383,0.0,...,-0.678379,-1.360356,0.0,0.946652,1.028704,0.998656,0.728281,0.8692,1.026736,0.957904
4,1.0,1.105009,0.321356,1.522401,0.882808,-1.205349,0.681466,-1.070464,-0.921871,0.0,...,-0.373566,0.113041,0.0,0.755856,1.361057,0.98661,0.838085,1.133295,0.872245,0.808487


In [55]:
# Draw a random subset of 100,000 rows for evaluation.
# A fixed random_state makes the subset, and every number derived from it below,
# reproducible across kernel restarts; without it each run samples different rows.
df_sample = df.sample(100_000, random_state=42)

# Write the subset to a parquet file in the working directory.
df_sample.to_parquet('./higgs.parquet')

In [56]:
# Separate the sample into the feature matrix (every column except 'y')
# and the label column 'y'.
df_sample_X = df_sample.drop(columns=['y'])
df_sample_y = df_sample['y']

In [57]:
# Preview the first few labels of the sample (the index is the original row number in df).
df_sample_y.head()

7600685    1.0
6722129    1.0
8009422    0.0
2204326    1.0
2965897    1.0
Name: y, dtype: float64

In [58]:
# Preview the first few feature rows of the sample (the 'y' column has been dropped).
df_sample_X.head()

Unnamed: 0,lepton pT,lepton eta,lepton phi,missing energy magnitude,missing energy phi,jet 1 pt,jet 1 eta,jet 1 phi,jet 1 b-tag,jet 2 pt,...,jet 4 eta,jet 4 phi,jet 4 b-tag,m_jj,m_jjj,m_lv,m_jlv,m_bb,m_wbb,m_wwbb
7600685,0.343325,-1.099665,-1.698674,1.851692,1.406657,0.716094,0.179206,0.21415,1.086538,0.881702,...,-1.288838,0.077526,0.0,0.870475,0.835202,0.995287,2.37736,0.774628,1.848471,1.401605
6722129,0.405365,1.450185,0.224026,0.840836,-1.01183,1.113302,-0.798151,0.121569,2.173076,1.709014,...,0.149447,0.429895,0.0,0.862596,0.72771,0.988069,2.811438,1.006344,1.757354,1.346599
8009422,0.379744,-2.30349,0.001526,2.55154,-0.893597,2.470922,-1.030855,1.380001,0.0,1.351826,...,-0.728348,-0.420887,1.550981,4.441982,2.311386,0.987961,1.314447,0.286206,1.390716,1.717802
2204326,1.371287,-1.428866,0.951449,0.703107,1.036241,1.00997,0.168314,-0.222801,2.173076,0.538867,...,-0.240314,-1.27379,0.0,0.769051,1.273021,0.980849,0.708511,0.672348,1.237125,1.008156
2965897,1.168879,0.527837,-0.717125,0.832957,0.464515,1.198589,0.043545,0.698674,1.086538,0.672702,...,0.09698,-0.193374,0.0,0.83888,0.710297,1.196068,1.086606,0.409869,0.83754,0.778287


In [67]:
# Run the prediction: one predicted class label per row of df_sample_X.
# NOTE(review): the features are passed in CSV column order; this assumes that order
# matches the feature order the loaded coefficients were trained with — TODO confirm.
predictions = rebuilt_model.predict(df_sample_X)
print(predictions)

[1 1 1 ... 1 1 1]




In [70]:
# Number of predictions produced (one per sampled row).
len(predictions)

100000

In [71]:
# Count of rows predicted as class 1 (the labels are 0/1, so the sum is the count).
# Uses the array's vectorized sum instead of the builtin sum(), which would
# iterate over the NumPy array element by element in Python.
predictions.sum()

62646

In [72]:
# Count of rows whose true label is 1.0 (the labels are 0.0/1.0, so the sum is the count).
# Uses the Series' vectorized sum instead of the builtin sum(), which would
# iterate over the Series element by element in Python.
# Note: Series.sum() skips NaN by default, whereas builtin sum() would propagate it.
df_sample_y.sum()

53018.0

In [81]:
# Put the true labels and the predictions side by side in one frame.
# `predictions` is assigned positionally; it was produced from the same sampled rows.
df_2 = pd.DataFrame(df_sample_y)
df_2['predicted'] = predictions

# Accuracy = fraction of rows where prediction equals the true label.
# Taking the mean of the boolean match mask avoids hard-coding the sample size
# (the old `/ 100_000` would silently give a wrong value if the sample size changed).
(df_2['y'] == df_2['predicted']).mean()


0.64218