In [1]:
import cudf
# Read the iris CSV from its remote URL with cuDF's CSV reader.
gdf = cudf.read_csv(
    'https://data.heatonresearch.com/data/t81-558/iris.csv'
)

# Print the mean of each of the four measurement columns, one value per line.
for column in ('sepal_l', 'sepal_w', 'petal_l', 'petal_w'):
    column_mean = gdf[column].mean()
    print(column_mean)

5.843333333333334
3.0573333333333332
3.7580000000000005
1.1993333333333331


In [2]:
# Bare final expression: the notebook renders the frame loaded in the
# previous cell as this cell's output.
gdf

Unnamed: 0,sepal_l,sepal_w,petal_l,petal_w,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask import dataframe as dd
import xgboost as xgb
import dask_cudf

def main(client, fname='HIGGS.csv', num_boost_round=100):
    """Train an XGBoost model on a headerless CSV through Dask and save it.

    The CSV is read with ``dask_cudf`` using 29 column names: ``label``
    followed by ``feature-01`` .. ``feature-28``. The ``label`` column is the
    training target and every other column is used as a feature. After
    training, the booster is saved to ``xgboost-model`` in the current working
    directory and the evaluation history on the training data is printed.

    Args:
        client: The ``dask.distributed.Client`` used to build the
            ``DaskDMatrix`` and to run the training.
        fname: Path of the CSV file to read. Defaults to ``'HIGGS.csv'``, the
            value that was previously hard-coded in the body.
        num_boost_round: Number of boosting rounds. Defaults to ``100``, the
            value that was previously hard-coded in the body.
    """
    colnames = ["label"] + ["feature-%02d" % i for i in range(1, 29)]
    dask_df = dask_cudf.read_csv(fname, header=None, names=colnames)

    # Target is the "label" column; features are all remaining columns.
    y = dask_df["label"]
    X = dask_df[dask_df.columns.difference(["label"])]
    dtrain = xgb.dask.DaskDMatrix(client, X, y)

    # The training set doubles as the evaluation set, so the history below
    # reports training error only.
    # NOTE(review): no 'objective' is passed, and the recorded run reports
    # 'rmse'. If "label" is a 0/1 class label, a classification objective may
    # be what is intended -- confirm before changing.
    output = xgb.dask.train(client,
                            {'tree_method': 'gpu_hist'},
                            dtrain,
                            num_boost_round=num_boost_round,
                            evals=[(dtrain, 'train')])
    booster = output['booster']
    history = output['history']
    booster.save_model('xgboost-model')
    print('Training evaluation history:', history)



if __name__ == '__main__':
    # Start a local CUDA cluster with a single worker, connect a client to
    # it, and run the training. Both context managers are exited (client
    # first, then cluster) once main() returns or raises.
    with LocalCUDACluster(n_workers=1) as cluster, Client(cluster) as client:
        main(client)

  self.sync(self._update_scheduler_info)


Training evaluation history: {'train': {'rmse': [0.476039, 0.462171, 0.453841, 0.447575, 0.443414, 0.440031, 0.437333, 0.435057, 0.433699, 0.432582, 0.431228, 0.4304, 0.429581, 0.429005, 0.428382, 0.427898, 0.427287, 0.426813, 0.426318, 0.42613, 0.425823, 0.425393, 0.425105, 0.424848, 0.424615, 0.424346, 0.424222, 0.424031, 0.423748, 0.42353, 0.42334, 0.423088, 0.422918, 0.422679, 0.422566, 0.422445, 0.42234, 0.421971, 0.421865, 0.421668, 0.421561, 0.421343, 0.421106, 0.420886, 0.420823, 0.420665, 0.420473, 0.420314, 0.420143, 0.419982, 0.419932, 0.41974, 0.419584, 0.419549, 0.419367, 0.419239, 0.419154, 0.419058, 0.418988, 0.418889, 0.418649, 0.418559, 0.418442, 0.418197, 0.418131, 0.417958, 0.417871, 0.417694, 0.417632, 0.417528, 0.417481, 0.417435, 0.417393, 0.417358, 0.417235, 0.417121, 0.417067, 0.417038, 0.41701, 0.41693, 0.416872, 0.416776, 0.41669, 0.416576, 0.416428, 0.416307, 0.416179, 0.416048, 0.415981, 0.41588, 0.415802, 0.415766, 0.415712, 0.41564, 0.41561, 0.415575, 0.41

In [4]:
# List HIGGS.csv via the shell to confirm the dataset file is present in the
# notebook's working directory.
!ls HIGGS.csv

HIGGS.csv
