### GPU Open Analytics Initiative: connect MapD to PyGDF to H2OAIGLM

### In this demo, we will train 4000 regularized linear regression models on the U.S. Census dataset, with the goal to predict the income of a person, given approximately 10000 data points (such as age, occupation, zip code, etc.)

### The dataset is about 2GB in memory (50k rows, 10k cols, single-precision floating-point values), so it fits onto the GPU memory.

### By using multiple GPUs, we are able to speed up this process significantly, and can train about 40 models per second (on a DGX-1 with 8 GPUs)

### Choose to Run or Run+Animate

In [1]:
# run=0: Choose not run h2oaiglm and just pass through all other cells
# run=1: Run h2oaiglm without animation
# Requirements: python3.5
run=1

# anim=0: Don't show animation
# anim=1: Do show animation
# pip install pandas psutil matplotlib --user
# pip install -e git+https://github.com/fbcotter/py3nvml#egg=py3nvml --user
anim=1

PWD = !pwd

In [2]:
import sys
import os.path
from pprint import pprint
import warnings
warnings.filterwarnings('ignore')

Connect to MapD using `pymapd`

In [3]:
import pandas as pd
import cudf
import dask_cudf
import cuml
import sys
import os
from pprint import pprint
import warnings
warnings.filterwarnings('ignore')

In [4]:
import gzip
def load_data(cached = '/rapids/notebooks/wip/notebooks-extended/data/ipums/ipums_easy.csv.gz',source='ipums'):
    if os.path.exists(cached) and source=='ipums':
        print('use ipums data')
        with gzip.open(cached) as f:
            #X = pd.read_csv(f)
            X = cudf.read_csv(f)
    else:
        print("no data")
    return X

In [5]:
df = load_data()
print('data',df.shape)

use ipums data
data (10000, 459)


The Query

In [6]:
df.head()

<cudf.DataFrame ncols=459 nrows=5 >

In [7]:
#columns to keep= INCEARN,RECTYPE,IPUMS_YEAR,DATANUM,SERIAL,NUMPREC,SUBSAMP,HHWT,HHTYPE,REPWT,ADJUST,CPI99,REGION,STATEICP,STATEFIP,COUNTY,COUNTYFIPS,METRO,METAREA,METAREAD,MET2013,MET2013ERR,CITY,CITYERR,CITYPOP,PUMA,PUMARES2MIG,STRATA,PUMASUPR,CONSPUMA,CPUMA0010,APPAL,APPALD,HOMELAND,MET2003,CNTRY,GQ,GQTYPE,GQTYPED,FARM,OWNERSHP,OWNERSHPD,MORTGAGE,MORTGAG2,COMMUSE,FARMPROD,ACREHOUS,MORTAMT1,MORTAMT2,TAXINCL,INSINCL,PROPINSR,PROPTX99,OWNCOST,RENT,RENTGRS,RENTMEAL,CONDOFEE,MOBLHOME,MOBLHOM2,MOBLOAN,SECRES,SECRESMO,SECRESRE,COSTELEC,COSTGAS,COSTWATR,COSTFUEL,PUBHOUS,RENTSUB,HEATSUB,LUNCHSUB,FOODSTMP,FDSTPAMT,VALUEH,LINGISOL,VACANCY,KITCHEN,KITCHENORIG,FRIDGE,FRIDGEORIG,SINK,STOVE,ROOMS,ROOMSORIG,PLUMBING,HOTWATER,SHOWER,TOILET,BUILTYR,BUILTYR2,UNITSSTR,BEDROOMS,BEDROOMSORIG,PHONE,PHONEORIG,CILAPTOP,CIHAND,CIOTHCOMP,CINETHH,CIMODEM,CISAT,CIDSL,CIFIBER,CIBRDBND,CIDIAL,CIOTHSVC,FUELHEAT,VEHICLES,SSMC,NFAMS,NSUBFAM,NCOUPLES,NMOTHERS,NFATHERS,MULTGEN,MULTGEND,CBNSUBFAM,REPWT1,REPWT2,REPWT3,REPWT4,REPWT5,REPWT6,REPWT7,REPWT8,REPWT9,REPWT10,REPWT11,REPWT12,REPWT13,REPWT14,REPWT15,REPWT16,REPWT17,REPWT18,REPWT19,REPWT20,REPWT21,REPWT22,REPWT23,REPWT24,REPWT25,REPWT26,REPWT27,REPWT28,REPWT29,REPWT30,REPWT31,REPWT32,REPWT33,REPWT34,REPWT35,REPWT36,REPWT37,REPWT38,REPWT39,REPWT40,REPWT41,REPWT42,REPWT43,REPWT44,REPWT45,REPWT46,REPWT47,REPWT48,REPWT49,REPWT50,REPWT51,REPWT52,REPWT53,REPWT54,REPWT55,REPWT56,REPWT57,REPWT58,REPWT59,REPWT60,REPWT61,REPWT62,REPWT63,REPWT64,REPWT65,REPWT66,REPWT67,REPWT68,REPWT69,REPWT70,REPWT71,REPWT72,REPWT73,REPWT74,REPWT75,REPWT76,REPWT77,REPWT78,REPWT79,REPWT80,RESPMODE,PERNUM,PERWT,SLWT,REPWTP,FAMSIZE,NCHILD,NCHLT5,FAMUNIT,ELDCH,YNGCH,NSIBS,MOMLOC,STEPMOM,MOMRULE,POPLOC,STEPPOP,POPRULE,SPLOC,SPRULE,SUBFAM,SFTYPE,SFRELATE,CBSUBFAM,CBSFTYPE,CBSFRELATE,RELATE,RELATED,SEX,AGE,AGEORIG,BIRTHQTR,MARST,BIRTHYR,MARRNO,MARRINYR,YRMARR,DIVINYR,WIDINYR,FERTYR,RACE,RACED,HISPAN,HISPAND,BPL,BPLD,ANCESTR1,ANCESTR1D,ANCESTR2,ANCESTR2D,CITIZEN,YRNATUR,YRIMMIG,YRSUSA1,YRSUSA2,SPOKEN_LANGUAGE,LANGUAGED,SPEAKENG,TRIBE,TRIBED,RACESING,RACESINGD,RACAMIND,RACASIAN,RACBLK,RACPACIS,RACWHT,RACOTHER,RACNUM,SCHOOL,EDUC,EDUCD,GRADEATT,GRADEATTD,SCHLTYPE,DEGFIELD,DEGFIELDD,DEGFIELD2,DEGFIELD2D,EMPSTAT,EMPSTATD,LABFORCE,OCC,OCC1950,OCC1990,OCC2010,IND,IND1950,IND1990,CLASSWKR,CLASSWKRD,OCCSOC,INDNAICS,WKSWORK1,WKSWORK2,UHRSWORK,WRKLSTWK,ABSENT,LOOKING,AVAILBLE,WRKRECAL,WORKEDYR,POVERTY,OCCSCORE,SEI,HWSEI,PRESGL,PRENT,ERSCOR50,ERSCOR90,EDSCOR50,EDSCOR90,NPBOSS50,NPBOSS90,MIGRATE1,MIGRATE1D,MIGPLAC1,MIGMET1,MIGTYPE1,MIGCITY1,MIGPUMS1,MIGPUMA1,MOVEDIN,MOVEDINORIG,DISABWRK,VETDISAB,DIFFREM,DIFFPHYS,DIFFMOB,DIFFCARE,DIFFSENS,DIFFEYE,DIFFHEAR,VETSTAT,VETSTATD,VET01LTR,VET95X00,VET90X01,VET90X95,VET75X90,VET80X90,VET75X80,VETVIETN,VET55X64,VETKOREA,VET47X50,VETWWII,VETOTHER,VETYRS,PWSTATE2,PWMETRO,PWCITY,PWTYPE,PWPUMA00,PWPUMAS,TRANWORK,CARPOOL,RIDERS,TRANTIME,DEPARTS,ARRIVES,GCHOUSE,GCMONTHS,GCRESPON,PROBAI,PROBAPI,PROBBLK,PROBOTH,PROBWHT,REPWTP1,REPWTP2,REPWTP3,REPWTP4,REPWTP5,REPWTP6,REPWTP7,REPWTP8,REPWTP9,REPWTP10,REPWTP11,REPWTP12,REPWTP13,REPWTP14,REPWTP15,REPWTP16,REPWTP17,REPWTP18,REPWTP19,REPWTP20,REPWTP21,REPWTP22,REPWTP23,REPWTP24,REPWTP25,REPWTP26,REPWTP27,REPWTP28,REPWTP29,REPWTP30,REPWTP31,REPWTP32,REPWTP33,REPWTP34,REPWTP35,REPWTP36,REPWTP37,REPWTP38,REPWTP39,REPWTP40,REPWTP41,REPWTP42,REPWTP43,REPWTP44,REPWTP45,REPWTP46,REPWTP47,REPWTP48,REPWTP49,REPWTP50,REPWTP51,REPWTP52,REPWTP53,REPWTP54,REPWTP55,REPWTP56,REPWTP57,REPWTP58,REPWTP59,REPWTP60,REPWTP61,REPWTP62,REPWTP63,REPWTP64,REPWTP65,REPWTP66,REPWTP67,REPWTP68,REPWTP69,REPWTP70,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80


In [8]:
dcols = [ #clumns to drop
 'CLUSTER',
 'HHINCOME',
 'INCTOT',
 'FTOTINC',
 'INCWAGE',
 'INCBUS00',
 'INCSS',
 'INCWELFR',
 'INCINVST',
 'INCRETIR',
 'INCSUPP',
 'INCOTHER']
ddf= df.drop(dcols)

In [9]:
print('data',ddf.shape)

data (10000, 447)


In [10]:
ddf.INCEARN.unique()

<cudf.Series nrows=664 >

In [11]:
print(ddf.INCEARN.head(5))

0     4000
1    36700
2    54000
3      900
4     2000
Name: INCEARN, dtype: int64


In [12]:
#query = "SELECT {} FROM ipums_easy WHERE INCEARN > 100;".format(columns)
# ensure sql query is deterministic
#query = "SELECT {} FROM ipums_easy WHERE INCEARN > 100 order by SERIAL;".format(columns)
#print('Query is : ' + query)

# always use True for is columnar
#(df.query('datetimes==@search_date'))
ddf = ddf.fillna(-2048)
#imax = 100 
#idf = ddf.query(ddf.INCEARN==imax)
expr = "INCEARN > 100"
idf = ddf.query(expr)
print(idf)

    RECTYPE  YEAR  DATANUM  SERIAL  NUMPREC  SUBSAMP  HHWT ...  REPWTP80
0        0  2000        3       1        2       26   600 ...     -2048
1        0  2000        3       1        2       26   600 ...     -2048
2        0  2000        3       2        4       76   600 ...     -2048
3        0  2000        3       2        4       76   600 ...     -2048
4        0  2000        3       2        4       76   600 ...     -2048
6        0  2000        3       3        1        2   600 ...     -2048
7        0  2000        3       4        1       92   600 ...     -2048
8        0  2000        3       5        4       81   600 ...     -2048
9        0  2000        3       5        4       81   600 ...     -2048
11        0  2000        3       5        4       81   600 ...     -2048
[5558 more rows]
[439 more columns]


`df` is a PyGDF GPU Dataframe

In [13]:
type(idf)

cudf.dataframe.dataframe.DataFrame

In [14]:
idf.head().to_pandas()

Unnamed: 0,RECTYPE,YEAR,DATANUM,SERIAL,NUMPREC,SUBSAMP,HHWT,HHTYPE,REPWT,ADJUST,...,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80
0,0,2000,3,1,2,26,600,3,-2048,1.018516,...,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048
1,0,2000,3,1,2,26,600,3,-2048,1.018516,...,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048
2,0,2000,3,2,4,76,600,1,-2048,1.018516,...,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048
3,0,2000,3,2,4,76,600,1,-2048,1.018516,...,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048
4,0,2000,3,2,4,76,600,1,-2048,1.018516,...,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048,-2048


In [15]:
import numpy as np

In [16]:
idf['INCEARN'].mean()

32876.621408045976

Preprocess the data

In [17]:
num_cols = set()
cat_cols = set()
response_set = set(['INCEARN'])
feature_names = set(idf.columns) - response_set

Determine categorical and numeric columns.  Compute unique values from categorical columns.

In [18]:
uniques = {}
for k in feature_names:
    try:
        uniquevals = idf[k].unique_k(k=1000)
        uniques[k] = uniquevals
    except ValueError:
        # more than 1000 unique values
        num_cols.add(k)
    else:
        # within 1000 unique values
        nunique = len(uniquevals)
        if nunique < 2:
            del idf[k]        # drop constant column
        elif 1 < nunique < 1000:
            cat_cols.add(k)  # as cat column
        else:
            num_cols.add(k)  # as num column

Fix numeric columns.  Fill NA, Scale to [0, 1]. Drop near constant

In [19]:
for k in (num_cols - response_set):
    idf[k] = idf[k].fillna(idf[k].mean())
    assert idf[k].null_count == 0
    std = idf[k].std()
    # drop near constant columns
    if not np.isfinite(std) or std < 1e-4:
        del idf[k]
        print('drop near constant', k)
    else:
        idf[k] = idf[k].scale()

Expand categorical columns

In [20]:
for k in cat_cols:
    cats = uniques[k][1:]  # drop first
    idf = idf.one_hot_encoding(k, prefix=k, cats=cats)
    del idf[k]

Ensure INCEARN is float64

In [21]:
nrows=len(idf)
print(nrows)
idf['intercept'] = np.ones(nrows, dtype=np.float64)

5568


In [22]:
idf['INCEARN'] = idf['INCEARN'].astype(np.float64)

In [23]:
idf.dtypes

SERIAL         float64
OWNCOST        float64
PERWT          float64
SLWT           float64
INCEARN        float64
RENTGRS_15     float64
RENTGRS_60     float64
RENTGRS_85     float64
RENTGRS_120    float64
RENTGRS_125    float64
RENTGRS_135    float64
RENTGRS_140    float64
RENTGRS_160    float64
RENTGRS_165    float64
RENTGRS_170    float64
RENTGRS_181    float64
RENTGRS_200    float64
RENTGRS_205    float64
RENTGRS_210    float64
RENTGRS_215    float64
RENTGRS_220    float64
RENTGRS_225    float64
RENTGRS_230    float64
RENTGRS_245    float64
RENTGRS_250    float64
RENTGRS_255    float64
RENTGRS_260    float64
RENTGRS_265    float64
RENTGRS_286    float64
RENTGRS_288    float64
                ...   
OCC1950_810    float64
OCC1950_820    float64
OCC1950_910    float64
OCC1950_930    float64
OCC1950_950    float64
OCC1950_970    float64
EMPSTATD_12    float64
EMPSTATD_14    float64
EMPSTATD_20    float64
EMPSTATD_30    float64
FUELHEAT_2     float64
FUELHEAT_3     float64
FUELHEAT_4 

In [27]:
print(idf.shape)
print(idf.loc[:n60])

(5568, 8551)


ValueError: value not found

Create 60-40: training - testing

In [24]:
# Fraction for train (test is 1-FRACTION)
FRACTION=0.8
validFraction=1.0-FRACTION
n60 = int(len(idf) * FRACTION)
print('60% of {} is {}'.format(len(idf), n60))
train_df = idf.loc[:n60]
if FRACTION<1.0:
    test_df = idf.loc[n60:]
    print('train_df has {} rows | test_df has {} rows'.format(len(train_df), len(test_df)))
else:
    print('train_df has {} rows | test_df has {} rows'.format(len(train_df), 0))

60% of 5568 is 4454


ValueError: value not found

Turn the dataframes into a matrix

In [None]:
train_data_mat = train_df.as_gpu_matrix(columns=df.columns[1:])
train_result_mat = train_df.as_gpu_matrix(columns=[df.columns[0]])
if FRACTION<1.0:
    test_data_mat = test_df.as_gpu_matrix(columns=df.columns[1:])
    test_result_mat = test_df.as_gpu_matrix(columns=[df.columns[0]])

In [None]:
print(train_df['INCEARN'].mean())
if FRACTION<1.0:
    print(test_df['INCEARN'].mean())

In [None]:
print(train_data_mat.shape)
print(train_result_mat.shape)
if FRACTION<1.0:
    print(test_data_mat.shape)
    print(test_result_mat.shape)

The ctypes pointer to the gpu matrices

In [None]:
train_data_mat_ptr = train_data_mat.device_ctypes_pointer
train_result_mat_ptr = train_result_mat.device_ctypes_pointer
print('train_data_mat_ptr address', hex(train_data_mat_ptr.value))
print('train_result_mat_ptr address', hex(train_result_mat_ptr.value))
if FRACTION<1.0:
    test_data_mat_ptr = test_data_mat.device_ctypes_pointer
    test_result_mat_ptr = test_result_mat.device_ctypes_pointer
    print('test_data_mat_ptr address', hex(test_data_mat_ptr.value))
    print('test_result_mat_ptr address', hex(test_result_mat_ptr.value))

In [None]:
import os
os.getcwd()

In [None]:
def RunH2Oaiglm(arg):
    intercept,standardize, lambda_min_ratio, nFolds, nAlphas, nLambdas, nGPUs = arg
    
    
    ## Solve
    t0 = time.time()
    print("vars: %d %d %d %d %d %d %d" % (sourceDev, mTrain, n, mValid, intercept, standardize, precision))
    enet.fit(sourceDev, mTrain, n, mValid, intercept, standardize, precision, a, b, c, d, e)
    t1 = time.time()
    print("Done Solving")
    print("Time to train H2O AI GLM: %r" % (t1-t0))

In [None]:
lambda_min_ratio=1E-9
nFolds=5
nAlphas=8
nLambdas=100
nGPUs=maxNGPUS # choose all GPUs

# Run Model
arg = intercept,standardize, lambda_min_ratio, nFolds, nAlphas, nLambdas, nGPUs 
RunH2Oaiglm(arg)


In [None]:
test_data_mat

In [None]:
print(test_df.head(1))

In [None]:
from cuml.solvers import SGD as cumlSGD
learning_rate = 'adaptive'
datatype = np.float32
penalty = 'elasticnet'
loss = 'squared_loss'
cu_sgd = cumlSGD(learning_rate=learning_rate, eta0=0.07, epochs=500,
                 batch_size=512,
                 tol=0.0, penalty=penalty, loss=loss)
cu_sgd.fit(train_df, test_df)

### Import H2OAIGLM

In [None]:
# Load H2OAIGLM
import h2oaiglm as h2oaiglm
from ctypes import *
import time
if anim==1:
    import pandas as pd

a=c_void_p(train_data_mat_ptr.value)
b=c_void_p(train_result_mat_ptr.value)
if FRACTION<1.0:
    c=c_void_p(test_data_mat_ptr.value)
    d=c_void_p(test_result_mat_ptr.value)
else:
    c=c_void_p(0)
    d=c_void_p(0)

### Define some helper methods for plotting and running the algorithm

In [None]:
def new_alpha(row_fold):
    if row_fold == 0:
        return -0.025
    elif row_fold == 1:
        return -0.05
    elif row_fold == 3:
        return 0.025
    elif row_fold == 4:
        return 0.05
    else: return 0

def plot_cpu_perf(axis, cpu_labels, cpu_snapshot):
    axis.cla()
    axis.grid(False)
    axis.set_ylim([0,100])
    axis.set_ylabel('Percent', labelpad=2, fontsize = 14)
    axis.bar(cpu_labels, cpu_snapshot, color='dodgerblue')
    axis.set_title('CPU Utilization', fontsize = 16)
    
def plot_gpu_perf(axis, gpu_labels, gpu_snapshot):
    axis.cla()
    axis.grid(False)
    axis.set_ylim([0,100])
    axis.set_xticks(gpu_labels)
    axis.set_ylabel('Percent', labelpad=2, fontsize = 14)
    axis.bar(gpu_labels, gpu_snapshot, width =0.5, color = 'limegreen')
    axis.set_title('GPU Utilization', fontsize = 16)
    
def plot_glm_results(axis, results, best_rmse, cb):
    axis.cla()
    axis.set_xscale('log')
    axis.set_xlim([0.1, 1e9])
    axis.set_ylim([-0.12, 1.12])
    axis.set_yticks([x/7. for x in range(0,8)])
    axis.set_ylabel('Parameter 1:  '+r'$\alpha$', fontsize = 16)
    axis.set_xlabel('Parameter 2:  '+r'$\lambda$', fontsize = 16)
    num_models = min(4000,int(4000*results.shape[0]/2570))
    axis.set_title('Elastic Net Models Trained and Evaluated: ' + str(num_models), fontsize = 16)

    try:
        cm = ListedColormap(sns.color_palette("RdYlGn", 10).as_hex())
        cf = axis.scatter(results['lambda'], results['alpha_prime'], c=results['rel_acc'], 
                    cmap=cm, vmin=0, vmax=1)
        axis.plot(best_rmse['lambda'],best_rmse['alpha_prime'], 'o',
            ms=15, mec='k', mfc='none', mew=2)

        if not cb:
            cb = pl.colorbar(cf, ax=axis)
            cb.set_label('Relative  Validation  Accuracy', rotation=270, 
                         labelpad=18, fontsize = 16)   
        cb.update_normal(cf)
    except:
        #print("plot_glm_results exception -- no frame")
        pass

from py3nvml.py3nvml import * # Python Library that can query the state of your GPU
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import seaborn as sns
sns.set_style("whitegrid")
import psutil
import numpy as np
import pylab as pl
from IPython import display
import matplotlib.gridspec as gridspec
from matplotlib.colors import ListedColormap
import sys
import subprocess
maxNGPUS = int(subprocess.check_output("nvidia-smi -L | wc -l", shell=True))
print("Maximum Number of GPUS:", maxNGPUS)

nvmlInit()
deviceCount = nvmlDeviceGetCount()
for i in range(deviceCount):
    handle = nvmlDeviceGetHandleByIndex(i)
    #print("Device {}: {}".format(i, nvmlDeviceGetName(handle)))
    #print ("Driver Version:", nvmlSystemGetDriverVersion())

import os
def RunAnimation(arg):
    deviceCount = arg
    file = os.getcwd() + "/rmse.txt"
    fig = pl.figure(figsize = (9,9))
    pl.rcParams['xtick.labelsize'] = 14
    pl.rcParams['ytick.labelsize'] = 14
    gs = gridspec.GridSpec(3, 2, wspace=0.3, hspace=0.4)
    ax1 = pl.subplot(gs[0,-2])
    ax2 = pl.subplot(gs[0,1])
    ax3 = pl.subplot(gs[1:,:])
    fig.suptitle('H2O.ai Machine Learning $-$ Generalized Linear Modeling', size=18)

    pl.gcf().subplots_adjust(bottom=0.2)

    cb = False
    os.system("mkdir -p images")
    i=0
    while(True):
        try:
            #cpu
            snapshot = psutil.cpu_percent(percpu=True)
            cpu_labels = range(1,len(snapshot)+1)
            plot_cpu_perf(ax1, cpu_labels, snapshot)
    
            #gpu
            gpu_snapshot = []
            gpu_labels = list(range(1,deviceCount+1))
            for j in range(deviceCount):
                handle = nvmlDeviceGetHandleByIndex(j)
                util = nvmlDeviceGetUtilizationRates(handle)
                gpu_snapshot.append(util.gpu)
            gpu_snapshot = gpu_snapshot   
            plot_gpu_perf(ax2, gpu_labels, gpu_snapshot)
    
            res = pd.read_csv(file, sep="\s+",header=None,names=['time','pass','fold','a','i','alpha','lambda','trainrmse','ivalidrmse','validrmse'])
            
            res['rel_acc'] = ((38000- res['validrmse'])/(38000-28000))
            res['alpha_prime'] = res['alpha'] + res['fold'].apply(lambda x: new_alpha(x))

            best = res.ix[res['rel_acc']==np.max(res['rel_acc']),:]
            plot_glm_results(ax3, res, best.tail(1), cb)
            # flag for colorbar to avoid redrawing
            cb = True

            # Add footnotes
            footnote_text = "*U.S. Census dataset (predict Income): 45k rows, 10k cols\nParameters: 5-fold cross-validation, " + r'$\alpha = \{\frac{i}{7},i=0\ldots7\}$' + ", "\
   'full $\lambda$-' + "search"
            #pl.figtext(.05, -.04, footnote_text, fontsize = 14,)
            pl.annotate(footnote_text, (0,0), (-30, -50), fontsize = 12,
                        xycoords='axes fraction', textcoords='offset points', va='top')

            #update the graphics
            display.display(pl.gcf())
            display.clear_output(wait=True)
            time.sleep(0.01)

            #save the images
            saveimage=0
            if saveimage:
                file_name = './images/glm_run_%04d.png' % (i,)
                pl.savefig(file_name, dpi=200)
            i=i+1
        
        except KeyboardInterrupt:
            break
        #except:
        #    #print("Could not Create Frame")
        #    pass

### Choose Data Size and Order and How was processed for H2OAIGLM

In [None]:
intercept = 1 #
standardize = 0
n=train_data_mat.shape[1]
mTrain=train_data_mat.shape[0]
if FRACTION<1.0:
    mValid=test_data_mat.shape[0]
else:
    mValid=0
print("n=%d mTrain=%d mValid=%d" % (n,mTrain,mValid))
# Order of data
fortran = 1
print("fortran=%d" % (fortran))
result={df[k].dtype for k in df.columns}
print(result)
print(fortran)
if result.pop() == np.dtype('float64'):
    print("double precision")
    precision=1
else:
    print("single precision")
    precision=0

### Help function to use Mapd->pygdf data pointers in GPU on H2OAIGLM

In [None]:
def RunH2Oaiglm(arg):
    intercept,standardize, lambda_min_ratio, nFolds, nAlphas, nLambdas, nGPUs = arg
    
        # set solver cpu/gpu according to input args
    if((nGPUs>0) and (h2oaiglm.ElasticNetSolverGPU is None)):
        print("\nGPU solver unavailable, using CPU solver\n")
        nGPUs=0

    sharedA = 0
    sourceme = 0
    sourceDev = 0
    nThreads = 1 if(nGPUs==0) else nGPUs # not required number of threads, but normal.  Bit more optimal to use 2 threads for CPU, but 1 thread per GPU is optimal.

    #print("Setting up Solver")
    os.system("rm -f rmse.txt ; touch rmse.txt ; rm -f varimp.txt ; touch varimp.txt")
    Solver = h2oaiglm.ElasticNetSolverGPU if(nGPUs>0) else h2oaiglm.ElasticNetSolverCPU


    #  Solver = h2oaiglm.ElasticNetSolverCPU
    assert Solver != None, "Couldn't instantiate ElasticNetSolver"
    enet = Solver(sharedA, nThreads, nGPUs, 'c' if fortran else 'r', intercept, standardize, lambda_min_ratio, nLambdas, nFolds, nAlphas)

    # Not using weights
    e=c_void_p(0)

    print("Solving")
    ## Solve
    t0 = time.time()
    print("vars: %d %d %d %d %d %d %d" % (sourceDev, mTrain, n, mValid, intercept, standardize, precision))
    enet.fit(sourceDev, mTrain, n, mValid, intercept, standardize, precision, a, b, c, d, e)
    t1 = time.time()
    print("Done Solving")
    print("Time to train H2O AI GLM: %r" % (t1-t0))

### Train 4000 Elastic Net Models (5-fold cross-validation, 8 $\alpha$ values, 100 $\lambda$ values)

In [None]:
lambda_min_ratio=1E-9
nFolds=5
nAlphas=8
nLambdas=100
nGPUs=maxNGPUS # choose all GPUs


if run==1 and anim==0:
    # Run Model
    arg = intercept,standardize, lambda_min_ratio, nFolds, nAlphas, nLambdas, nGPUs 
    RunH2Oaiglm(arg)

if run==1 and anim==1:
    from threading import Thread

    # Run Model
    arg = intercept,standardize, lambda_min_ratio, nFolds, nAlphas, nLambdas, nGPUs 
    background_thread = Thread(target=RunH2Oaiglm, args=(arg,))
    background_thread.start()

![ScreenShot](gtc-2017-anaconda_v3_key.png)

In [None]:
if run==1 and anim==1:
    # Show Animation
    arg = nGPUs
    RunAnimation(arg)