In [1]:
// Switch the process into the cudf notebooks directory unless the current
// working directory path already contains it. A later cell opens the CSV via a
// relative `data/...` path, which presumably depends on this (confirm).
{
    const notebooksDir = 'modules/cudf/notebooks';
    const alreadyInside = process.cwd().includes(notebooksDir);
    if (!alreadyInside) {
        process.chdir(notebooksDir);
    }
}

In [2]:
var cudf = require('@rapidsai/cudf');

// Weather-condition patterns, one regular expression per category. Each entry
// is a case-sensitive alternation with no flags; later cells pass `.source`
// of every entry to the matching routines and use it as a timer label.
var regexps = [
  'Cloud|Overcast',
  'Rain|T-Storm|Thunderstorm|Squalls|Drizzle',
  'Snow',
  'Fog',
  'Ice|Hail|Freezing|Sleet',
  'Dust|Smoke|Sand',
].map((pattern) => new RegExp(pattern));

In [3]:
/**
 * Read `data/US_Accidents_Dec20.csv` (relative to the current working
 * directory) through `cudf.DataFrame.readCSV`, supplying an explicit dtype
 * for every listed column.
 *
 * @returns whatever `cudf.DataFrame.readCSV` returns for these options.
 */
function readUSAccidentsCSV() {
    // One shared dtype instance per kind, reused across all columns below.
    const bool = new cudf.Bool8();
    const i32  = new cudf.Int32();
    const str  = new cudf.Utf8String();
    const f64  = new cudf.Float64();

    // Column name -> dtype, listed in the same order as the original schema.
    const dataTypes = {
      id: str,
      source: str,
      tmc: f64,
      severity: i32,
      start_time: str,
      end_time: str,
      start_lat: f64,
      start_lng: f64,
      end_lat: f64,
      end_lng: f64,
      distance: f64,
      description: str,
      number: i32,
      street: str,
      side: str,
      city: str,
      county: str,
      state: str,
      zipcode: str,
      country: str,
      timezone: str,
      airport_code: str,
      weather_timestamp: str,
      temperature: f64,
      wind_chill: f64,
      humidity: f64,
      pressure: f64,
      visibility: f64,
      wind_direction: str,
      wind_speed: f64,
      precipitation: f64,
      weather_condition: str,
      amenity: bool,
      bump: bool,
      crossing: bool,
      give_way: bool,
      junction: bool,
      no_exit: bool,
      railway: bool,
      roundabout: bool,
      station: bool,
      stop: bool,
      traffic_calming: bool,
      traffic_signal: bool,
      turning_loop: bool,
      sunrise_sunset: str,
      civil_twilight: str,
      // NOTE(review): the "twighlight" spelling of the next two keys is kept
      // exactly as written; confirm whether it is intentional before renaming,
      // since these keys may be used as column names at runtime.
      nautical_twighlight: str,
      astronomical_twighlight: str,
    };

    return cudf.DataFrame.readCSV({
      header: 0,
      sourceType: 'files',
      sources: ['data/US_Accidents_Dec20.csv'],
      dataTypes,
    });
}

In [7]:
// Load the accidents table and keep only its `weather_condition` column.
// This happens before the `GPU time` timer starts, so the CSV read is not
// included in any of the timings printed below.
var weather_condition_gpu = readUSAccidentsCSV().get('weather_condition');

console.time(`GPU time`);

// For every pattern: time `containsRe(...).sum()` on the column, then print
// the resulting count (presumably the number of rows that match — confirm
// against the cudf API).
for (const regexp of regexps) {
  const label = `${regexp.source} time`;
  console.time(label);
  const hits = weather_condition_gpu.containsRe(regexp.source);
  const matches = hits.sum();
  console.timeEnd(label);
  console.log(`${regexp.source} matches: ${matches.toLocaleString()}\n`);
}

console.timeEnd(`GPU time`);

Cloud|Overcast time: 27.504ms
Cloud|Overcast matches: 1,896,354

Rain|T-Storm|Thunderstorm|Squalls|Drizzle time: 64.973ms
Rain|T-Storm|Thunderstorm|Squalls|Drizzle matches: 326,441

Snow time: 6.591ms
Snow matches: 68,101

Fog time: 6.441ms
Fog matches: 52,063

Ice|Hail|Freezing|Sleet time: 36.736ms
Ice|Hail|Freezing|Sleet matches: 4,698

Dust|Smoke|Sand time: 27.006ms
Dust|Smoke|Sand matches: 8,846

GPU time: 171.13ms


In [8]:
// Materialise the `weather_condition` column as a plain JavaScript array with
// one entry per row: cast the column to a categorical, copy its categories and
// per-row codes into arrays, then look each code up in the categories.
// This conversion runs before the `CPU time` timer starts, so it is not timed.
var weather_condition_cpu = (() => {
  const categorical = weather_condition_gpu.cast(new cudf.Categorical(new cudf.Utf8String));
  const categories = [...categorical.categories];
  const codes = [...categorical.codes];
  return codes.map((i) => categories[i]);
})();

console.time(`CPU time`);

// For every pattern: time a pass over the array that counts the rows the
// pattern matches, then print that count.
regexps.forEach((regexp) => {
  console.time(`${regexp.source} time`);
  const matches = weather_condition_cpu.reduce((count, weather_condition) => {
    // Count exactly one per matching row. The length of the array returned by
    // `exec` is 1 + the number of capture groups, so using it as the increment
    // would over-count for any pattern containing a group. Missing values are
    // skipped explicitly because RegExp matching coerces its argument to a
    // string, which would otherwise test the text "undefined" / "null".
    const isMatch = weather_condition != null && regexp.test(weather_condition);
    return count + (isMatch ? 1 : 0);
  }, 0);
  console.timeEnd(`${regexp.source} time`);
  console.log(`${regexp.source} matches: ${matches.toLocaleString()}\n`);
});

console.timeEnd(`CPU time`);

Cloud|Overcast time: 231.976ms
Cloud|Overcast matches: 1,896,354

Rain|T-Storm|Thunderstorm|Squalls|Drizzle time: 202.052ms
Rain|T-Storm|Thunderstorm|Squalls|Drizzle matches: 326,441

Snow time: 207.602ms
Snow matches: 68,101

Fog time: 216.786ms
Fog matches: 52,063

Ice|Hail|Freezing|Sleet time: 205.503ms
Ice|Hail|Freezing|Sleet matches: 4,698

Dust|Smoke|Sand time: 200.62ms
Dust|Smoke|Sand matches: 8,846

CPU time: 1.267s
