<img src="https://developer.nvidia.com/sites/default/files/pictures/2018/rapids/rapids-logo.png"/>

[Rapids](https://rapids.ai) is an open-source, GPU-accelerated Data Science and Machine Learning library, developed and maintained by [Nvidia](https://www.nvidia.com). It is designed to be compatible with many existing CPU tools, such as Pandas, scikit-learn, numpy, etc. It enables **massive** acceleration of many data-science and machine learning tasks, oftentimes by a factor of 100X, or even more. If you are interested in installing and running Rapids locally on your own machine, then you should [refer to the following instructions](https://rapids.ai/start.html).

In [None]:
import cudf
import cuml
import cupy as cp
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
from scipy.interpolate import interp1d
import gc
from cuml.linear_model import Ridge
from cuml.neighbors import KNeighborsRegressor
from cuml.svm import SVR
from cuml.ensemble import RandomForestRegressor
from sklearn.model_selection import GroupKFold, KFold
from cuml.metrics import mean_squared_error

import soundfile as sf
# Librosa Libraries
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.metrics import roc_auc_score, label_ranking_average_precision_score

In [None]:
# Read the competition CSVs into cuDF DataFrames.
# All three files sit in the same input directory, so every path is built from
# one base instead of mixing an absolute path with a cwd-dependent '../input'.
DATA_DIR = "/kaggle/input/tabular-playground-series-jan-2021"

train = cudf.read_csv(f"{DATA_DIR}/train.csv")
test = cudf.read_csv(f"{DATA_DIR}/test.csv")
sample_submission = cudf.read_csv(f"{DATA_DIR}/sample_submission.csv")

In [None]:
# Feature names: every column of the test frame except the first one
# (presumably the row id -- confirm against the CSV header).
columns = test.columns[1:]

# Regression labels, taken from the 'target' column of the training frame.
target = train["target"].values

In [None]:
# Buffers for the Ridge model.
# rr_train_oof holds one out-of-fold prediction per training row; it is sized
# from the loaded frame rather than a hard-coded row count so it stays correct
# if the training data changes.
rr_train_oof = cp.zeros((len(train),))
# Running average of the per-fold test predictions (starts as scalar 0).
rr_test_preds = 0
rr_train_oof.shape

In [None]:
# K-fold cross-validation of a cuML Ridge regressor.
# Each fold fits on its training split, stores predictions for the held-out
# rows in rr_train_oof, and adds 1/NUM_FOLDS of its test prediction to
# rr_test_preds, so rr_test_preds ends up as the mean over all folds.
# NOTE: rr_test_preds is accumulated with +=, so re-run the cell that resets it
# to 0 before re-running this one.
NUM_FOLDS = 5
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# The test features are identical for every fold; select them once.
test_features = test[columns]

# kf.split() is a generator with no length, so tell tqdm how many folds to expect.
for train_ind, val_ind in tqdm(kf.split(train, target), total=NUM_FOLDS):
    train_df = train.iloc[train_ind][columns]
    val_df = train.iloc[val_ind][columns]
    train_target = target[train_ind]

    model = Ridge(alpha=0.01)
    model.fit(train_df, train_target)
    temp_oof = model.predict(val_df)
    temp_test = model.predict(test_features)

    rr_train_oof[val_ind] = temp_oof
    rr_test_preds += temp_test / NUM_FOLDS

In [None]:
# Out-of-fold RMSE of the Ridge model (squared=False returns the root).
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, rr_train_oof, squared=False)

In [None]:
# Buffers for the random-forest model.
# rf_train_oof holds one out-of-fold prediction per training row; it is sized
# from the loaded frame rather than a hard-coded row count.
rf_train_oof = cp.zeros((len(train),))
# Running average of the per-fold test predictions (starts as scalar 0).
rf_test_preds = 0
rf_train_oof.shape

In [None]:
# K-fold cross-validation of a cuML RandomForestRegressor.
# Each fold fits on its training split, stores predictions for the held-out
# rows in rf_train_oof, and adds 1/NUM_FOLDS of its test prediction to
# rf_test_preds, so rf_test_preds ends up as the mean over all folds.
# NOTE: rf_test_preds is accumulated with +=, so re-run the cell that resets it
# to 0 before re-running this one.
NUM_FOLDS = 5
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# The test features are identical for every fold; select them once.
test_features = test[columns]

# kf.split() is a generator with no length, so tell tqdm how many folds to expect.
for train_ind, val_ind in tqdm(kf.split(train, target), total=NUM_FOLDS):
    # Unlike the other models, the forest is fed host (NumPy) arrays.
    train_df = cp.asnumpy(train.iloc[train_ind][columns].values)
    val_df = cp.asnumpy(train.iloc[val_ind][columns].values)
    train_target = cp.asnumpy(target[train_ind])

    model = RandomForestRegressor(n_estimators=1000, max_depth=15)
    # The model is fitted on float32 copies of the features and labels.
    model.fit(np.float32(train_df), np.float32(train_target))
    temp_oof = model.predict(val_df)
    temp_test = model.predict(test_features)

    rf_train_oof[val_ind] = temp_oof
    rf_test_preds += temp_test / NUM_FOLDS

In [None]:
# Out-of-fold RMSE of the random-forest model (squared=False returns the root).
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, rf_train_oof, squared=False)

In [None]:
# Out-of-fold RMSE of a 90% random forest / 10% Ridge blend.
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, 0.9 * rf_train_oof + 0.1 * rr_train_oof, squared=False)

In [None]:
# Buffers for the k-nearest-neighbours model.
# knn_train_oof holds one out-of-fold prediction per training row; it is sized
# from the loaded frame rather than a hard-coded row count.
knn_train_oof = cp.zeros((len(train),))
# Running average of the per-fold test predictions (starts as scalar 0).
knn_test_preds = 0
knn_train_oof.shape

In [None]:
# K-fold cross-validation of a cuML KNeighborsRegressor.
# Each fold fits on its training split, stores predictions for the held-out
# rows in knn_train_oof, and adds 1/NUM_FOLDS of its test prediction to
# knn_test_preds, so knn_test_preds ends up as the mean over all folds.
# NOTE: knn_test_preds is accumulated with +=, so re-run the cell that resets
# it to 0 before re-running this one.
NUM_FOLDS = 5
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# The test features are identical for every fold; select them once.
test_features = test[columns]

# kf.split() is a generator with no length, so tell tqdm how many folds to expect.
for train_ind, val_ind in tqdm(kf.split(train, target), total=NUM_FOLDS):
    train_df = train.iloc[train_ind][columns]
    val_df = train.iloc[val_ind][columns]
    train_target = target[train_ind]

    model = KNeighborsRegressor(n_neighbors=150)
    model.fit(train_df, train_target)
    temp_oof = model.predict(val_df)
    temp_test = model.predict(test_features)

    knn_train_oof[val_ind] = temp_oof
    knn_test_preds += temp_test / NUM_FOLDS

In [None]:
# Out-of-fold RMSE of the k-nearest-neighbours model (squared=False returns the root).
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, knn_train_oof, squared=False)

In [None]:
# Out-of-fold RMSE of an equal-weight random forest / KNN blend.
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, 0.5 * rf_train_oof + 0.5 * knn_train_oof, squared=False)

In [None]:
# Buffers for the support-vector regressor.
# svr_train_oof holds one out-of-fold prediction per training row; it is sized
# from the loaded frame rather than a hard-coded row count.
svr_train_oof = cp.zeros((len(train),))
# Running average of the per-fold test predictions (starts as scalar 0).
svr_test_preds = 0
svr_train_oof.shape

In [None]:
# K-fold cross-validation of a cuML SVR.
# Each fold fits on its training split, stores predictions for the held-out
# rows in svr_train_oof, and adds 1/NUM_FOLDS of its test prediction to
# svr_test_preds, so svr_test_preds ends up as the mean over all folds.
# NOTE: svr_test_preds is accumulated with +=, so re-run the cell that resets
# it to 0 before re-running this one.
NUM_FOLDS = 5
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# The test features are identical for every fold; select them once.
test_features = test[columns]

# kf.split() is a generator with no length, so tell tqdm how many folds to expect.
for train_ind, val_ind in tqdm(kf.split(train, target), total=NUM_FOLDS):
    train_df = train.iloc[train_ind][columns]
    val_df = train.iloc[val_ind][columns]
    train_target = target[train_ind]

    model = SVR(C=0.1)
    model.fit(train_df, train_target)
    temp_oof = model.predict(val_df)
    temp_test = model.predict(test_features)

    svr_train_oof[val_ind] = temp_oof
    svr_test_preds += temp_test / NUM_FOLDS

In [None]:
# Out-of-fold RMSE of the support-vector regressor (squared=False returns the root).
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(target, svr_train_oof, squared=False)

In [None]:
# Out-of-fold RMSE of the three-model blend (random forest, KNN, SVR).
# Arguments follow the documented (y_true, y_pred) order; the value is the same
# either way because squared error is symmetric.
mean_squared_error(
    target,
    0.35 * rf_train_oof + 0.34 * knn_train_oof + 0.31 * svr_train_oof,
    squared=False,
)

In [None]:
# Weighted blend of the fold-averaged test predictions from the random forest,
# KNN and SVR models.
final_test_preds = (
    0.35 * rf_test_preds
    + 0.34 * knn_test_preds
    + 0.31 * svr_test_preds
)

# Overwrite the template's target column with the blend and write the file.
sample_submission['target'] = final_test_preds
sample_submission.to_csv('submission.csv', index=False)