## SVR Stacking

### This kernel uses the output files submission_{name}.csv and training_{name}.csv from https://www.kaggle.com/ngo1013/preparation-for-stacking-by-regression-8-model
### submission_{name}.csv ・・・ A submission file predicted using the {name} algorithm.
### training_{name}.csv ・・・ A training file predicted using the {name} algorithm.

### In this case, I use my own dataset: https://www.kaggle.com/ngo1013/brain-models

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Base models whose predictions are stacked; each name is substituted into
# the 'training_{}.csv' / 'submission_{}.csv' file names and is also used as
# a feature column name for the stacking model.
algorithm = [
    'lasso',
    'elastic',
    'ridge',
    'lightgbm',
    'xgboost',
    'svr',
    'linear',
    'tweedie',
]

# Names of the five prediction targets.
columns = [
    'age',
    'domain1_var1',
    'domain1_var2',
    'domain2_var1',
    'domain2_var2',
]

In [None]:
# Directory that holds the per-algorithm training/submission prediction files.
DATA_PATH = '../input/brain-models/'

# Start the merged training table from the 'Id' column of one prediction file;
# the remaining prediction columns are merged onto it by 'Id' afterwards.
df_pred = pd.read_csv('{}training_ridge.csv'.format(DATA_PATH))['Id']

In [None]:
# Merge every algorithm's training predictions into df_pred, keyed on 'Id'.
for algo in algorithm:
    preds = pd.read_csv(DATA_PATH + 'training_{}.csv'.format(algo))
    # Every column after the first becomes '<algorithm>_<name without its
    # first five characters>' (presumably stripping a 'pred_' prefix --
    # confirm against the input files).
    preds = preds.rename(
        columns={name: '{}_{}'.format(algo, name[5:]) for name in preds.columns[1:]}
    )
    df_pred = pd.merge(df_pred, preds, on='Id')

In [None]:
# Start the merged submission table from the 'Id' column of one submission file.
df_sub = pd.read_csv('{}submission_ridge.csv'.format(DATA_PATH))['Id']

# Merge every algorithm's submission predictions into df_sub, keyed on 'Id'.
for algo in algorithm:
    preds = pd.read_csv(DATA_PATH + 'submission_{}.csv'.format(algo))
    # Prefix every column after the first with '<algorithm>_' so the columns
    # of different algorithms stay distinct after merging.
    preds = preds.rename(
        columns={name: '{}_{}'.format(algo, name) for name in preds.columns[1:]}
    )
    df_sub = pd.merge(df_sub, preds, on='Id')

In [None]:
def extract_algo(df, col):
    """Return the predictions for one target, one column per algorithm.

    The first column of ``df`` is kept unchanged. Of the remaining columns,
    only those whose name contains ``col`` as a substring are kept, and each
    one is renamed to the part of its name before the first underscore
    (e.g. ``'lasso_age'`` becomes ``'lasso'``).

    The input frame is never modified; a new DataFrame is returned.

    Args:
        df: DataFrame whose first column is the identifier and whose other
            columns are named ``'<algorithm>_<target>'``.
        col: Target name used to select columns (substring match).

    Returns:
        A new DataFrame with the first column of ``df`` followed by one
        column per matching input column, in their original order.
    """
    # Build a fresh frame instead of assigning into ``df``: the previous
    # add-then-drop approach could write into the caller's frame and lost any
    # matching column whose name contained no underscore.
    result = df[list(df.columns[:1])].copy()
    for name in df.columns[1:]:
        if col in name:
            result[name.split('_')[0]] = df[name]
    return result

In [None]:
# Preview the table restricted to columns whose name contains 'age';
# the result is only displayed by the notebook, it is not assigned.
extract_algo(df_pred, 'age')

In [None]:
# Load train_scores.csv; later cells left-merge it onto the prediction tables by "Id".
score_pd = pd.read_csv("/kaggle/input/trends-assessment-prediction/train_scores.csv")

In [None]:
# Display the loaded scores table (notebook output only).
score_pd

In [None]:
# Submission-side table for 'domain1_var1'; a copy of it becomes sub_df, the
# frame that later receives one prediction column per target.
test_df = extract_algo(df_sub, 'domain1_var1')
sub_df = test_df.copy()

# Training-side table for 'domain1_var1', joined with the true scores by Id.
df = extract_algo(df_pred, 'domain1_var1').merge(score_pd, on="Id", how="left")

In [None]:
# Tree-model style settings (the names look like LightGBM parameters --
# TODO confirm). Nothing in the visible SVR stacking code reads this dict.
params = dict(
    max_depth=-1,
    num_leaves=3,
    min_data_in_leaf=4,
)


# Support Vector Regression (SVR) Stacking

In [None]:
from sklearn.model_selection import KFold

def metric(y_true, y_pred):
    """Return the mean over columns of sum(|y_true - y_pred|) / sum(y_true).

    Both sums run along axis 0, so a 1-D input yields a single ratio and a
    2-D input yields one ratio per column, which are then averaged.
    """
    total_abs_error = np.sum(np.abs(y_true - y_pred), axis=0)
    total_truth = np.sum(y_true, axis=0)
    return np.mean(total_abs_error / total_truth)

In [None]:
from sklearn.svm import SVR

In [None]:
NUM_FOLDS = 5
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=0)

# One entry per target: (target column, SVR regularisation parameter C,
# weight of the target's score in the overall score).
target_settings = [
    ("age", 100, 0.3),
    ("domain1_var1", 10, 0.175),
    ("domain1_var2", 10, 0.175),
    ("domain2_var1", 10, 0.175),
    ("domain2_var2", 10, 0.175),
]

overal_score = 0
for target, c, w in target_settings:
    # Stacking tables for this target: Id + one feature column per base
    # algorithm; the training table additionally carries the true scores.
    df = extract_algo(df_pred, target)
    df = df.merge(score_pd, on="Id", how="left")
    test_df = extract_algo(df_sub, target)

    y_oof = np.zeros(df.shape[0])                     # out-of-fold predictions
    y_test = np.zeros((test_df.shape[0], NUM_FOLDS))  # one test prediction per fold

    for f, (train_ind, val_ind) in enumerate(kf.split(df)):
        train_df, val_df = df.iloc[train_ind], df.iloc[val_ind]  # train / validation split
        train_df = train_df[train_df[target].notnull()]  # drop rows with a missing target

        # The per-target C was previously unpacked but never handed to the
        # model, so every target was silently fitted with the default C=1.0.
        model = SVR(kernel='rbf', C=c)
        # Features are the base-model predictions, the label is the true score.
        model.fit(train_df[algorithm], train_df[target])

        # Collect predictions for the rows held out of this fold.
        y_oof[val_ind] = model.predict(val_df[algorithm])
        y_test[:, f] = model.predict(test_df[algorithm])

    df["pred_{}".format(target)] = y_oof
    # The submission value is the average of the fold models' predictions.
    sub_df[target] = y_test.mean(axis=1)

    # Score only the rows that actually have a true value for this target.
    has_target = df[target].notnull()
    score = metric(
        df.loc[has_target, target].values,
        df.loc[has_target, "pred_{}".format(target)].values,
    )
    overal_score += w*score
    print(target, np.round(score, 4))
    print()

print("Overal score:", np.round(overal_score, 4))

In [None]:
# Display the wide submission table (notebook output only).
sub_df

In [None]:
# Reshape the wide table (one column per target) into long form with one row
# per (Id, target) pair, build the combined "<Id>_<target>" key, then drop the
# helper column and order the rows by that key.
sub_df = (
    sub_df.melt(
        id_vars=["Id"],
        value_vars=["age", "domain1_var1", "domain1_var2", "domain2_var1", "domain2_var2"],
        value_name="Predicted",
    )
    .assign(Id=lambda long: long["Id"].astype("str") + "_" + long["variable"].astype("str"))
    .drop("variable", axis=1)
    .sort_values("Id")
)

In [None]:
# Write the predictions to submission.csv without the DataFrame index.
sub_df.to_csv("submission.csv", index=False)