- only 3 ensembles
- days from campaign start

In [1]:
import os
import warnings
import numpy as np
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
from datetime import timedelta
from sklearn import preprocessing
from tqdm.notebook import tqdm_notebook
from sklearn.metrics import average_precision_score
from sklearn.model_selection import StratifiedKFold
from imblearn.under_sampling import RandomUnderSampler

# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Hook tqdm's notebook progress bar into pandas, labelled "progress: ".
tqdm_notebook.pandas(desc="progress: ")

In [2]:
# Input directory of the competition data; also used later to find the sample submission.
DIR = "../input/20201115at/"

# Read the three core tables in one pass.
train, test, campaign = (
    pd.read_csv(DIR + file_name)
    for file_name in ("train.csv", "test.csv", "campaign.csv")
)

#advertiser_video = pd.read_csv(DIR+"advertiser_video.csv")
#map_game_feed_native_video_assets = pd.read_csv(DIR+'map_game_feed_native_video_assets.csv')
#advertiser_converted_video = pd.read_csv(DIR+"advertiser_converted_video.csv")

In [3]:
#cols = ["mst_advertiser_video_id", "mst_game_feed_id", "mst_video_template_id"]
#advertiser_converted_video = advertiser_converted_video[~advertiser_converted_video[cols].duplicated(keep="last")]

# preprocess

In [4]:
# Remove training rows of advertisers whose target sum is zero,
# unless the advertiser also appears in test.
target_sum_by_advertiser = train.groupby(['advertiser_id'])['target'].sum()
never_positive = set(target_sum_by_advertiser.index[target_sum_by_advertiser == 0])
test_advertisers = set(test["advertiser_id"].unique())
droppable_advertisers = list(never_positive.difference(test_advertisers))

keep_rows = ~train["advertiser_id"].isin(droppable_advertisers)
train = train.loc[keep_rows].reset_index(drop=True)

In [5]:
# with campaign
def _with_campaign_columns(frame):
    """Left-join the campaign table on campaign_id, then drop its `id` and `mst_advertiser_id` columns."""
    joined = frame.merge(campaign, how="left", left_on="campaign_id", right_on="id")
    return joined.drop(["mst_advertiser_id", "id"], axis=1)


train = _with_campaign_columns(train)
test = _with_campaign_columns(test)

In [6]:
# Parse the impression timestamp and derive calendar date and hour, for both frames.
for frame in (train, test):
    frame["imp_at"] = pd.to_datetime(frame["imp_at"])
    frame["date"] = frame["imp_at"].dt.date
    frame["hour"] = frame["imp_at"].dt.hour

In [7]:
# Timestamp columns that are only parsed for train.
for timestamp_column in ("click_or_view_at", "cv_at"):
    train[timestamp_column] = pd.to_datetime(train[timestamp_column])

In [8]:
# drop because distribution is different
shifted_columns = ['os_version']

# noise features?
suspected_noise_columns = [
    'user_type_id', "category_id", "video_template_id",
    "country_code", "game_feed_asset_type_id", "header_bidding",
]

# Apply both drops in place to train and test alike.
for frame in (train, test):
    frame.drop(shifted_columns, axis=1, inplace=True)
    frame.drop(suspected_noise_columns, axis=1, inplace=True)

In [9]:
# Aggregates computed over train and test together; the concat is built once and reused.
combined = pd.concat([train, test])

# Number of distinct users seen per app.
app_ranking = combined.groupby("app_id")["uid"].nunique().reset_index(drop=False)
app_ranking.columns = ["app_id", "app_users"]

# Number of distinct media apps per (game feed, campaign) pair.
# The column name "madia_app_num" (sic) is kept unchanged because later cells/outputs use it.
media_app_rank = combined.groupby(["game_feed_id", "campaign_id"])["media_app_id"].nunique().reset_index(drop=False)
media_app_rank.columns = ["game_feed_id", "campaign_id", "madia_app_num"]

# Campaign start = date of the earliest impression of the campaign.
# The minimum is taken explicitly: `.first()` would return whichever row happens
# to come first in the concatenated frame, which is only the earliest one if the
# data is already time-sorted, and would otherwise yield negative day offsets.
campaign_start_date = (
    combined.groupby("campaign_id")["imp_at"].min().dt.date.reset_index(drop=False)
)
campaign_start_date.columns = ["campaign_id", "first_date"]

del combined  # free the temporary copy

# Attach the lookups in the same order as before so the column order is unchanged.
for lookup, keys in (
    (app_ranking, "app_id"),
    (media_app_rank, ["game_feed_id", "campaign_id"]),
    (campaign_start_date, ["campaign_id"]),
):
    train = train.merge(lookup, on=keys, how="left")
    test = test.merge(lookup, on=keys, how="left")

In [10]:
# Whole days elapsed between the campaign's first date and the impression date.
for frame in (train, test):
    elapsed = frame["date"] - frame["first_date"]
    frame["days_from_start"] = elapsed.apply(lambda delta: delta.days)

In [11]:
def _high_target_ids(frame, key, min_target_rate=0.1, min_rows=100):
    """Return the values of `key` whose mean target exceeds `min_target_rate`
    and which occur in more than `min_rows` rows of `frame`.

    The result is a list ordered by the sorted group keys.
    """
    stats = frame.groupby(key)["target"].agg(["mean", "size"])
    selected = (stats["mean"] > min_target_rate) & (stats["size"] > min_rows)
    return list(stats.index[selected])


# NOTE(review): both flags are derived from the training target over all of
# `train`, so any cross-validation on `train` sees target-derived information.

# Game feeds with a high target rate and enough rows.
popular_game_feed = _high_target_ids(train, "game_feed_id")
print(len(popular_game_feed))

# Vectorised membership test instead of a per-row scan of a Python list.
train["popular_game"] = train["game_feed_id"].isin(popular_game_feed).astype("int64")
test["popular_game"] = test["game_feed_id"].isin(popular_game_feed).astype("int64")

# Same rule applied to campaigns.
popular_campaign = _high_target_ids(train, "campaign_id")

train["popular_campaign"] = train["campaign_id"].isin(popular_campaign).astype("int64")
test["popular_campaign"] = test["campaign_id"].isin(popular_campaign).astype("int64")

154


In [12]:
# Object-dtype columns to be label-encoded; identifiers and date helpers are left out.
_not_encoded = ("request_id", "uid", "date", "hour", "first_date")
object_feats = [
    name for name in train.columns
    if str(train[name].dtype) == "object" and name not in _not_encoded
]

def get_non_overlapping(train: pd.DataFrame, test: pd.DataFrame, column: str):
    """Return the set of values of `column` that occur in exactly one of the two frames."""
    train_values = set(train[column].unique())
    test_values = set(test[column].unique())
    # Symmetric difference == (train only) | (test only).
    return train_values.symmetric_difference(test_values)

def category2num(train: pd.DataFrame, test: pd.DataFrame, columns: list):
    """Label-encode `columns` consistently across train and test.

    For each column:
      * missing values become the class "missing" (object dtype) or -1 (numeric);
      * values that occur in only one of the two frames are collapsed into
        "other" (object dtype) or -2 (numeric);
      * a LabelEncoder fitted on the combined train+test values maps the result
        to integer codes.

    Args:
        train: training frame (not modified).
        test: test frame (not modified).
        columns: names of the columns to encode.

    Returns:
        A `(train_, test_)` tuple of copies with the listed columns encoded.

    Raises:
        Any exception raised while encoding a column is propagated to the caller
        (the previous version dropped into an interactive pdb session instead,
        which hangs non-interactive runs and hides the error).
    """
    train_ = train.copy()
    test_ = test.copy()
    for column in columns:
        non_overlapping = get_non_overlapping(train, test, column)

        if train[column].dtype == np.dtype("O"):
            # object dtype: missing values get their own 'missing' class
            missing_token, other_token = "missing", "other"
        else:
            # int/float dtype: missing values are encoded as -1
            missing_token, other_token = -1, -2

        train_col = train[column].fillna(missing_token)
        test_col = test[column].fillna(missing_token)
        train_col = train_col.map(lambda x: x if x not in non_overlapping else other_token)
        test_col = test_col.map(lambda x: x if x not in non_overlapping else other_token)

        # Fit on the two columns only; concatenating the full frames per column is unnecessary.
        le = preprocessing.LabelEncoder()
        le.fit(pd.concat([train_col, test_col], axis=0).reset_index(drop=True))
        train_[column] = le.transform(train_col)
        test_[column] = le.transform(test_col)
    return train_, test_

# Replace the object-dtype feature columns of both frames with label-encoded integer codes.
train, test = category2num(train, test, object_feats)

In [13]:
# Drop because they exist only in train.
train_only_columns = ['click_or_view_at', 'cv_at', 'last_cv_interval', 'last_paid_interval']
train.drop(columns=train_only_columns, inplace=True)

# feature engineering

In [14]:
# Tag each frame with its origin, stack them, and order rows per user by impression time.
train["type"], test["type"] = "train", "test"
all_df = (
    pd.concat([train, test])
    .sort_values(["uid", "imp_at"])
    .reset_index(drop=True)
)

In [15]:
# Running impression number within each uid (1 for the user's first row in the current sort order).
all_df["count"] = all_df.assign(count=1).groupby("uid")["count"].cumsum()

In [16]:
def _create_whole_imp_at_feature(whole_df, var) -> pd.DataFrame:

    imp_at = pd.to_datetime(whole_df['imp_at']) + timedelta(hours=9) # utc -> asia/tokyo
    out_df = pd.DataFrame()
    out_df['hours'] = imp_at.dt.hour
    out_df['dayofweek'] = imp_at.dt.dayofweek
    out_df['hour_zone'] = pd.cut(out_df['hours'].values, bins=[-np.inf, 6, 12, 18, np.inf]).codes

    def _create_pivot(input_df, c, column='dayofweek', values='hours'):
        _df = pd.pivot_table(data=input_df, index=[c], columns=[column], values=values, aggfunc='count')
        _df = _df.fillna(0)
        _df.columns = [column + '=' + str(x) for x in _df.columns]

        # index ごとに正規化して割合にする
        _df = _df.div(_df.sum(axis=1), axis=0)
        return pd.merge(input_df[c], _df, on=c, how='left').drop(columns=[c]).add_prefix(c + '_')

    # uid での集約
    for c in [var]:
        _df = out_df.copy()
        _df[c] = whole_df[c]
        _feat = pd.concat([
            _create_pivot(_df, c),
            _create_pivot(_df, c, column='hour_zone', values='dayofweek')  
        ], axis=1)
        out_df = pd.concat([out_df, _feat], axis=1)

    out_df['request_id'] = whole_df['request_id']
    return out_df

# Per-user time profiles; the row-level helper columns are discarded before joining back on request_id.
user_imp_df = _create_whole_imp_at_feature(all_df, "uid").drop(
    columns=["hours", "dayofweek", "hour_zone"]
)
all_df = all_df.merge(user_imp_df, on="request_id", how="left")

In [17]:
# Per calendar date: number of distinct campaigns (camp_num) and distinct users (user_num).
for counted_column, feature_name in (("campaign_id", "camp_num"), ("uid", "user_num")):
    per_date = (
        all_df.groupby("date")[counted_column]
        .nunique()
        .rename(feature_name)
        .reset_index(drop=False)
    )
    all_df = all_df.merge(per_date, on="date", how="left")

# The date helpers are not used as model features.
all_df = all_df.drop(columns=["date", "hour", "first_date"])

In [18]:
# Split the combined frame back into train and test using the origin tag, then drop the tag.
train, test = (
    all_df.loc[all_df["type"] == origin].drop(columns="type").reset_index(drop=True)
    for origin in ("train", "test")
)

In [19]:
# Order both frames chronologically by impression time.
train, test = (
    frame.sort_values("imp_at").reset_index(drop=True)
    for frame in (train, test)
)

In [20]:
# Count encoding: replace each value by its number of occurrences across train and test.
ce_feats = ["uid"]
for encoded_column in ce_feats:
    occurrences = pd.concat([train[encoded_column], test[encoded_column]]).value_counts()
    train[encoded_column] = train[encoded_column].map(occurrences)
    test[encoded_column] = test[encoded_column].map(occurrences)

# separation for validation

In [21]:
# request_id is a row identifier, not a feature; remove it from both frames in place.
for frame in (train, test):
    frame.drop(['request_id'], axis=1, inplace=True)

In [22]:
# Time-based hold-out: impressions after 2020-05-21 form the validation set.
is_holdout = train["imp_at"] > "2020-05-21"
train_df = train[~is_holdout].copy()
valid_df = train[is_holdout].copy()

In [23]:
# The raw timestamp is no longer needed once the split has been made.
train, test, valid_df, train_df = (
    frame.drop(columns=["imp_at"])
    for frame in (train, test, valid_df, train_df)
)

In [24]:
# `test` picked up a target column when it was stacked with train; remove it.
test = test.drop(columns=["target"])

In [25]:
# Sanity check: train should have exactly one more column (target) than test.
train.shape, test.shape 

((1856405, 43), (390095, 42))

In [26]:
# Final list of columns passed to the models (target plus features).
train.columns

Index(['target', 'adnw_id', 'adspot_id', 'adspot_video_format_id',
       'advertiser_id', 'app_id', 'auction_type_id', 'campaign_id',
       'first_login_interval', 'frequency', 'from_click', 'game_feed_id',
       'game_template_id', 'is_interstitial', 'item_id', 'last_login_interval',
       'login_frequency', 'max_login_interval', 'media_app_id', 'os', 'pos',
       'uid', 'mst_advertiser_order_id', 'mst_user_type_id', 'app_users',
       'madia_app_num', 'days_from_start', 'popular_game', 'popular_campaign',
       'count', 'uid_dayofweek=0', 'uid_dayofweek=1', 'uid_dayofweek=2',
       'uid_dayofweek=3', 'uid_dayofweek=4', 'uid_dayofweek=5',
       'uid_dayofweek=6', 'uid_hour_zone=0', 'uid_hour_zone=1',
       'uid_hour_zone=2', 'uid_hour_zone=3', 'camp_num', 'user_num'],
      dtype='object')

In [27]:
# Keep only the encoded categorical columns that survived the earlier drops.
remaining_columns = set(train.columns)
object_feats = [feat for feat in object_feats if feat in remaining_columns]
object_feats

['advertiser_id',
 'app_id',
 'campaign_id',
 'game_feed_id',
 'game_template_id',
 'media_app_id',
 'mst_advertiser_order_id']

# stratified lgb

In [28]:
# stratified k fold
n_folds = 5


def pr_auc_metric(y_predicted, y_true):
    """Custom LightGBM eval: average precision of the predictions.

    `y_true` is the evaluated dataset object (labels are read via `get_label()`).
    Returns a `(name, value, flag)` triple; the flag is always True.
    """
    labels = y_true.get_label()
    pr_auc = average_precision_score(labels, y_predicted)
    return 'pr_auc', pr_auc, True

def modelling(seed):
    """Stratified K-fold LightGBM on the global `train`, predicting the global `test`.

    Args:
        seed: value passed to LightGBM as `random_seed` (the fold split itself
            always uses random_state=0).

    Returns:
        `(pred_value, valid, feature_importance_df)`:
          * test predictions averaged over the `n_folds` fold models,
          * out-of-fold predictions for every row of `train`,
          * per-fold feature importances with their Average, Std and Cv (Std / Average).
    """
    print("seed:", seed)
    lgbm_params = {
        'objective': 'binary',
        'metric': 'None',
        'boosting_type': 'gbdt',
        'tree_learner': 'serial',
        'learning_rate': 0.1,
        "num_leaves": 10,
        'random_seed': seed,
        'max_depth': 5,
    }

    features = train.drop(['target'], axis=1).copy()
    labels = train.target.copy()

    splitter = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)

    oof_pred = np.zeros([features.shape[0]])
    test_pred = np.zeros(test.shape[0])
    feature_importance_df = pd.DataFrame(list(features.columns), columns=["Feature"])

    for fold, (fit_idx, holdout_idx) in enumerate(splitter.split(features, labels)):
        print("fold: ", fold)

        X_fit, y_fit = features.iloc[fit_idx, :], labels.iloc[fit_idx]
        X_holdout, y_holdout = features.iloc[holdout_idx, :], labels.iloc[holdout_idx]

        fit_set = lgb.Dataset(X_fit, y_fit)
        holdout_set = lgb.Dataset(X_holdout, y_holdout, reference=fit_set)

        booster = lgb.train(
            lgbm_params,
            fit_set,
            valid_sets=[fit_set, holdout_set],
            num_boost_round=10000,
            early_stopping_rounds=20,
            verbose_eval=30,
            feval=pr_auc_metric,
            categorical_feature=sorted(object_feats),
        )

        # Predict with the trees up to the early-stopped best iteration.
        best_round = booster.best_iteration
        oof_pred[holdout_idx] = booster.predict(X_holdout, num_iteration=best_round)
        test_pred += booster.predict(test, num_iteration=best_round) / n_folds
        feature_importance_df["Fold_" + str(fold + 1)] = booster.feature_importance()

    # Columns 1..n_folds are the per-fold importances (column 0 is the feature name).
    fold_columns = slice(1, n_folds + 1)
    feature_importance_df["Average"] = np.mean(feature_importance_df.iloc[:, fold_columns], axis=1)
    feature_importance_df["Std"] = np.std(feature_importance_df.iloc[:, fold_columns], axis=1)
    feature_importance_df["Cv"] = feature_importance_df["Std"] / feature_importance_df["Average"]

    return test_pred, oof_pred, feature_importance_df

# Average out-of-fold and test predictions over all seeds, then score the OOF predictions.
seeds = [0]
validation = np.zeros([train.shape[0]])
pred_value = np.zeros([test.shape[0]])
for seed_ in seeds:
    seed_test_pred, seed_oof_pred, feature_importance_df = modelling(seed_)
    pred_value += seed_test_pred / len(seeds)
    validation += seed_oof_pred / len(seeds)

score = average_precision_score(train.target, validation)
print(score)

seed: 0
fold:  0
Training until validation scores don't improve for 20 rounds
[30]	training's pr_auc: 0.27296	valid_1's pr_auc: 0.264796
[60]	training's pr_auc: 0.295868	valid_1's pr_auc: 0.27967
[90]	training's pr_auc: 0.309346	valid_1's pr_auc: 0.286163
[120]	training's pr_auc: 0.319423	valid_1's pr_auc: 0.290065
[150]	training's pr_auc: 0.325938	valid_1's pr_auc: 0.29116
[180]	training's pr_auc: 0.33448	valid_1's pr_auc: 0.294057
[210]	training's pr_auc: 0.340371	valid_1's pr_auc: 0.294256
[240]	training's pr_auc: 0.346654	valid_1's pr_auc: 0.295449
Early stopping, best iteration is:
[245]	training's pr_auc: 0.347955	valid_1's pr_auc: 0.295729
fold:  1
Training until validation scores don't improve for 20 rounds
[30]	training's pr_auc: 0.273677	valid_1's pr_auc: 0.269212
[60]	training's pr_auc: 0.296034	valid_1's pr_auc: 0.285186
[90]	training's pr_auc: 0.310436	valid_1's pr_auc: 0.292183
[120]	training's pr_auc: 0.319106	valid_1's pr_auc: 0.294492
[150]	training's pr_auc: 0.32748	v

In [29]:
# Features ranked by their mean importance across folds.
feature_importance_df.sort_values("Average", ascending=False)

Unnamed: 0,Feature,Fold_1,Fold_2,Fold_3,Fold_4,Fold_5,Average,Std,Cv
4,app_id,541,391,514,781,379,521.2,144.991586,0.278188
6,campaign_id,358,286,344,431,296,343.0,51.822775,0.151087
17,media_app_id,264,180,225,376,175,244.0,73.541825,0.301401
20,uid,170,136,162,185,139,158.4,18.618271,0.11754
10,game_feed_id,160,112,142,237,99,150.0,48.534524,0.323563
3,advertiser_id,118,99,101,149,94,112.2,20.09378,0.179089
28,count,81,66,80,96,58,76.2,13.151426,0.172591
21,mst_advertiser_order_id,71,52,70,101,43,67.4,19.905778,0.295338
7,first_login_interval,46,32,37,74,32,44.2,15.753095,0.356405
14,last_login_interval,46,28,39,59,34,41.2,10.684568,0.259334


# ts lgb

In [30]:
def pr_auc_metric(y_predicted, y_true):
    """Average-precision eval function for LightGBM (same contract as in the stratified cell).

    Reads the labels from `y_true.get_label()` and returns `('pr_auc', value, True)`.
    """
    average_precision = average_precision_score(y_true.get_label(), y_predicted)
    return 'pr_auc', average_precision, True

def modelling_ts(seed):
    """Train one LightGBM model on the time-based split and predict the global `test`.

    The model is fitted on `train_df`, early-stopped on `valid_df`, and then
    used to predict both `valid_df` and `test`.

    Args:
        seed: value passed to LightGBM as `random_seed`.

    Returns:
        `(pred_value, valid_predict, feature_importance_df)`:
          * predictions for `test`,
          * predictions for `valid_df`,
          * a frame with one row per feature and its importance.
    """
    print("seed:", seed)
    lgbm_params = {'objective': 'binary', 'metric': 'None', 'boosting_type': 'gbdt',
               'tree_learner': 'serial', 'learning_rate': 0.1, 'random_seed':seed,'max_depth': 5}
    y_train = train_df.target.copy()
    X_train = train_df.drop(['target'],axis=1).copy()
    y_valid = valid_df.target.copy()
    X_valid = valid_df.drop(['target'],axis=1).copy()

    feature_importance_df = pd.DataFrame(list(X_train.columns), columns=["Feature"])

    # (A RandomUnderSampler step on the negative class was previously kept here, commented out.)

    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train)

    model = lgb.train(lgbm_params, lgb_train,valid_sets=[lgb_train, lgb_eval],
       num_boost_round=10000,early_stopping_rounds=10,verbose_eval = 10, feval=pr_auc_metric, categorical_feature = sorted(object_feats))

    # Predict with the trees up to the early-stopped best iteration.
    valid_predict = model.predict(X_valid, num_iteration = model.best_iteration)
    feature_importance_df["Importance"] = model.feature_importance()

    score = average_precision_score(y_valid, valid_predict)
    print("average precision score = {}".format(score))
    pred_value = model.predict(test, num_iteration = model.best_iteration)

    return pred_value, valid_predict, feature_importance_df

# Average hold-out and test predictions over all seeds, then score the hold-out predictions.
seeds = [0]
validation_ts = np.zeros([valid_df.shape[0]])
pred_value_ts = np.zeros([test.shape[0]])
for seed_ in seeds:
    seed_test_pred, seed_valid_pred, feature_importance_df = modelling_ts(seed_)
    pred_value_ts += seed_test_pred / len(seeds)
    validation_ts += seed_valid_pred / len(seeds)

ts_score = average_precision_score(valid_df.target, validation_ts)
print(ts_score)

seed: 0
Training until validation scores don't improve for 10 rounds
[10]	training's pr_auc: 0.289076	valid_1's pr_auc: 0.225873
[20]	training's pr_auc: 0.293322	valid_1's pr_auc: 0.231327
[30]	training's pr_auc: 0.300198	valid_1's pr_auc: 0.235064
[40]	training's pr_auc: 0.309861	valid_1's pr_auc: 0.237893
[50]	training's pr_auc: 0.319679	valid_1's pr_auc: 0.240666
[60]	training's pr_auc: 0.329407	valid_1's pr_auc: 0.242594
[70]	training's pr_auc: 0.336348	valid_1's pr_auc: 0.243706
[80]	training's pr_auc: 0.342521	valid_1's pr_auc: 0.244472
[90]	training's pr_auc: 0.3474	valid_1's pr_auc: 0.244886
[100]	training's pr_auc: 0.352795	valid_1's pr_auc: 0.245632
[110]	training's pr_auc: 0.357567	valid_1's pr_auc: 0.245835
[120]	training's pr_auc: 0.363008	valid_1's pr_auc: 0.246505
[130]	training's pr_auc: 0.36816	valid_1's pr_auc: 0.246376
Early stopping, best iteration is:
[125]	training's pr_auc: 0.366405	valid_1's pr_auc: 0.246738
average precision score = 0.24673768501285995
0.246737

In [31]:
# Features of the time-split model ranked by importance.
feature_importance_df.sort_values("Importance", ascending=False)

Unnamed: 0,Feature,Importance
4,app_id,562
6,campaign_id,396
17,media_app_id,245
23,app_users,234
7,first_login_interval,222
16,max_login_interval,203
20,uid,184
10,game_feed_id,157
24,madia_app_num,142
14,last_login_interval,141


# stratified xgboost

In [32]:
# https://xgboost.readthedocs.io/en/latest/parameter.html
def pr_auc_metric(y_predicted, y_true):
    """XGBoost variant of the eval function: returns `('pr_auc', -average_precision)`.

    The sign is flipped — presumably so that a lower value is better for early
    stopping; confirm against the xgboost version in use.
    """
    average_precision = average_precision_score(y_true.get_label(), y_predicted)
    return 'pr_auc', -average_precision

# Number of stratified folds used by modelling_xgb below.
n_folds=5
# Seed NumPy's global random number generator.
np.random.seed(0)
def modelling_xgb():
    """Stratified K-fold XGBoost on the global `train`, predicting the global `test`.

    Returns:
        `(pred_value, valid)`: test predictions averaged over the `n_folds`
        fold models, and out-of-fold predictions for every row of `train`.
    """
    xgb_params = {
        "objective": "binary:logistic",
        "max_depth": 5,
        "learning_rate": 0.1,
        "tree_method": "gpu_hist",
    }
    labels = train.target
    features = train.drop(['target'], axis=1)

    splitter = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)

    oof_pred = np.zeros([features.shape[0]])
    test_pred = np.zeros(test.shape[0])
    test_matrix = xgb.DMatrix(test.copy())
    num_boost_round = 100000

    for fold, (fit_idx, holdout_idx) in enumerate(splitter.split(features, labels)):
        print("fold: ", fold)

        fit_matrix = xgb.DMatrix(features.iloc[fit_idx, :], label=labels.iloc[fit_idx])
        holdout_matrix = xgb.DMatrix(features.iloc[holdout_idx, :], label=labels.iloc[holdout_idx])

        booster = xgb.train(
            xgb_params,
            fit_matrix,
            num_boost_round,
            [(fit_matrix, "train"), (holdout_matrix, "eval")],
            early_stopping_rounds=10,
            verbose_eval=50,
            feval=pr_auc_metric,
        )

        # Predict with the trees up to the early-stopped best round.
        tree_limit = booster.best_ntree_limit
        oof_pred[holdout_idx] = booster.predict(holdout_matrix, ntree_limit=tree_limit)
        test_pred += booster.predict(test_matrix, ntree_limit=tree_limit) / n_folds

    return test_pred, oof_pred

# Run the XGBoost cross-validation and report the out-of-fold average precision.
pred_value_xgb, validation_xgb = modelling_xgb()
score_xgb = average_precision_score(y_true=train.target, y_score=validation_xgb)
print(score_xgb)

fold:  0
[0]	train-error:0.03519	eval-error:0.03522	train-pr_auc:-0.21772	eval-pr_auc:-0.21458
Multiple eval metrics have been passed: 'eval-pr_auc' will be used for early stopping.

Will train until eval-pr_auc hasn't improved in 10 rounds.
[50]	train-error:0.03481	eval-error:0.03489	train-pr_auc:-0.27824	eval-pr_auc:-0.27030
[100]	train-error:0.03450	eval-error:0.03466	train-pr_auc:-0.30441	eval-pr_auc:-0.29059
[150]	train-error:0.03441	eval-error:0.03466	train-pr_auc:-0.31578	eval-pr_auc:-0.29727
[200]	train-error:0.03430	eval-error:0.03462	train-pr_auc:-0.32595	eval-pr_auc:-0.30263
[250]	train-error:0.03422	eval-error:0.03460	train-pr_auc:-0.33363	eval-pr_auc:-0.30600
[300]	train-error:0.03414	eval-error:0.03458	train-pr_auc:-0.34116	eval-pr_auc:-0.30847
[350]	train-error:0.03404	eval-error:0.03460	train-pr_auc:-0.34807	eval-pr_auc:-0.31081
[400]	train-error:0.03398	eval-error:0.03460	train-pr_auc:-0.35502	eval-pr_auc:-0.31269
[450]	train-error:0.03389	eval-error:0.03461	train-pr_a

# submission

In [33]:
# Equal-weight blend of the stratified LightGBM, time-split LightGBM and XGBoost predictions.
lgb_sub = pd.read_csv(DIR + "atmaCup7__sample_submission.csv")
# NOTE(review): predictions are written positionally, in the row order of `test`
# after it was re-sorted by imp_at earlier in the notebook — verify that this
# order matches the sample submission.
lgb_sub["target"] = (pred_value + pred_value_ts + pred_value_xgb) / 3
# The file name carries the stratified-LightGBM CV score with a fixed six decimals;
# slicing the float's repr (`str(score)[:-10]`) depended on the repr length and
# could yield an empty or inconsistent suffix.
lgb_sub.to_csv("atmacup7_{:.6f}.csv".format(score), index = False)