In this kernel, I check the score of an all-zero prediction over various evaluation date ranges.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize

import os
import gc

from sklearn.metrics import mean_squared_error

## data preparation

In [None]:
def load_df(csv_path, chunksize=100000):
    """Load date, visitor id and transaction revenue from a CSV file.

    The file is read in chunks of ``chunksize`` rows. The JSON-encoded
    ``totals`` column is decoded and flattened into ``totals_<key>`` columns,
    and only the columns listed in ``features`` are kept from each chunk.

    Args:
        csv_path: Path of the CSV file to read.
        chunksize: Number of rows per chunk passed to ``pd.read_csv``.

    Returns:
        A DataFrame indexed 0..n-1 with the columns ``date``,
        ``fullVisitorId`` and ``totals_transactionRevenue``. Rows whose
        ``totals`` JSON has no ``transactionRevenue`` key hold NaN there.
    """
    features = ['date', 'fullVisitorId', 'totals_transactionRevenue']
    JSON_COLS = ['totals']

    # Newer pandas exposes json_normalize at the top level; fall back to the
    # pandas.io.json location for older releases.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        from pandas.io.json import json_normalize as normalize

    print('Load {}'.format(csv_path))
    df_reader = pd.read_csv(csv_path,
                            converters={column: json.loads for column in JSON_COLS},
                            dtype={'date': str, 'fullVisitorId': str},
                            usecols=['date', 'fullVisitorId', 'totals'],
                            chunksize=chunksize)

    # Collect the trimmed chunks and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    chunks = []
    n_rows = 0
    for cidx, df in enumerate(df_reader):
        # Chunks keep the file-wide row index; reset it so the flattened JSON
        # frame (indexed from 0) lines up row-for-row in the merge below.
        df = df.reset_index(drop=True)
        for col in JSON_COLS:
            col_as_df = normalize(df[col].tolist())
            col_as_df.columns = ['{}_{}'.format(col, subcol) for subcol in col_as_df.columns]
            df = df.drop(col, axis=1).merge(col_as_df, right_index=True, left_index=True)
        # reindex (rather than df[features]) so a chunk in which no row has a
        # transactionRevenue key yields a NaN column instead of a KeyError.
        chunks.append(df.reindex(columns=features))
        n_rows += len(df)
        print('{}: {}'.format(cidx + 1, (n_rows, len(features))))

    if not chunks:
        return pd.DataFrame(columns=features)
    return pd.concat(chunks, axis=0, ignore_index=True)

In [None]:
# Load the train and test files (train first, then test) with the same loader.
train, test = map(load_df, ('../input/train_v2.csv', '../input/test_v2.csv'))

In [None]:
# Stack train and test into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which is gone from recent pandas.
full_df = pd.concat([train, test], ignore_index=True)
full_df['date'] = pd.to_datetime(full_df['date'])

# Convert every totals_* column to float, treating missing values as 0.
# Assign the columns directly: on recent pandas `.loc[:, mask] = ...` writes
# in place and can leave the columns as object dtype instead of float.
totals_cols = full_df.columns[full_df.columns.str.startswith('totals_')].tolist()
full_df[totals_cols] = full_df[totals_cols].astype(float).fillna(0)

In [None]:
# Peek at the first rows of the combined train + test frame.
full_df.head()

## Extract target fullVisitorId at Each Validation Period

Extract the fullVisitorIds that have at least one log entry in the window from 214 days (about 7 months) before to 45 days (about 1.5 months) before the evaluation start date; these are the evaluation targets.

In [None]:
def get_target_fullvisitorid(full_df, target_datestart):
    """Return the distinct visitor ids seen in the look-back window.

    The window covers dates from 214 days before ``target_datestart``
    (inclusive) up to 45 days before it (exclusive).

    Args:
        full_df: Frame with a datetime ``date`` column and a
            ``fullVisitorId`` column.
        target_datestart: Start of the evaluation period; anything
            ``pd.to_datetime`` accepts.

    Returns:
        A one-column DataFrame (``fullVisitorId``) of unique ids in order of
        first appearance.
    """
    anchor = pd.to_datetime(target_datestart)
    window_open = anchor - pd.Timedelta(days=214)
    window_close = anchor - pd.Timedelta(days=45)

    visit_dates = full_df['date']
    in_window = visit_dates.ge(window_open) & visit_dates.lt(window_close)

    visitor_ids = full_df.loc[in_window, 'fullVisitorId'].unique()
    return pd.DataFrame(visitor_ids, columns=['fullVisitorId'])

Check whether it correctly extracts the fullVisitorIds in the test set.

In [None]:
# Sample submission, with the visitor id column read as a string.
ss = pd.read_csv(
    '../input/sample_submission_v2.csv',
    dtype={'fullVisitorId': str},
)
# Visitor ids selected for an evaluation period starting 2018-12-01.
test_ids = get_target_fullvisitorid(full_df, target_datestart='2018-12-01')

In [None]:
# Count the extracted ids that are absent from the sample submission.
extracted_not_in_ss = ~test_ids['fullVisitorId'].isin(ss['fullVisitorId'])
print(extracted_not_in_ss.sum())

In [None]:
# Count the sample-submission ids that the extraction did not return.
ss_not_in_extracted = ~ss['fullVisitorId'].isin(test_ids['fullVisitorId'])
print(ss_not_in_extracted.sum())

It seems to extract fullVisitorIds without omission.

## Making Ground Truth and Checking the All-0 Prediction Score at Each Validation Period

The validation period is set to 2 months (61 days), following the competition task.

In [None]:
def make_groundtruth(target_datestart):
    """Build the per-visitor ground truth for one validation period.

    Reads the module-level ``full_df``. The target visitors are those returned
    by ``get_target_fullvisitorid``; their revenue is summed over the 61 days
    starting at ``target_datestart`` (end exclusive) and passed through
    ``np.log1p``. Target visitors with no rows in the period get 0.

    Args:
        target_datestart: First day of the validation period; anything
            ``pd.to_datetime`` accepts.

    Returns:
        A tuple ``(date_range, groundtruth)`` where ``date_range`` is the list
        ``[start, end]`` of timestamps and ``groundtruth`` is a DataFrame
        indexed by ``fullVisitorId`` with a ``LogSumRevenue`` column.
    """
    targets = get_target_fullvisitorid(full_df, target_datestart)

    period_start = pd.to_datetime(target_datestart)
    period_end = period_start + pd.Timedelta(days=61)
    date_range_for_groundtruth = [period_start, period_end]

    # Rows of target visitors that fall inside [period_start, period_end).
    in_period = full_df['date'].ge(period_start) & full_df['date'].lt(period_end)
    is_target = full_df['fullVisitorId'].isin(targets['fullVisitorId'])
    period_rows = full_df.loc[
        in_period & is_target,
        ['fullVisitorId', 'totals_transactionRevenue'],
    ]

    # log1p of each visitor's summed revenue within the period.
    revenue_sum = period_rows.groupby('fullVisitorId')['totals_transactionRevenue'].sum()
    log_revenue = np.log1p(revenue_sum).rename('LogSumRevenue').reset_index()

    # Left-join so every target visitor is kept; those without rows get 0.
    full_groundtruth = (
        targets
        .merge(log_revenue, on='fullVisitorId', how='left')
        .set_index('fullVisitorId')
        .fillna(0)
    )
    return date_range_for_groundtruth, full_groundtruth

In [None]:
# First day of each validation period to score.
validation_starts = [
    '2017-12-01', '2018-01-01', '2018-02-01',
    '2018-03-01', '2018-04-01', '2018-05-01',
    '2018-06-01', '2018-07-01', '2018-08-01',
]
report_template = 'validatin date range:{0} to {1} num of fullVisitorIds:{2} score:{3:.5f}'

for target_datestart in validation_starts:
    date_range_for_groundtruth, groundtruth = make_groundtruth(target_datestart)

    # Baseline: predict 0 for every target visitor and score it with RMSE.
    all0_pred = groundtruth.assign(pred=0)[['pred']]
    mse = mean_squared_error(groundtruth['LogSumRevenue'], all0_pred['pred'])
    score = np.sqrt(mse)

    period_start, period_end = (str(bound.date()) for bound in date_range_for_groundtruth)
    print(report_template.format(period_start, period_end, len(groundtruth), score))

For most of the date ranges, the all-0 prediction appears to score around 0.15-0.35.