# Dataset

In [1]:
# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Load the autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to local .py helper files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
# (import name, pip distribution name) pairs. The first element is what gets
# probed with find_spec, the second is what gets passed to pip.
# The two differ for scikit-learn: it is imported as `sklearn`, while the
# `sklearn` name on PyPI is only a deprecated placeholder package.
required_libs = [ ("numpy", "numpy"),
                 ("pandas", "pandas"),
                 ("seaborn", "seaborn"),
                 ("matplotlib", "matplotlib"),
                 ("sklearn", "scikit-learn"),
                 ("colorama", "colorama"),
                 ("emoji", "emoji"),
                 ("catboost", "catboost")
                ]

In [3]:
def is_lib_exists(name):
    """Return True when a module called `name` can be located for import.

    Args:
        name: Importable module name, e.g. "numpy". Dotted names are accepted.

    Returns:
        bool: True if `importlib.util.find_spec` finds a spec for `name`,
        False if it finds none or if a parent package of a dotted name
        cannot be imported.
    """
    # `import importlib` alone does not guarantee that the `util` submodule is
    # loaded, so import the submodule explicitly.
    import importlib.util
    try:
        return importlib.util.find_spec(name) is not None
    except ImportError:
        # find_spec imports the parent of a dotted name and raises
        # ModuleNotFoundError (an ImportError) when that parent is missing.
        return False

In [4]:
for (clz,lib) in required_libs:
    if not is_lib_exists(clz):
        print(f"Installing {lib}")
        !pip -qq install {lib}
    else:
        print(f"{lib} exists")

numpy exists
pandas exists
seaborn exists
matplotlib exists
sklearn exists
colorama exists
emoji exists
catboost exists


In [53]:
# Uninstall fastai ver 1 and install ver 2
!pip uninstall -qq --yes fastai 
!pip install -q fastai
!pip install -q nbdev
import fastai
print(fastai.__version__)

2.0.11


In [6]:
# Run-time switch and dataset identifiers used by the setup cell further down.
COLAB = True                                     # True -> run the Colab-specific setup branch
DATASET_NAME = "4D.zip"                          # file name joined onto ORIGIN_DATASET_PATH
FEATURE_DATASET_PREFIX = "feature_matrix_d2_v3"  # stem of the .ft file joined onto DATASET_PATH

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import calendar
import traceback
import catboost
import gc
from pathlib import Path
from dateutil.relativedelta import *
from datetime import *
from catboost import *
from catboost import datasets
from catboost import CatBoostClassifier
from scipy import stats
from scipy.stats.stats import pearsonr
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
# Display settings: never truncate the column list of a printed DataFrame,
# and print NumPy floats with 4 digits of precision.
pd.set_option("display.max_columns", None)
np.set_printoptions(precision=4)

  import pandas.util.testing as tm


In [8]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Called without arguments, %aimport only reports which modules autoreload
# will reload and which it will skip.
%aimport

Modules to reload:
all-except-skipped

Modules to skip:



In [9]:
from IPython.display import display

In [10]:
# check catboost version
print(catboost.__version__)
# Version of whichever `python` the shell resolves on PATH (this runs through
# `!`, so it is not necessarily the interpreter running the kernel).
!python --version

0.24.1
Python 3.6.9


In [11]:
# colab setup
if COLAB:
  !rm -rf dl-projects
  !git clone https://github.com/mengwangk/dl-projects
  
  !cp dl-projects/utils* .
  !cp dl-projects/preprocess* .
  !cp dl-projects/plot* .
  
  from google.colab import drive
  drive.mount('/content/gdrive')
  GDRIVE_DATASET_FOLDER = Path('gdrive/My Drive/datasets/')
  DATASET_PATH = GDRIVE_DATASET_FOLDER
  ORIGIN_DATASET_PATH = Path('dl-projects/datasets')
  #!ls -l gdrive/"My Drive"/datasets/ --block-size=M

DATASET = DATASET_PATH/f"{FEATURE_DATASET_PREFIX}.ft"
ORIGIN_DATASET = ORIGIN_DATASET_PATH/DATASET_NAME

Cloning into 'dl-projects'...
remote: Enumerating objects: 42, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 2252 (delta 25), reused 13 (delta 6), pack-reused 2210[K
Receiving objects: 100% (2252/2252), 80.16 MiB | 30.85 MiB/s, done.
Resolving deltas: 100% (1404/1404), done.
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## EDA

In [12]:
from preprocess import *
from utils import feature_selection, plot_feature_importances
from plot import plot_correlation_matrix, plot_labeled_scatter

In [13]:
# Base feature matrix, read from the feather file at DATASET.
data = pd.read_feather(DATASET)
# NOTE(review): format_tabular is presumably provided by `from preprocess import *`;
# what it returns for the origin dataset path is not visible here - confirm in preprocess.py.
origin_data = format_tabular(ORIGIN_DATASET)

In [14]:
# Monthly 2020 feature matrices, one feather file per month under DATASET_PATH.
MONTHS_2020 = ("jan", "feb", "mar", "apr", "may", "jun", "jul")
monthly_2020 = {
    month: pd.read_feather(DATASET_PATH/f"feature_matrix_2020_{month}.ft")
    for month in MONTHS_2020
}

# Keep the per-month names that the following cell refers to.
jan_2020, feb_2020, mar_2020, apr_2020, may_2020, jun_2020, jul_2020 = (
    monthly_2020[month] for month in MONTHS_2020
)

In [15]:
# Stack the 2020 monthly frames under the base matrix in calendar order,
# restricted to (and ordered like) the base matrix columns.
# A single pd.concat replaces the chained DataFrame.append calls: append is
# deprecated (removed in pandas 2.0) and every call re-copied the whole frame.
monthly_frames = [jan_2020, feb_2020, mar_2020, apr_2020, may_2020, jun_2020, jul_2020]
new_data = pd.concat(
    [data] + [frame[data.columns] for frame in monthly_frames],
    ignore_index=True,
)
data.shape, new_data.shape

((959893, 217), (1029893, 217))

In [16]:
# Rebind `data` to the combined frame. After this, re-running the cell that
# builds `new_data` from `data` would add the 2020 months a second time.
data = new_data

In [17]:
# Number of missing values in each column, columns with the most gaps first.
missing_per_column = data.isna().sum()
missing_per_column.sort_values(ascending=False)

CUM_SUM(SKEW(Results.TotalStrike))           7685
CUM_MEAN(TREND(Results.DrawNo, DrawDate))    7685
TREND(Results.CUM_SUM(DrawNo), DrawDate)     7685
TREND(Results.CUM_SUM(LuckyNo), DrawDate)    7685
CUM_SUM(SKEW(Results.LuckyNo))               7685
                                             ... 
CUM_SUM(MIN(Results.DrawNo))                    0
NUM_UNIQUE(Results.DAY(DrawDate))               0
NUM_UNIQUE(Results.MONTH(DrawDate))             0
SUM(Results.PERCENTILE(LuckyNo))                0
NumberId                                        0
Length: 217, dtype: int64

In [52]:
# data.info(max_cols=500, null_counts=True)
# data.columns.tolist()

In [42]:
# `feature_matrix` is a second name for the same DataFrame object as `data`
# (plain assignment, no copy is made).
feature_matrix = data

### Missing values are left as-is here; the FillMissing proc in the fastai
### pre-processing list set up further down is what handles them.
# feature_matrix = data.fillna(0)
# feature_matrix.sort_values(by=['time', 'MAX(Results.LuckyNo)'], inplace=True)
# feature_matrix.head(20)

In [43]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, chi2, f_regression
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import VarianceThreshold

# other_features = ['Label', 'NumberId', 'time', 'TotalStrike',  'month', 'year']
# feature_matrix = feature_selection(data)

def select_features(df, func=f_regression, other_features=None, k=50):
  """Return the names of the `k` best-scoring feature columns of `df`.

  Args:
    df: DataFrame holding the feature columns and a `Label` column.
    func: Univariate scoring function handed to SelectKBest.
    other_features: Columns that are not candidate features (the target,
      identifiers, time columns). Defaults to the list that the notebook
      only carried in a comment, which made the original raise NameError.
    k: Number of features to keep; passed straight to SelectKBest.

  Returns:
    pandas.Index with the selected column names.
  """
  if other_features is None:
    other_features = ['Label', 'NumberId', 'time', 'TotalStrike', 'month', 'year']
  # Drop only the non-feature columns that are actually present, so a frame
  # that already lost some of them (e.g. `time`) does not raise KeyError.
  X = df.drop(columns=[col for col in other_features if col in df.columns])
  y = df.Label
  fs = SelectKBest(score_func=func, k=k)
  # Only the fitted support mask is needed; the transformed array is not.
  fs.fit(X, y)
  return X.columns[fs.get_support()]

def reduce_features(df, lower_threshold=10, upper_threshold=10000):
  """Drop columns whose number of distinct values falls outside a range.

  Args:
    df: DataFrame to filter.
    lower_threshold: Columns with fewer distinct values than this are dropped.
    upper_threshold: Columns with more distinct values than this are dropped.

  Returns:
    (cols_to_drop, reduced_df): the dropped column names in column order, and
    a copy of `df` without them. The dropped names are also printed.
  """
  # One vectorized pass; the per-column loop computed nunique() twice for
  # every column.
  unique_counts = df.nunique()
  out_of_range = (unique_counts > upper_threshold) | (unique_counts < lower_threshold)
  cols_to_drop = unique_counts.index[out_of_range].tolist()
  print(cols_to_drop)
  return cols_to_drop, df.drop(columns=cols_to_drop)

def select_by_variance(df, threshold=0):
  """List the columns that a variance filter would remove.

  Fits a VarianceThreshold with the given `threshold` on `df`, prints how
  many and which columns fall outside its support, and returns those column
  names (the columns to drop, not the ones to keep).
  """
  selector = VarianceThreshold(threshold=threshold)
  selector.fit(df)
  # Columns the fitted selector keeps.
  kept = df.columns[selector.get_support()]
  cols_to_drop = []
  for column in df.columns:
    if column not in kept:
      cols_to_drop.append(column)
  print(f"No of columns to drop - {len(cols_to_drop)}")
  print(cols_to_drop)
  return cols_to_drop

def select_duplicates(df, sample_rows=20000):
  """Find columns whose values repeat an earlier column.

  Columns are compared on the first `sample_rows` rows only, so two columns
  that agree on the sample but differ later are still reported as duplicates.

  Args:
    df: DataFrame to inspect.
    sample_rows: Number of leading rows to compare; None compares every row.

  Returns:
    List of column names (in column order) that duplicate an earlier column.
    The duplicate count and the shape of the de-duplicated sample are printed.
  """
  sample = df if sample_rows is None else df.head(sample_rows)
  # Transpose so that each column becomes a row and row-wise duplicate
  # detection can be used.
  df_transposed = sample.T
  is_duplicate = df_transposed.duplicated(keep='first')
  print(is_duplicate.sum())
  unique_features = df_transposed.loc[~is_duplicate].T
  print(unique_features.shape)
  return [col for col, dup in zip(df.columns, is_duplicate) if dup]

def select_correlated(df, threshold=0.8):
  """Find columns that are strongly correlated with an earlier column.

  Args:
    df: DataFrame whose pairwise column correlations (`df.corr()`) are used.
    threshold: A column is flagged when the absolute correlation with any
      column that precedes it in the correlation matrix exceeds this value.

  Returns:
    Set of flagged column names. The set size and the set are also printed.
  """
  correlation_matrix = df.corr()
  # NaN correlations compare as False, so they never flag a column.
  strong = np.abs(correlation_matrix.to_numpy()) > threshold
  # Strictly-lower triangle: compare each column with earlier columns only,
  # so the first column of a correlated group is kept.
  flagged = np.tril(strong, k=-1).any(axis=1)
  correlated_features = set(correlation_matrix.columns[flagged])
  print(len(correlated_features))
  print(correlated_features)
  return correlated_features


In [44]:
# Start from every column of the feature matrix; no selection is applied here.
selected_features = feature_matrix.columns.to_list()
# display(len(selected_features), selected_features)
print(len(selected_features))

217


In [48]:
# Restrict the feature matrix to the columns listed in `selected_features`.
df_selected_features = feature_matrix[selected_features]

In [23]:
# def split_data(df, dt):
#   y_train = df.loc[df['time'] < dt, 'Label']
#   y_validation = df.loc[df['time'] == dt, 'Label']
# 
#   X_train = df[df['time'] < dt].drop(columns = ['NumberId', 'time', 'Label', 'TotalStrike', 'year', 'month'])
#   X_validation = df[df['time'] == dt].drop(columns = ['NumberId', 'time', 'Label', 'TotalStrike', 'year', 'month'])
# 
#   # Reduce by standard deviation
#   # X_train = X_train.loc[:, X_train.std() > .0]
#   # X_validation = X_validation[X_train.columns]
# 
#   return X_train, X_validation, y_train, y_validation

In [24]:
# NOTE(review): in the cells visible here this constant is referenced only by
# the commented-out split_data call below.
TARGET_MONTH = datetime(2020,1,1)

# X_train, X_validation, y_train, y_validation = split_data(df_selected_features, TARGET_MONTH)
# X_train.shape, X_validation.shape, y_train.shape, y_validation.shape

((959893, 211), (10000, 211), (959893,), (10000,))

In [31]:
# cols_to_drop = select_by_variance(X_train,0.0)
# X_train = X_train.drop(columns=cols_to_drop)
# X_validation = X_validation[X_train.columns]
# X_train.shape, X_validation.shape
# 
# cols_to_drop = select_by_variance(X_train,0.01)
# X_train = X_train.drop(columns=cols_to_drop)
# X_validation = X_validation[X_train.columns]
# X_train.shape, X_validation.shape
# 
# cols_to_drop = select_duplicates(X_train)
# X_train = X_train.drop(columns=cols_to_drop)
# X_validation = X_validation[X_train.columns]
# X_train.shape, X_validation.shape
# 
# cols_to_drop = select_correlated(X_train)
# X_train = X_train.drop(columns=cols_to_drop)
# X_validation = X_validation[X_train.columns]
# X_train.shape, X_validation.shape

In [32]:
# CAT_FEATURES = []
# UNIQUE_THRESHOLD = 500
# for col in X_train.select_dtypes(include=['int64']).columns:
#   print(f"{col}: Index - {X_train.columns.get_loc(col)}, Unique values - {X_train[col].nunique()}")
#   if X_train[col].nunique() <= UNIQUE_THRESHOLD:
#     CAT_FEATURES.append(X_train.columns.get_loc(col))
#   
# print(CAT_FEATURES)

In [49]:
# Prepare the data
# unused_features = ['NumberId', 'time', 'TotalStrike',  'month', 'year']
unused_features = ['time', 'TotalStrike']
df = df_selected_features.drop(columns=unused_features)

# Target label. No need to np.log
dep_var = 'Label'

# fastai pre-processing steps, later handed to TabularPandas.
procs = [Categorify, FillMissing, Normalize]

# Train test split: rows strictly before the cutoff month train, the rest validate.
# The month test is tied to the cutoff year; a bare `df.month < M` clause would
# also pull early months of later years into the training set once M > 1.
# For the current cutoff (2020, 1) this selects the same rows as `df.year < 2020`,
# assuming `month` holds 1-12 - TODO confirm.
VALID_START_YEAR, VALID_START_MONTH = 2020, 1
cond = (df.year < VALID_START_YEAR) | (
    (df.year == VALID_START_YEAR) & (df.month < VALID_START_MONTH)
)
train_idx = np.where( cond)[0]
valid_idx = np.where(~cond)[0]
splits = (list(train_idx),list(valid_idx))

In [58]:
# Continuous and categorical variables
# max_card=1 is the cardinality cut-off handed to fastai's cont_cat_split;
# the target column is excluded from both lists via dep_var.
cont, cat = cont_cat_split(df, max_card=1, dep_var=dep_var)
print(cont)
print(cat)

['NumberId', 'STD(Results.DrawNo)', 'STD(Results.TotalStrike)', 'STD(Results.LuckyNo)', 'MAX(Results.DrawNo)', 'MAX(Results.LuckyNo)', 'MIN(Results.DrawNo)', 'MIN(Results.LuckyNo)', 'MEAN(Results.DrawNo)', 'MEAN(Results.LuckyNo)', 'SKEW(Results.DrawNo)', 'SKEW(Results.TotalStrike)', 'SKEW(Results.LuckyNo)', 'LAST(Results.results_index)', 'LAST(Results.DrawNo)', 'LAST(Results.LuckyNo)', 'AVG_TIME_BETWEEN(Results.DrawDate)', 'COUNT(Results)', 'SUM(Results.DrawNo)', 'SUM(Results.TotalStrike)', 'SUM(Results.LuckyNo)', 'TREND(Results.TotalStrike, DrawDate)', 'TREND(Results.LuckyNo, DrawDate)', 'TREND(Results.DrawNo, DrawDate)', 'MONTH(first_Results_time)', 'DAY(first_Results_time)', 'TIME_SINCE(first_Results_time)', 'TIME_SINCE_PREVIOUS(first_Results_time)', 'STD(Results.TIME_SINCE(DrawDate))', 'STD(Results.CUM_MEAN(DrawNo))', 'STD(Results.CUM_SUM(LuckyNo))', 'STD(Results.PERCENTILE(DrawNo))', 'STD(Results.TIME_SINCE_PREVIOUS(DrawDate))', 'STD(Results.CUM_SUM(TotalStrike))', 'STD(Results.PE

In [None]:
# Free unreferenced objects before building the large TabularPandas object.
gc.collect() # Garbage collect

# Wrap df for fastai: apply `procs`, use the categorical/continuous column
# lists and the target name, and split rows by the precomputed index lists.
to = TabularPandas(df, procs=procs, cat_names=cat, cont_names=cont, y_names=dep_var, splits=splits)
# Sizes of the training and validation subsets.
len(to.train),len(to.valid)

In [66]:
# Peek at the first three processed rows; the recorded output includes the
# extra `*_na` indicator columns alongside the original features.
to.items.head(3)

Unnamed: 0,NumberId,STD(Results.DrawNo),STD(Results.TotalStrike),STD(Results.LuckyNo),MAX(Results.DrawNo),MAX(Results.TotalStrike),MAX(Results.LuckyNo),MIN(Results.DrawNo),MIN(Results.TotalStrike),MIN(Results.LuckyNo),MEAN(Results.DrawNo),MEAN(Results.TotalStrike),MEAN(Results.LuckyNo),SKEW(Results.DrawNo),SKEW(Results.TotalStrike),SKEW(Results.LuckyNo),LAST(Results.results_index),LAST(Results.DrawNo),LAST(Results.TotalStrike),LAST(Results.LuckyNo),AVG_TIME_BETWEEN(Results.DrawDate),COUNT(Results),SUM(Results.DrawNo),SUM(Results.TotalStrike),SUM(Results.LuckyNo),"TREND(Results.TotalStrike, DrawDate)","TREND(Results.LuckyNo, DrawDate)","TREND(Results.DrawNo, DrawDate)",NUM_UNIQUE(Results.PrizeType),MONTH(first_Results_time),DAY(first_Results_time),TIME_SINCE(first_Results_time),TIME_SINCE_PREVIOUS(first_Results_time),STD(Results.TIME_SINCE(DrawDate)),STD(Results.CUM_MEAN(DrawNo)),STD(Results.CUM_SUM(LuckyNo)),STD(Results.PERCENTILE(DrawNo)),STD(Results.TIME_SINCE_PREVIOUS(DrawDate)),STD(Results.CUM_SUM(TotalStrike)),STD(Results.PERCENTILE(TotalStrike)),STD(Results.CUM_SUM(DrawNo)),STD(Results.CUM_MEAN(LuckyNo)),STD(Results.CUM_MEAN(TotalStrike)),STD(Results.PERCENTILE(LuckyNo)),MAX(Results.TIME_SINCE(DrawDate)),MAX(Results.CUM_MEAN(DrawNo)),MAX(Results.CUM_SUM(LuckyNo)),MAX(Results.PERCENTILE(DrawNo)),MAX(Results.TIME_SINCE_PREVIOUS(DrawDate)),MAX(Results.CUM_SUM(TotalStrike)),MAX(Results.PERCENTILE(TotalStrike)),MAX(Results.CUM_SUM(DrawNo)),MAX(Results.CUM_MEAN(LuckyNo)),MAX(Results.CUM_MEAN(TotalStrike)),MAX(Results.PERCENTILE(LuckyNo)),MIN(Results.TIME_SINCE(DrawDate)),MIN(Results.CUM_MEAN(DrawNo)),MIN(Results.CUM_SUM(LuckyNo)),MIN(Results.PERCENTILE(DrawNo)),MIN(Results.TIME_SINCE_PREVIOUS(DrawDate)),MIN(Results.CUM_SUM(TotalStrike)),MIN(Results.PERCENTILE(TotalStrike)),MIN(Results.CUM_SUM(DrawNo)),MIN(Results.CUM_MEAN(LuckyNo)),MIN(Results.CUM_MEAN(TotalStrike)),MIN(Results.PERCENTILE(LuckyNo)),MODE(Results.MONTH(DrawDate)),MODE(Results.DAY(DrawDate)),MEAN(Resul
ts.TIME_SINCE(DrawDate)),MEAN(Results.CUM_MEAN(DrawNo)),MEAN(Results.CUM_SUM(LuckyNo)),MEAN(Results.PERCENTILE(DrawNo)),MEAN(Results.TIME_SINCE_PREVIOUS(DrawDate)),MEAN(Results.CUM_SUM(TotalStrike)),MEAN(Results.PERCENTILE(TotalStrike)),MEAN(Results.CUM_SUM(DrawNo)),MEAN(Results.CUM_MEAN(LuckyNo)),MEAN(Results.CUM_MEAN(TotalStrike)),MEAN(Results.PERCENTILE(LuckyNo)),SKEW(Results.TIME_SINCE(DrawDate)),SKEW(Results.CUM_MEAN(DrawNo)),SKEW(Results.CUM_SUM(LuckyNo)),SKEW(Results.PERCENTILE(DrawNo)),SKEW(Results.TIME_SINCE_PREVIOUS(DrawDate)),SKEW(Results.CUM_SUM(TotalStrike)),SKEW(Results.PERCENTILE(TotalStrike)),SKEW(Results.CUM_SUM(DrawNo)),SKEW(Results.CUM_MEAN(LuckyNo)),SKEW(Results.CUM_MEAN(TotalStrike)),SKEW(Results.PERCENTILE(LuckyNo)),LAST(Results.TIME_SINCE(DrawDate)),LAST(Results.CUM_MEAN(DrawNo)),LAST(Results.CUM_SUM(LuckyNo)),LAST(Results.PERCENTILE(DrawNo)),LAST(Results.DAY(DrawDate)),LAST(Results.TIME_SINCE_PREVIOUS(DrawDate)),LAST(Results.CUM_SUM(TotalStrike)),LAST(Results.MONTH(DrawDate)),LAST(Results.CUM_SUM(DrawNo)),LAST(Results.CUM_MEAN(LuckyNo)),LAST(Results.PERCENTILE(TotalStrike)),LAST(Results.CUM_MEAN(TotalStrike)),LAST(Results.PERCENTILE(LuckyNo)),SUM(Results.TIME_SINCE(DrawDate)),SUM(Results.CUM_MEAN(DrawNo)),SUM(Results.CUM_SUM(LuckyNo)),SUM(Results.PERCENTILE(DrawNo)),SUM(Results.TIME_SINCE_PREVIOUS(DrawDate)),SUM(Results.CUM_SUM(TotalStrike)),SUM(Results.PERCENTILE(TotalStrike)),SUM(Results.CUM_SUM(DrawNo)),SUM(Results.CUM_MEAN(LuckyNo)),SUM(Results.CUM_MEAN(TotalStrike)),SUM(Results.PERCENTILE(LuckyNo)),"TREND(Results.CUM_MEAN(LuckyNo), DrawDate)","TREND(Results.PERCENTILE(LuckyNo), DrawDate)","TREND(Results.PERCENTILE(DrawNo), DrawDate)","TREND(Results.CUM_MEAN(DrawNo), DrawDate)","TREND(Results.TIME_SINCE(DrawDate), DrawDate)","TREND(Results.CUM_SUM(TotalStrike), DrawDate)","TREND(Results.PERCENTILE(TotalStrike), DrawDate)","TREND(Results.TIME_SINCE_PREVIOUS(DrawDate), DrawDate)","TREND(Results.CUM_MEAN(TotalStrike), 
DrawDate)","TREND(Results.CUM_SUM(DrawNo), DrawDate)","TREND(Results.CUM_SUM(LuckyNo), DrawDate)",NUM_UNIQUE(Results.MONTH(DrawDate)),NUM_UNIQUE(Results.DAY(DrawDate)),CUM_SUM(SKEW(Results.LuckyNo)),CUM_SUM(MIN(Results.DrawNo)),CUM_SUM(MEAN(Results.DrawNo)),CUM_SUM(STD(Results.LuckyNo)),CUM_SUM(LAST(Results.DrawNo)),CUM_SUM(MAX(Results.TotalStrike)),"CUM_SUM(TREND(Results.TotalStrike, DrawDate))",CUM_SUM(MEAN(Results.TotalStrike)),CUM_SUM(MIN(Results.TotalStrike)),CUM_SUM(AVG_TIME_BETWEEN(Results.DrawDate)),CUM_SUM(MEAN(Results.LuckyNo)),CUM_SUM(MAX(Results.DrawNo)),CUM_SUM(COUNT(Results)),CUM_SUM(MIN(Results.LuckyNo)),CUM_SUM(SUM(Results.LuckyNo)),CUM_SUM(NUM_UNIQUE(Results.PrizeType)),CUM_SUM(LAST(Results.TotalStrike)),CUM_SUM(STD(Results.DrawNo)),CUM_SUM(SKEW(Results.DrawNo)),"CUM_SUM(TREND(Results.LuckyNo, DrawDate))",CUM_SUM(SUM(Results.TotalStrike)),CUM_SUM(STD(Results.TotalStrike)),CUM_SUM(SKEW(Results.TotalStrike)),CUM_SUM(SUM(Results.DrawNo)),"CUM_SUM(TREND(Results.DrawNo, DrawDate))",CUM_SUM(LAST(Results.LuckyNo)),CUM_SUM(MAX(Results.LuckyNo)),CUM_MEAN(SKEW(Results.LuckyNo)),CUM_MEAN(MIN(Results.DrawNo)),CUM_MEAN(MEAN(Results.DrawNo)),CUM_MEAN(STD(Results.LuckyNo)),CUM_MEAN(LAST(Results.DrawNo)),CUM_MEAN(MAX(Results.TotalStrike)),"CUM_MEAN(TREND(Results.TotalStrike, DrawDate))",CUM_MEAN(MEAN(Results.TotalStrike)),CUM_MEAN(MIN(Results.TotalStrike)),CUM_MEAN(AVG_TIME_BETWEEN(Results.DrawDate)),CUM_MEAN(MEAN(Results.LuckyNo)),CUM_MEAN(MAX(Results.DrawNo)),CUM_MEAN(COUNT(Results)),CUM_MEAN(MIN(Results.LuckyNo)),CUM_MEAN(SUM(Results.LuckyNo)),CUM_MEAN(NUM_UNIQUE(Results.PrizeType)),CUM_MEAN(LAST(Results.TotalStrike)),CUM_MEAN(STD(Results.DrawNo)),CUM_MEAN(SKEW(Results.DrawNo)),"CUM_MEAN(TREND(Results.LuckyNo, DrawDate))",CUM_MEAN(SUM(Results.TotalStrike)),CUM_MEAN(STD(Results.TotalStrike)),CUM_MEAN(SKEW(Results.TotalStrike)),CUM_MEAN(SUM(Results.DrawNo)),"CUM_MEAN(TREND(Results.DrawNo, 
DrawDate))",CUM_MEAN(LAST(Results.LuckyNo)),CUM_MEAN(MAX(Results.LuckyNo)),MONTH(LAST(Results.DrawDate)),DAY(LAST(Results.DrawDate)),PERCENTILE(SKEW(Results.LuckyNo)),PERCENTILE(MIN(Results.DrawNo)),PERCENTILE(MEAN(Results.DrawNo)),PERCENTILE(STD(Results.LuckyNo)),PERCENTILE(LAST(Results.DrawNo)),PERCENTILE(MAX(Results.TotalStrike)),"PERCENTILE(TREND(Results.TotalStrike, DrawDate))",PERCENTILE(MEAN(Results.TotalStrike)),PERCENTILE(MIN(Results.TotalStrike)),PERCENTILE(AVG_TIME_BETWEEN(Results.DrawDate)),PERCENTILE(MEAN(Results.LuckyNo)),PERCENTILE(MAX(Results.DrawNo)),PERCENTILE(COUNT(Results)),PERCENTILE(MIN(Results.LuckyNo)),PERCENTILE(SUM(Results.LuckyNo)),PERCENTILE(NUM_UNIQUE(Results.PrizeType)),PERCENTILE(LAST(Results.TotalStrike)),PERCENTILE(STD(Results.DrawNo)),PERCENTILE(SKEW(Results.DrawNo)),"PERCENTILE(TREND(Results.LuckyNo, DrawDate))",PERCENTILE(SUM(Results.TotalStrike)),PERCENTILE(STD(Results.TotalStrike)),PERCENTILE(SKEW(Results.TotalStrike)),PERCENTILE(SUM(Results.DrawNo)),"PERCENTILE(TREND(Results.DrawNo, DrawDate))",PERCENTILE(LAST(Results.LuckyNo)),PERCENTILE(MAX(Results.LuckyNo)),Label,MODE(Results.PrizeType)_Prize,LAST(Results.PrizeType)_Prize,month,year,STD(Results.DrawNo)_na,STD(Results.TotalStrike)_na,STD(Results.LuckyNo)_na,SKEW(Results.DrawNo)_na,SKEW(Results.TotalStrike)_na,SKEW(Results.LuckyNo)_na,AVG_TIME_BETWEEN(Results.DrawDate)_na,"TREND(Results.TotalStrike, DrawDate)_na","TREND(Results.LuckyNo, DrawDate)_na","TREND(Results.DrawNo, 
DrawDate)_na",TIME_SINCE_PREVIOUS(first_Results_time)_na,STD(Results.TIME_SINCE(DrawDate))_na,STD(Results.CUM_MEAN(DrawNo))_na,STD(Results.CUM_SUM(LuckyNo))_na,STD(Results.PERCENTILE(DrawNo))_na,STD(Results.TIME_SINCE_PREVIOUS(DrawDate))_na,STD(Results.CUM_SUM(TotalStrike))_na,STD(Results.PERCENTILE(TotalStrike))_na,STD(Results.CUM_SUM(DrawNo))_na,STD(Results.CUM_MEAN(LuckyNo))_na,STD(Results.CUM_MEAN(TotalStrike))_na,STD(Results.PERCENTILE(LuckyNo))_na,SKEW(Results.TIME_SINCE(DrawDate))_na,SKEW(Results.CUM_MEAN(DrawNo))_na,SKEW(Results.CUM_SUM(LuckyNo))_na,SKEW(Results.PERCENTILE(DrawNo))_na,SKEW(Results.TIME_SINCE_PREVIOUS(DrawDate))_na,SKEW(Results.CUM_SUM(TotalStrike))_na,SKEW(Results.PERCENTILE(TotalStrike))_na,SKEW(Results.CUM_SUM(DrawNo))_na,SKEW(Results.CUM_MEAN(LuckyNo))_na,SKEW(Results.CUM_MEAN(TotalStrike))_na,SKEW(Results.PERCENTILE(LuckyNo))_na,"TREND(Results.CUM_MEAN(LuckyNo), DrawDate)_na","TREND(Results.PERCENTILE(LuckyNo), DrawDate)_na","TREND(Results.PERCENTILE(DrawNo), DrawDate)_na","TREND(Results.CUM_MEAN(DrawNo), DrawDate)_na","TREND(Results.TIME_SINCE(DrawDate), DrawDate)_na","TREND(Results.CUM_SUM(TotalStrike), DrawDate)_na","TREND(Results.PERCENTILE(TotalStrike), DrawDate)_na","TREND(Results.TIME_SINCE_PREVIOUS(DrawDate), DrawDate)_na","TREND(Results.CUM_MEAN(TotalStrike), DrawDate)_na","TREND(Results.CUM_SUM(DrawNo), DrawDate)_na","TREND(Results.CUM_SUM(LuckyNo), DrawDate)_na",CUM_SUM(SKEW(Results.LuckyNo))_na,CUM_SUM(STD(Results.LuckyNo))_na,"CUM_SUM(TREND(Results.TotalStrike, DrawDate))_na",CUM_SUM(AVG_TIME_BETWEEN(Results.DrawDate))_na,CUM_SUM(STD(Results.DrawNo))_na,CUM_SUM(SKEW(Results.DrawNo))_na,"CUM_SUM(TREND(Results.LuckyNo, DrawDate))_na",CUM_SUM(STD(Results.TotalStrike))_na,CUM_SUM(SKEW(Results.TotalStrike))_na,"CUM_SUM(TREND(Results.DrawNo, DrawDate))_na",CUM_MEAN(SKEW(Results.LuckyNo))_na,CUM_MEAN(STD(Results.LuckyNo))_na,"CUM_MEAN(TREND(Results.TotalStrike, 
DrawDate))_na",CUM_MEAN(AVG_TIME_BETWEEN(Results.DrawDate))_na,CUM_MEAN(STD(Results.DrawNo))_na,CUM_MEAN(SKEW(Results.DrawNo))_na,"CUM_MEAN(TREND(Results.LuckyNo, DrawDate))_na",CUM_MEAN(STD(Results.TotalStrike))_na,CUM_MEAN(SKEW(Results.TotalStrike))_na,"CUM_MEAN(TREND(Results.DrawNo, DrawDate))_na",PERCENTILE(SKEW(Results.LuckyNo))_na,PERCENTILE(STD(Results.LuckyNo))_na,"PERCENTILE(TREND(Results.TotalStrike, DrawDate))_na",PERCENTILE(AVG_TIME_BETWEEN(Results.DrawDate))_na,PERCENTILE(STD(Results.DrawNo))_na,PERCENTILE(SKEW(Results.DrawNo))_na,"PERCENTILE(TREND(Results.LuckyNo, DrawDate))_na",PERCENTILE(STD(Results.TotalStrike))_na,PERCENTILE(SKEW(Results.TotalStrike))_na,"PERCENTILE(TREND(Results.DrawNo, DrawDate))_na"
0,72,75380.359375,0.0,0.0,318809,1,72,137498,1,72,216252.75,1,72,0.91097,0.0,0.0,63949,318809,1,72,115228800.0,4,865011,4,288,0.0,0.0,0.031519,1,7,8,425519936.0,259200.0,143491232.0,37688.804688,86363424.0,0.233506,0.0,17337.064453,0.0,4077031000.0,9.45808,0.0,0.0,425519936.0,179762.03125,318812928,0.861376,0.0,63950,0.500007,11495781906,4997.727539,1.0,0.006728,79833536.0,89112.328125,111194445,0.299728,0.0,22249,0.500007,1982660246,4976.533691,1.0,0.006728,5,1,274211936.0,128482.84375,201152416.0,0.543687,0.0,40363.5,0.500007,5676077000.0,4984.672852,1.0,0.006728,-0.868639,0.911553,0.9245,0.910967,0.0,0.910618,0.0,1.416728,1.190888,0.0,0.0,79833536.0,179762.03125,318812928,0.861376,21,0.0,63950,6,11495781906,4985.34668,0.500007,1.0,0.006728,1096848000.0,513931.375,804609645,2.174748,0.0,161454,2.000027,22704309090,19938.691406,4.0,0.026911,-2e-06,0.0,9.763505e-08,0.015759,-60.0,0.007249,0.0,0.0,0.0,1689.995117,36.109661,4,4,0.0,647236808,1752728000.0,0.0,2860026922,8931,0.0,8931,8931,744384100000.0,44668279,2860026922,68643,44668279,341816577,8931,8931,837819776.0,-8.000237,0.0,68643,0.0,0.0,13607204432,274.803711,44668279,44668279,0.0,72470.8125,196252.109375,0.0,320235.90625,1.0,0.0,1.0,1.0,83348352.0,5001.486816,320235.90625,7.685925,5001.486816,38273.046875,1.0,1.0,93810.296875,-0.000896,0.0,7.685925,0.0,0.0,1523592.5,0.03077,5001.486816,5001.486816,6,21,0.500051,0.893547,0.654777,0.50005,0.355428,0.50005,0.500051,0.50005,0.50005,0.860715,0.007304,0.355428,0.099,0.007304,0.005203,0.50005,0.50005,0.216811,0.895006,0.500051,0.099,0.50005,0.500051,0.145223,0.843635,0.007304,0.007304,1,1,1,1,2012,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,98,111281.078125,0.0,0.0,328109,1,98,42692,1,98,178250.5,1,98,-0.071903,0.0,0.0,66090,328109,1,98,79006632.0,8,1426004,8,784,0.0,0.0,0.030838,1,6,18,616550336.0,0.0,216490688.0,55641.738281,127465808.0,0.344716,0.0,25596.271484,0.0,4589864000.0,21.387133,0.0,0.0,616550336.0,184416.984375,329347263,0.890186,0.0,66091,0.500007,12188302975,5037.229004,1.0,0.009045,63503940.0,41701.070312,2221418,0.006048,0.0,441,0.500007,18390172,4973.64502,1.0,0.009045,1,3,350449152.0,109490.601562,157451120.0,0.425967,0.0,31626.5,0.500007,4708998000.0,4984.78125,1.0,0.009045,0.112712,-0.071832,-0.06999,-0.071898,0.0,-0.072083,0.0,0.568398,2.729383,0.0,0.0,63503940.0,184416.984375,329347263,0.890186,27,0.0,66091,12,12188302975,4983.239258,0.500007,1.0,0.009045,2803593000.0,875924.8125,1259608924,3.407737,0.0,253012,4.000054,37671983301,39878.25,8.0,0.072356,-3e-06,0.0,9.55265e-08,0.015419,-60.0,0.007093,0.0,0.0,0.0,1238.922852,35.322491,6,8,0.0,17801084,77751770.0,0.0,136784731,427,0.0,427,427,36008230000.0,2153700,136784731,3638,2153700,18059674,427,427,43969096.0,-19.395048,0.0,3638,0.0,0.0,670903154,13.144516,2153700,2153700,0.0,41688.722656,182088.4375,0.0,320338.9375,1.0,0.0,1.0,1.0,84328400.0,5043.793945,320338.9375,8.519906,5043.793945,42294.320312,1.0,1.0,102972.125,-0.045422,0.0,8.519906,0.0,0.0,1571201.75,0.030783,5043.793945,5043.793945,12,27,0.500051,0.043422,0.252926,0.50005,0.44007,0.50005,0.500051,0.50005,0.50005,0.58817,0.009905,0.44007,0.604402,0.009905,0.013207,0.50005,0.50005,0.851075,0.461625,0.500051,0.604402,0.50005,0.500051,0.486043,0.13201,0.009905,0.009905,1,1,1,1,2012,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2,121,60627.515625,0.0,0.0,288507,1,121,141698,1,121,232103.5,1,121,-0.742656,0.0,0.0,56980,288507,1,121,55883520.0,6,1392621,6,726,0.0,0.0,0.031519,1,10,11,417311936.0,86400.0,115408352.0,30304.837891,69265768.0,0.187748,0.0,13941.128906,0.0,3036686000.0,7.270361,0.0,0.0,417311936.0,164609.859375,283680705,0.76751,0.0,56981,0.500007,9379634343,4994.775391,1.0,0.011146,137894336.0,91203.757812,115933733,0.312739,0.0,23211,0.500007,2116930422,4974.328125,1.0,0.011146,10,11,244382336.0,136420.703125,219145424.0,0.59284,0.0,44014.167969,0.500007,6356513000.0,4980.381348,1.0,0.011146,0.764903,-0.74348,-0.740264,-0.743296,0.0,-0.743769,0.0,-0.520317,2.112381,0.0,0.0,137894336.0,164609.859375,283680705,0.76751,19,0.0,56981,8,9379634343,4978.51416,0.500007,1.0,0.011146,1466294000.0,818524.25,1314872520,3.557042,0.0,264085,3.00004,38139079600,29882.287109,6.0,0.066874,-3e-06,0.0,9.76055e-08,0.015755,-60.0,0.007248,0.0,0.0,0.0,1572.22583,36.009151,5,5,0.0,660088424,1774167000.0,0.0,2890000476,9023,0.0,9023,9023,751519800000.0,45141527,2890000476,69238,45141527,344994350,9023,9023,844573440.0,-9.392215,0.0,69238,0.0,0.0,13747565783,277.589386,45141527,45141527,0.0,73156.203125,196627.203125,0.0,320292.625,1.0,0.0,1.0,1.0,83289352.0,5002.939941,320292.625,7.673501,5002.939941,38234.992188,1.0,1.0,93602.289062,-0.001041,0.0,7.673501,0.0,0.0,1523613.625,0.030765,5002.939941,5002.939941,8,19,0.500051,0.902801,0.795798,0.50005,0.176288,0.50005,0.500051,0.50005,0.50005,0.233481,0.012206,0.176288,0.323162,0.012206,0.012606,0.50005,0.50005,0.09038,0.148997,0.500051,0.323162,0.50005,0.500051,0.464432,0.84384,0.012206,0.012206,1,1,1,1,2012,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [1]:
# Build DataLoaders with a batch size of 64 from the TabularPandas object and
# display one batch.
# NOTE(review): the recorded run of this cell (execution count 1) ended in a
# NameError - presumably `to` was not defined in that fresh kernel; run the
# cells above first.
dls = to.dataloaders(bs=64)
dls.show_batch()

NameError: ignored