In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.



from sklearn.model_selection import train_test_split
import lightgbm as lgb
import gc


path = '../input/' 
path_train = path + 'train.csv'
path_test = path + 'test.csv'

train_cols = ['ip', 'app', 'device', 'os', 'channel', 'click_time', 'is_attributed']
test_cols = ['ip', 'app', 'device', 'os', 'channel', 'click_time']

dtypes = {
        'ip'            : 'uint32',
        'app'           : 'uint16',
        'device'        : 'uint16',
        'os'            : 'uint16',
        'channel'       : 'uint16',
        'is_attributed' : 'uint8',
        'click_id'      : 'uint32'
        }
        
skip = range(1, 140000000)
print("Loading Data")
#skiprows=skip, 
train = pd.read_csv(path_train, dtype=dtypes,
        header=0,usecols=train_cols,parse_dates=["click_time"])#.sample(1000)
test = pd.read_csv(path_test, dtype=dtypes, header=0,
        usecols=test_cols,parse_dates=["click_time"])#.sample(1000)
#test['is_attributed'] = -1

len_train = len(train)
print('The initial size of the train set is', len_train)
print('The initial size of the test set is', len(test))
print('Binding the training and test set together...')



print("Creating new time features in train: 'hour' and 'day'...")
train['hour'] = train["click_time"].dt.hour.astype('uint8')
train['day'] = train["click_time"].dt.day.astype('uint8')


print("Creating new time features in test: 'hour' and 'day'...")
test['hour'] = test["click_time"].dt.hour.astype('uint8')
test['day'] = test["click_time"].dt.day.astype('uint8')

['test.csv', 'test_supplement.csv', 'train.csv', 'train_sample.csv', 'y.pickle']
Loading Data
The initial size of the train set is 184903890
The initial size of the test set is 18790469
Binding the training and test set together...
Creating new time features in train: 'hour' and 'day'...
Creating new time features in test: 'hour' and 'day'...


In [2]:
def prepare_data(data, training_day, profile_days, sample_count=1):
    if sample_count != 1:
        #sample 1/4 of the data:
        data = data.set_index('ip').loc[lambda x: (x.index + 401) % sample_count == 0].reset_index()
        len_train = len(data)
        print('len after sample:', len_train)

    train_ip_contains_training_day = data.groupby('ip').filter(lambda x: x['day'].max() == training_day)

    print('train_ip_contains_training_day', train_ip_contains_training_day)
    print('train_ip_contains_training_day unique ips:', len(train_ip_contains_training_day['ip'].unique()))

    train_ip_contains_training_day = train_ip_contains_training_day \
        .query('day < {0} & day > {1}'.format(training_day, training_day - 1 - profile_days) )
    print('train_ip_contains_training_day unique ips:', len(train_ip_contains_training_day['ip'].unique()))

    print('split attributed data:')
    train_ip_contains_training_day_attributed = train_ip_contains_training_day.query('is_attributed == 1')
    print('len:',len(train_ip_contains_training_day_attributed))

    #only use data on 9 to train, but data before 9 as features
    train = data.query('day == {}'.format(training_day))
    print('training data len:', len(train))
    
    return train, train_ip_contains_training_day, train_ip_contains_training_day_attributed

def add_statistic_feature(group_by_cols, training, training_hist, training_hist_attribution, 
                          with_hist, counting_col='channel'):
    features_added = []
    feature_name_added = '_'.join(group_by_cols) + 'count'
    print('count ip with group by:', group_by_cols)
    n_chans = training[group_by_cols + [counting_col]].groupby(by=group_by_cols)[[counting_col]] \
        .count().reset_index().rename(columns={counting_col: feature_name_added})
    training = training.merge(n_chans, on=group_by_cols, how='left')
    del n_chans
    gc.collect()
    training[feature_name_added] = training[feature_name_added].astype('uint16')
    features_added.append(feature_name_added)
    
    if with_hist:
        print('count ip with group by in hist data:', group_by_cols)
        feature_name_added = '_'.join(group_by_cols) + "count_in_hist"
        n_chans = training_hist[group_by_cols + [counting_col]].groupby(by=group_by_cols)[[counting_col]] \
            .count().reset_index().rename(columns={counting_col: feature_name_added})
        training = training.merge(n_chans, on=group_by_cols, how='left')
        del n_chans
        gc.collect()
        #training[feature_name_added] = training[feature_name_added].astype('uint16')
        print('count ip attribution with group by in hist data:', group_by_cols)
        feature_name_added1 = '_'.join(group_by_cols) + "count_attribution_in_hist"
        n_chans = training_hist_attribution[group_by_cols + [counting_col]] \
            .groupby(by=group_by_cols)[[counting_col]] \
            .count().reset_index().rename(columns={counting_col: feature_name_added1 })
        training = training.merge(n_chans, on=group_by_cols, how='left')
        del n_chans
        gc.collect()
        #training[feature_name_added1] = training[feature_name_added1].astype('uint16')
                                               
        training['_'.join(group_by_cols) + "count_attribution_rate_in_hist"] = \
            training[feature_name_added1] / training[feature_name_added]
            
        features_added.append(feature_name_added)
        features_added.append(feature_name_added1)
        features_added.append('_'.join(group_by_cols) + "count_attribution_rate_in_hist")
        
    print('added features:', features_added)
                                               
    return training, features_added

def generate_counting_history_features(data, history, history_attribution):
        
    new_features = []

    # Count by IP,DAY,HOUR
    print('a given IP address within each hour...')
    data, features_added = add_statistic_feature(['ip','day','hour'], data, history, history_attribution, False)
    new_features = new_features + features_added
    gc.collect()

    # Count by IP and APP
    data, features_added = add_statistic_feature(['ip','app'], data, history, history_attribution, True)
    new_features = new_features + features_added
    
    # Count by IP and channel
    data, features_added = add_statistic_feature(['ip','channel'], data, history, history_attribution, True, counting_col='os')
    new_features = new_features + features_added
    
    # Count by IP and channel app
    data, features_added = add_statistic_feature(['ip','channel', 'app'], data, history, history_attribution, True, counting_col='os')
    new_features = new_features + features_added
    
    data, features_added  = add_statistic_feature(['ip','app','os'], data, history, history_attribution, True)
    new_features = new_features + features_added

    #######
    # Count by IP
    data, features_added  = add_statistic_feature(['ip'], data, history, history_attribution, True)
    new_features = new_features + features_added

    # Count by IP HOUR CHANNEL                                               
    data, features_added  = add_statistic_feature(['ip','hour','channel'], \
        data, history, history_attribution, True, counting_col='os')
    new_features = new_features + features_added

    # Count by IP HOUR Device
    data, features_added  = add_statistic_feature(['ip','hour','os'], \
        data, history, history_attribution, True)
    new_features = new_features + features_added

    data, features_added  = add_statistic_feature(['ip','hour','app'], \
        data, history, history_attribution, True)
    new_features = new_features + features_added

    data, features_added  = add_statistic_feature(['ip','hour','device'], \
        data, history, history_attribution, True)
    new_features = new_features + features_added
    
    return data, new_features

#test['hour'] = test["click_time"].dt.hour.astype('uint8')
#test['day'] = test["click_time"].dt.day.astype('uint8')

train, train_ip_contains_training_day, train_ip_contains_training_day_attributed = \
    prepare_data(train, 9, 3, 4)

train, new_features = generate_counting_history_features(train, train_ip_contains_training_day, 
                                                         train_ip_contains_training_day_attributed)

print('train data:', train)
print('new features:', new_features)

val = train.set_index('ip').loc[lambda x: (x.index) % 17 == 0].reset_index()
print(val)
print('The size of the validation set is ', len(val))

gc.collect()

train = train.set_index('ip').loc[lambda x: (x.index) % 17 != 0].reset_index()
print('The size of the train set is ', len(train))

target = 'is_attributed'
train[target] = train[target].astype('uint8')
train.info()

len after sample: 46849705
train_ip_contains_training_day               ip  app  device  os  channel          click_time  is_attributed  \
1          18787    3       1  16      379 2017-11-06 14:36:26              0   
2         124979    3       1  18      379 2017-11-06 14:40:16              0   
3          80447    3       1  19      379 2017-11-06 14:40:51              0   
4         134575    3       1  13      379 2017-11-06 14:43:10              0   
6         191759    3       1  13      379 2017-11-06 14:44:51              0   
7         209663    3       1  13      379 2017-11-06 14:48:55              0   
8         208347    3       1  19      379 2017-11-06 14:49:38              0   
10         28739    3       1  13      379 2017-11-06 14:50:29              0   
11        103175   18       1  17      376 2017-11-06 14:53:23              0   
12         74715    3       1  19      379 2017-11-06 14:55:25              0   
13        128855    3       1  13      379 2017-11-

count ip with group by in hist data: ['ip', 'hour', 'channel']
count ip attribution with group by in hist data: ['ip', 'hour', 'channel']
added features: ['ip_hour_channelcount', 'ip_hour_channelcount_in_hist', 'ip_hour_channelcount_attribution_in_hist', 'ip_hour_channelcount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'os']
count ip with group by in hist data: ['ip', 'hour', 'os']
count ip attribution with group by in hist data: ['ip', 'hour', 'os']
added features: ['ip_hour_oscount', 'ip_hour_oscount_in_hist', 'ip_hour_oscount_attribution_in_hist', 'ip_hour_oscount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'app']
count ip with group by in hist data: ['ip', 'hour', 'app']
count ip attribution with group by in hist data: ['ip', 'hour', 'app']
added features: ['ip_hour_appcount', 'ip_hour_appcount_in_hist', 'ip_hour_appcount_attribution_in_hist', 'ip_hour_appcount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'device']
co

            ip  app  device  os  channel          click_time  is_attributed  \
0        18683   18       1  15      134 2017-11-09 00:00:00              0   
1        92735    3       1  19      137 2017-11-09 00:00:00              0   
2        85663   18       1  19      439 2017-11-09 00:00:00              0   
3       148155   12       1  16      481 2017-11-09 00:00:00              0   
4       164475   12       1  14      178 2017-11-09 00:00:00              0   
5       107219   11       1  77      219 2017-11-09 00:00:00              0   
6       109735   12       1  17      265 2017-11-09 00:00:00              0   
7       123675   13       1  13      477 2017-11-09 00:00:00              0   
8       123675   13       1  13      477 2017-11-09 00:00:00              0   
9        84847   12       1  19      178 2017-11-09 00:00:00              0   
10       31467    3       1  19      466 2017-11-09 00:00:00              0   
11      108919   11       1  13      122 2017-11-09 

The size of the train set is  12678368
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12678368 entries, 0 to 12678367
Data columns (total 46 columns):
ip                                               uint64
app                                              uint16
device                                           uint16
os                                               uint16
channel                                          uint16
click_time                                       datetime64[ns]
is_attributed                                    uint8
hour                                             uint8
day                                              uint8
ip_day_hourcount                                 uint16
ip_appcount                                      uint16
ip_appcount_in_hist                              float64
ip_appcount_attribution_in_hist                  float64
ip_appcount_attribution_rate_in_hist             float64
ip_channelcount                                  uint1

In [3]:
print(train[['ip_channelcount','ip_channel_appcount']])

          ip_channelcount  ip_channel_appcount
0                       2                    2
1                     133                  130
2                      17                   17
3                       6                    6
4                      19                   10
5                      46                   46
6                      44                   44
7                      31                   31
8                      62                   10
9                      15                    1
10                      7                    2
11                     14                   14
12                     38                   38
13                     75                   75
14                     15                   10
15                    382                  382
16                     61                   12
17                     18                   18
18                     24                   13
19                     41                    6
20           

In [4]:
predictors0 = ['device', 'app', 'os', 'channel', 'hour', # Starter Vars, Then new features below
              'ip_day_hourcount','ipcount','ip_appcount', 'ip_app_oscount',
              "ip_hour_channelcount", "ip_hour_oscount", "ip_hour_appcount","ip_hour_devicecount"]

categorical = ['app', 'device', 'os', 'channel', 'hour']

predictors1 = categorical + new_features
#for ii in new_features:
#    predictors1 = predictors1 + ii
#print(predictors1)
gc.collect()

#train.fillna(value={x:-1 for x in new_features})

print("Preparing the datasets for training...")

params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    'learning_rate': 0.1,
    'num_leaves': 7,  
    'max_depth': 4,  
    'min_child_samples': 100,  
    'max_bin': 150,  
    'subsample': 0.7,  
    'subsample_freq': 1,  
    'colsample_bytree': 0.7,  
    'min_child_weight': 0,  
    'subsample_for_bin': 200000,  
    'min_split_gain': 0,  
    'reg_alpha': 0,  
    'reg_lambda': 0,  
    'nthread': 5,
    'verbose': 9,
    #'is_unbalance': True,
    'scale_pos_weight':99 
    }
    
predictors_to_train = [predictors1]

for predictors in predictors_to_train:
    print('training with :', predictors)
    #print('training data: ', train[predictors].values)
    #print('validation data: ', val[predictors].values)
    dtrain = lgb.Dataset(train[predictors].values, label=train[target].values,
                          feature_name=predictors,
                          categorical_feature=categorical
                          )
    dvalid = lgb.Dataset(val[predictors].values, label=val[target].values,
                          feature_name=predictors,
                          categorical_feature=categorical
                          )

    evals_results = {}
    print("Training the model...")

    lgb_model = lgb.train(params, 
                     dtrain, 
                     valid_sets=[dtrain, dvalid], 
                     valid_names=['train','valid'], 
                     evals_result=evals_results, 
                     num_boost_round=1000,
                     early_stopping_rounds=30,
                     verbose_eval=50, 
                     feval=None)

    #del train
    #del val
    #gc.collect()

    # Nick's Feature Importance Plot
    import matplotlib.pyplot as plt
    f, ax = plt.subplots(figsize=[7,10])
    lgb.plot_importance(lgb_model, ax=ax, max_num_features=len(predictors))
    plt.title("Light GBM Feature Importance")
    plt.savefig('feature_import.png')

    # Feature names:
    print('Feature names:', lgb_model.feature_name())
    # Feature importances:
    print('Feature importances:', list(lgb_model.feature_importance()))

    feature_imp = pd.DataFrame(lgb_model.feature_name(),list(lgb_model.feature_importance()))
    
    
    

Preparing the datasets for training...
training with : ['app', 'device', 'os', 'channel', 'hour', 'ip_day_hourcount', 'ip_appcount', 'ip_appcount_in_hist', 'ip_appcount_attribution_in_hist', 'ip_appcount_attribution_rate_in_hist', 'ip_channelcount', 'ip_channelcount_in_hist', 'ip_channelcount_attribution_in_hist', 'ip_channelcount_attribution_rate_in_hist', 'ip_channel_appcount', 'ip_channel_appcount_in_hist', 'ip_channel_appcount_attribution_in_hist', 'ip_channel_appcount_attribution_rate_in_hist', 'ip_app_oscount', 'ip_app_oscount_in_hist', 'ip_app_oscount_attribution_in_hist', 'ip_app_oscount_attribution_rate_in_hist', 'ipcount', 'ipcount_in_hist', 'ipcount_attribution_in_hist', 'ipcount_attribution_rate_in_hist', 'ip_hour_channelcount', 'ip_hour_channelcount_in_hist', 'ip_hour_channelcount_attribution_in_hist', 'ip_hour_channelcount_attribution_rate_in_hist', 'ip_hour_oscount', 'ip_hour_oscount_in_hist', 'ip_hour_oscount_attribution_in_hist', 'ip_hour_oscount_attribution_rate_in_hi



Training until validation scores don't improve for 30 rounds.
[50]	train's auc: 0.968689	valid's auc: 0.969612
[100]	train's auc: 0.973703	valid's auc: 0.972061
[150]	train's auc: 0.975904	valid's auc: 0.972407
Early stopping, best iteration is:
[133]	train's auc: 0.975296	valid's auc: 0.972551
Feature names: ['app', 'device', 'os', 'channel', 'hour', 'ip_day_hourcount', 'ip_appcount', 'ip_appcount_in_hist', 'ip_appcount_attribution_in_hist', 'ip_appcount_attribution_rate_in_hist', 'ip_channelcount', 'ip_channelcount_in_hist', 'ip_channelcount_attribution_in_hist', 'ip_channelcount_attribution_rate_in_hist', 'ip_channel_appcount', 'ip_channel_appcount_in_hist', 'ip_channel_appcount_attribution_in_hist', 'ip_channel_appcount_attribution_rate_in_hist', 'ip_app_oscount', 'ip_app_oscount_in_hist', 'ip_app_oscount_attribution_in_hist', 'ip_app_oscount_attribution_rate_in_hist', 'ipcount', 'ipcount_in_hist', 'ipcount_attribution_in_hist', 'ipcount_attribution_rate_in_hist', 'ip_hour_channelc

In [5]:
for_test = True

if for_test:
    del train
    del test
    gc.collect()

    #prepare test data:
    train = pd.read_csv(path_train, dtype=dtypes,
            header=0,usecols=train_cols,parse_dates=["click_time"])#.sample(1000)
    test = pd.read_csv(path_test, dtype=dtypes, header=0,
            usecols=test_cols,parse_dates=["click_time"])#.sample(1000)
    train=train.append(test)
    del test
    gc.collect()
    print("Creating new time features in train: 'hour' and 'day'...")
    train['hour'] = train["click_time"].dt.hour.astype('uint8')
    train['day'] = train["click_time"].dt.day.astype('uint8')
    
    train, train_ip_contains_training_day, train_ip_contains_training_day_attributed = \
        prepare_data(train, 10, 3, 1)

    train, new_features = generate_counting_history_features(train, train_ip_contains_training_day, 
                                                             train_ip_contains_training_day_attributed)

Creating new time features in train: 'hour' and 'day'...
train_ip_contains_training_day           app  channel          click_time  device      ip  is_attributed  os  \
0           3      379 2017-11-06 14:32:21       1   83230            0.0  13   
1           3      379 2017-11-06 14:33:34       1   17357            0.0  19   
2           3      379 2017-11-06 14:34:12       1   35810            0.0  13   
3          14      478 2017-11-06 14:34:52       1   45745            0.0  13   
5           3      379 2017-11-06 14:36:26       1   18787            0.0  16   
6           3      379 2017-11-06 14:37:44       1  103022            0.0  23   
7           3      379 2017-11-06 14:37:59       1  114221            0.0  19   
9          64      459 2017-11-06 14:38:23       1   74544            0.0  22   
11          3      379 2017-11-06 14:38:51       1  105861            0.0  13   
13          3      379 2017-11-06 14:40:16       1  124979            0.0  18   
14          3      37

count ip with group by in hist data: ['ip', 'hour', 'channel']
count ip attribution with group by in hist data: ['ip', 'hour', 'channel']
added features: ['ip_hour_channelcount', 'ip_hour_channelcount_in_hist', 'ip_hour_channelcount_attribution_in_hist', 'ip_hour_channelcount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'os']
count ip with group by in hist data: ['ip', 'hour', 'os']
count ip attribution with group by in hist data: ['ip', 'hour', 'os']
added features: ['ip_hour_oscount', 'ip_hour_oscount_in_hist', 'ip_hour_oscount_attribution_in_hist', 'ip_hour_oscount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'app']
count ip with group by in hist data: ['ip', 'hour', 'app']
count ip attribution with group by in hist data: ['ip', 'hour', 'app']
added features: ['ip_hour_appcount', 'ip_hour_appcount_in_hist', 'ip_hour_appcount_attribution_in_hist', 'ip_hour_appcount_attribution_rate_in_hist']
count ip with group by: ['ip', 'hour', 'device']
co

In [6]:
    print('test data:', train)

    print('new features:', new_features)
    print("Preparing data for submission...")

    submit = pd.read_csv(path_test, dtype='int', usecols=['click_id'])
    print('submit test len:', len(submit))
    print("Predicting the submission data...")
    submit['is_attributed'] = lgb_model.predict(train[predictors1], num_iteration=lgb_model.best_iteration)

    print("Writing the submission data into a csv file...")

    submit.to_csv("submission.csv",index=False)

    print("All done...")
    

    
    

test data:           app  channel          click_time  device      ip  is_attributed  os  \
0           9      107 2017-11-10 04:00:00       1    5744            NaN   3   
1           9      466 2017-11-10 04:00:00       1  119901            NaN   3   
2          21      128 2017-11-10 04:00:00       1   72287            NaN  19   
3          15      111 2017-11-10 04:00:00       1   78477            NaN  13   
4          12      328 2017-11-10 04:00:00       1  123080            NaN  13   
5          18      107 2017-11-10 04:00:00       1  110769            NaN  13   
6           3      137 2017-11-10 04:00:00       1   12540            NaN   1   
7          27      153 2017-11-10 04:00:00       1   88637            NaN  19   
8          18      107 2017-11-10 04:00:00       1   14932            NaN  10   
9          12      424 2017-11-10 04:00:00       1  123701            NaN  53   
10         26      477 2017-11-10 04:00:00       1  106056            NaN  19   
11          3    

submit test len: 18790469
Predicting the submission data...
Writing the submission data into a csv file...
All done...


In [7]:

'''
Another CTR comp and so i suspect libffm will play its part, after all it is an atomic bomb for this kind of stuff.
A sci-kit learn inspired script to convert pandas dataframes into libFFM style data.

The script is fairly hacky (hey thats Kaggle) and takes a little while to run a huge dataset.
The key to using this class is setting up the features dtypes correctly for output (ammend transform to suit your needs)

Example below

'''


class FFMFormatPandas:
    def __init__(self):
        self.field_index_ = None
        self.feature_index_ = None
        self.y = None

    def fit(self, df, y=None):
        self.y = y
        print('fitting')
        df_ffm = df[df.columns.difference([self.y])]
        if self.field_index_ is None:
            self.field_index_ = {col: i for i, col in enumerate(df_ffm)}

        if self.feature_index_ is not None:
            last_idx = max(list(self.feature_index_.values()))

        if self.feature_index_ is None:
            self.feature_index_ = dict()
            last_idx = 0

        for col in df.columns:
            print('fit procssing ', col)
            vals = df[col].unique()
            for val in vals:
                if pd.isnull(val):
                    continue
                name = '{}_{}'.format(col, val)
                if name not in self.feature_index_:
                    self.feature_index_[name] = last_idx
                    last_idx += 1
            self.feature_index_[col] = last_idx
            last_idx += 1
        return self

    def fit_transform(self, df, y=None):
        self.fit(df, y)
        return self.transform(df)

    def transform_row_(self, idx, row, t):
        if idx % 1000 == 0:
            print('transforming idx: {}'.format(idx))
        ffm = []
        if self.y != None:
            ffm.append(str(row.loc[row.index == self.y][0]))
        if self.y is None:
            ffm.append(str(0))
            
        items = row.loc[row.index != self.y].to_dict().items()

        for col, val in items:
            col_type = t[col]
            name = '_'.join([col, val])
            if col_type.kind ==  'O':
                ffm.append(':'.join[self.field_index_[col], self.feature_index_[name],'1'])
            elif col_type.kind == 'i':
                ffm.append(':'.join([self.field_index_[col], self.feature_index_[col], val]))
        return ' '.join(ffm)

    def transform(self, df):
        t = df.dtypes.to_dict()
        print('transforming')
        return pd.Series({idx: self.transform_row_(idx, row, t) for idx, row in df.iterrows()})

ffm_train = FFMFormatPandas()
ffm_train_data = ffm_train.fit_transform(train, y='is_attributed')

print('FFM data:',ffm_train_data)

fitting
fit procssing  app
fit procssing  channel
fit procssing  click_time
fit procssing  device
fit procssing  ip
fit procssing  is_attributed
fit procssing  os
fit procssing  hour
fit procssing  day
fit procssing  ip_day_hourcount
fit procssing  ip_appcount
fit procssing  ip_appcount_in_hist
fit procssing  ip_appcount_attribution_in_hist
fit procssing  ip_appcount_attribution_rate_in_hist
fit procssing  ip_channelcount
fit procssing  ip_channelcount_in_hist
fit procssing  ip_channelcount_attribution_in_hist
fit procssing  ip_channelcount_attribution_rate_in_hist
fit procssing  ip_channel_appcount
fit procssing  ip_channel_appcount_in_hist
fit procssing  ip_channel_appcount_attribution_in_hist
fit procssing  ip_channel_appcount_attribution_rate_in_hist
fit procssing  ip_app_oscount
fit procssing  ip_app_oscount_in_hist
fit procssing  ip_app_oscount_attribution_in_hist
fit procssing  ip_app_oscount_attribution_rate_in_hist
fit procssing  ipcount
fit procssing  ipcount_in_hist
fit proc

TypeError: sequence item 1: expected str instance, int found