In [1]:
import gzip
import math
import pickle
import zlib
import io

import pandas as pd
import numpy as np

# import scipy.stats

from sklearn.preprocessing import LabelEncoder

import engines
from utils import *

np.random.seed(2016)

transformers = {}

In [2]:
def assert_uniq(series, name):
    uniq = np.unique(series, return_counts=True)
    print("assert_uniq", name, uniq)

def custom_one_hot(df, features, name, names, dtype=np.int8, check=False):
    for n, val in names.items():
        new_name = "%s_%s" % (name, n)
        print(name, new_name)
        df[new_name] = df[name].map(lambda x: 1 if x == val else 0).astype(dtype)

        if check:
            assert_uniq(df[new_name], new_name)
        features.append(new_name)

In [3]:
def label_encode(df, features, name):
    df[name] = df[name].astype('str')
    if name in transformers: # test
        df[name] = transformers[name].transform(df[name])
    else: # train
        transformers[name] = LabelEncoder()
        df[name] = transformers[name].fit_transform(df[name])
    features.append(name)

In [4]:
def encode_top(s, count=100, dtype=np.int8):
    uniqs, freqs = np.unique(s, return_counts=True)
    top = sorted(zip(uniqs,freqs), key=lambda vk: vk[1], reverse = True)[:count]
    top_map = {uf[0]: l+1 for uf, l in zip(top, range(len(top)))}
    return s.map(lambda x: top_map.get(x, 0)).astype(dtype)

In [5]:
## load data
train_df = pd.read_csv('products.csv')
test_df = pd.read_csv('sampleSubmission.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
## fill missing values
#products['customerID'].fillna('BBID_0000', inplace=True)
train_df['promotion_description'].fillna('no_promo', inplace=True)
train_df['Gender'].fillna('no_gender', inplace=True)
train_df['State'].fillna('no_state', inplace=True)
train_df['PinCode'].fillna(-1, inplace=True)
train_df['DOB'].fillna("1", inplace=True)

In [7]:
#handling missing data for product_code
train_df = train_df[np.isfinite(train_df['product_code'])]

In [8]:
cnt_srs = train_df['product_code'].astype('int64').value_counts().reset_index().head(100)
#cnt_srs = products['product_code'].value_counts().reset_index()
cnt_srs.columns = ['product_code', 'frequency_count']
cnt_srs

Unnamed: 0,product_code,frequency_count
0,300776411,60994
1,300776410,58437
2,108037568,51338
3,300785148,41500
4,108100382,35259
5,300785150,34794
6,300840018,31149
7,108100362,27963
8,108005676,27727
9,300111517,25360


In [9]:
train_df = train_df[train_df['product_code'].isin(cnt_srs.product_code)].reset_index(drop=True)

In [10]:
train_df.head()

Unnamed: 0,customerID,DOB,Gender,State,PinCode,transactionDate,store_code,store_description,till_no,transaction_number_by_till,promo_code,promotion_description,product_code,product_description,sale_price_after_promo,discountUsed
0,BBID_2041,1981-10-18,male,no_state,452001,2016-01-19,2655,BB-INDORE-MALHAR MEGA MALL,2,51535,NONPROMO,no_promo,300785147.0,BB-CB-16X20X168SWG-Suitable for ROI New,2.0,Payback
1,BBID_2041,1981-10-18,male,no_state,452001,2016-05-01,2655,BB-INDORE-MALHAR MEGA MALL,21,47586,NONPROMO,no_promo,108100335.0,RIDGE GOURD,34.2,Payback
2,BBID_2041,1981-10-18,male,no_state,452001,2016-03-20,2655,BB-INDORE-MALHAR MEGA MALL,2,72624,0000856620,AZ 5% OFF EMPLOYEE DISCOUNT,108100331.0,BOTTLE GOURD LONG,7.9,Payback
3,BBID_2041,1981-10-18,male,no_state,452001,2016-03-20,2655,BB-INDORE-MALHAR MEGA MALL,2,72624,0000856620,AZ 5% OFF EMPLOYEE DISCOUNT,108100306.0,CORIANDER,4.92,Payback
4,BBID_2041,1981-10-18,male,no_state,452001,2016-03-20,2655,BB-INDORE-MALHAR MEGA MALL,2,72624,0000856620,AZ 5% OFF EMPLOYEE DISCOUNT,108100296.0,CAULIFLOWER,19.95,Payback


In [11]:
train_df['DOB'].replace('NANA', None, inplace=True)
train_df['DOB'].replace('1', None, inplace=True)


In [12]:
train_df = train_df[['customerID', 'Gender', 'State', 'transactionDate', 'product_code']]

In [13]:
train_df.head()

Unnamed: 0,customerID,Gender,State,transactionDate,product_code
0,BBID_2041,male,no_state,2016-01-19,300785147.0
1,BBID_2041,male,no_state,2016-05-01,108100335.0
2,BBID_2041,male,no_state,2016-03-20,108100331.0
3,BBID_2041,male,no_state,2016-03-20,108100306.0
4,BBID_2041,male,no_state,2016-03-20,108100296.0


In [14]:
train_df = pd.get_dummies(train_df, columns=['product_code'],prefix='_')

In [15]:
train_df.columns=train_df.columns.str.replace('_','')
train_df.reset_index(drop=True)
train_df.head()

Unnamed: 0,customerID,Gender,State,transactionDate,100105505.0,108000537.0,108000568.0,108000707.0,108001125.0,108001127.0,...,300808923.0,300840018.0,300942704.0,300974316.0,300974360.0,500096181.0,1000010411.0,1000058092.0,1000336252.0,1000609658.0
0,BBID_2041,male,no_state,2016-01-19,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,BBID_2041,male,no_state,2016-05-01,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,BBID_2041,male,no_state,2016-03-20,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,BBID_2041,male,no_state,2016-03-20,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,BBID_2041,male,no_state,2016-03-20,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
state_dict = {'MADHY PRADESH':'MADHYA PRADESH', 'TAMILNADU':'TAMIL NADU', 'MADHYA  PRADESH':'MADHYA PRADESH', 'HARAYANA':'HARYANA',
             'Jharkhand':'JHARKHAND','Tamilnadu':'TAMIL NADU','Tamil Nadu':'TAMIL NADU','Madhya Pradesh':'MADHYA PRADESH',
             'REST OF WEST BENGAL':'WEST BENGAL', 'west bengal':'WEST BENGAL','Uttar Pradesh':'UTTAR PRADESH', 'Delhi':'DELHI',
             'Bhopal':'BHOPAL','CHHATISGARH':'CHHATTISGARH','CHATTISGARH':'CHHATTISGARH', 'jharkhand':'JHARKHAND','Chandigarh':'CHANDIGARH',
             'UTTAR PRADESH WEST': 'UTTAR PRADESH','ODISHA':'ORISSA','MAHARASTRA':'MAHARASHTRA','madhya pradesh':'MADHYA PRADESH',
             'KARNATAK':'KARNATAKA','JAMMU and KASHMIR':'JAMMU AND KASHMIR','JAMMU KASHMIR':'JAMMU AND KASHMIR','Rajasthan':'RAJASTHAN',
             'east singhbhum':'JHARKHAND', 'ORRISA':'ORISSA','Andhra Pradesh':'ANDHRA PRADESH', 'UTTARANCHAL':'UTTARAKHAND',
             'Uttar pradesh':'UTTAR PRADESH','Maharashtra':'MAHARASHTRA','MP':'MADHYA PRADESH', 'UTTAR PRADESH EAST':'UTTAR PRADESH',
             'Punjab':'PUNJAB','maharashtra':'MAHARASHTRA','Karnataka':'KARNATAKA','M.P.':'MADHYA PRADESH','DAMAN':'DAMAN AND DIU',
             'HUBLI':'KARNATAKA','Tamil nadu':'TAMIL NADU','GUJRAT':'GUJARAT', 'Mp':'MADHYA PRADESH','Madhya pradesh':'MADHYA PRADESH',
             'West Bengal':'WEST BENGAL','Gujarat':'GUJARAT','UP':'UTTAR PRADESH','Chennai':'CHENNAI', 'm.p.':'MADHYA PRADESH',
             'kerala':'KERALA'}

In [17]:
train_df.replace({"State": state_dict}, inplace=True)

In [18]:
train_df.State.value_counts()

MADHYA PRADESH                 474002
KARNATAKA                      236396
no_state                       176766
TAMIL NADU                     144976
JHARKHAND                      116863
PUNJAB                         113906
MAHARASHTRA                     19413
UTTAR PRADESH                   12526
WEST BENGAL                     11161
DELHI                            6762
RAJASTHAN                        4936
ANDHRA PRADESH                   4596
BIHAR                            4577
CHHATTISGARH                     4553
GUJARAT                          4498
HARYANA                          4486
ORISSA                           4141
KERALA                           1923
x                                1185
ASSAM                             815
HIMACHAL PRADESH                  783
UTTARAKHAND                       556
DUMMY                             526
GOA                               498
JAMMU AND KASHMIR                 179
PONDICHERRY                       176
CHANDIGARH  

In [19]:
cnt_srs_state = train_df['State'].value_counts().reset_index().head(18)

In [20]:
cnt_srs_state['index']

0     MADHYA PRADESH
1          KARNATAKA
2           no_state
3         TAMIL NADU
4          JHARKHAND
5             PUNJAB
6        MAHARASHTRA
7      UTTAR PRADESH
8        WEST BENGAL
9              DELHI
10         RAJASTHAN
11    ANDHRA PRADESH
12             BIHAR
13      CHHATTISGARH
14           GUJARAT
15           HARYANA
16            ORISSA
17            KERALA
Name: index, dtype: object

In [21]:
train_df = train_df.loc[train_df['State'].isin(cnt_srs_state['index'])]

In [22]:
train_df['transactionDate'] =  pd.to_datetime(train_df['transactionDate'], format='%Y-%m-%d')

In [23]:
train_df.to_csv('all_clean_100prods_18_states.csv', index=False)

In [25]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1346481 entries, 0 to 1351980
Columns: 104 entries, customerID to 1000609658.0
dtypes: datetime64[ns](1), object(3), uint8(100)
memory usage: 179.8+ MB


In [24]:
train_df['State'].value_counts()

MADHYA PRADESH    474002
KARNATAKA         236396
no_state          176766
TAMIL NADU        144976
JHARKHAND         116863
PUNJAB            113906
MAHARASHTRA        19413
UTTAR PRADESH      12526
WEST BENGAL        11161
DELHI               6762
RAJASTHAN           4936
ANDHRA PRADESH      4596
BIHAR               4577
CHHATTISGARH        4553
GUJARAT             4498
HARYANA             4486
ORISSA              4141
KERALA              1923
Name: State, dtype: int64

In [26]:
def apply_transforms(train_df):
    features = []
    with Timer("apply transforms"):
        label_encode(train_df, features, "Gender")
        # label_encode(train_df, features, "nomprov") # use cod_prov only
        label_encode(train_df, features, "State")
        
        train_df["transactionDate_month"] = train_df["transactionDate"].map(lambda x: int(x.split("-")[1])).astype(np.int8)
        features.append("transactionDate_month")
        train_df["transactionDate_year"] = train_df["transactionDate"].map(lambda x: float(x.split("-")[0])).astype(np.int16)
        features.append("transactionDate_month")
        
        train_df['transactionDate'] =  pd.to_datetime(train_df['transactionDate'], format='%Y-%m-%d')
        Y = train_df['transactionDate'].dt.year
        M = train_df['transactionDate'].dt.month
        D = train_df['transactionDate'].dt.day
        int_date = ((Y) - 2017) * 12 + (M)

        
        #train_df["int_date"] = train_df["transactionDate"].apply(date_to_int).astype(np.int8)
        train_df["int_date"] = int_date
        
        custom_one_hot(train_df, features, "Gender", {"male":"MALE", "female":"FEMALE", 'no_gender':'NO_GENDER'})
        
    return train_df, tuple(features)

In [27]:
def make_prev_df(train_df, step):
    with Timer("make prev%s DF" % step):
        prev_df = pd.DataFrame()
        prev_df["customerID"] = train_df["customerID"]
        prev_df["int_date"] = train_df["int_date"].map(lambda x: x+step).astype(np.int8)
        prod_features = ["%s_prev%s" % (prod, step) for prod in products]
        for prod, prev in zip(products, prod_features):
            prev_df[prev] = train_df[prod]
    return prev_df, tuple(prod_features)

In [28]:
def load_data(fname="all_clean.csv"):
    with Timer("load train csv"):
        train_df = pd.read_csv(fname, dtype=dtypes)

    with Timer("fill products NA"):
        for prod in products:
            train_df[prod] = train_df[prod].fillna(0.0).astype(np.int8)

    train_df, features = apply_transforms(train_df)

    prev_dfs = []

    prod_features = None

    use_features = frozenset([1,2])
    for step in range(1,3):
        prev1_train_df, prod1_features = make_prev_df(train_df, step)
        prev_dfs.append(prev1_train_df)
        if step in use_features:
            features += prod1_features
        if step == 1:
            prod_features = prod1_features

    return train_df, prev_dfs, features, prod_features

In [29]:
def join_with_prev(df, prev_df, how):
    with Timer("join %s" % how):
        assert set(df.columns.values.tolist()) & set(prev_df.columns.values.tolist()) == set(["customerID", "int_date"])
        print("before join", len(df))
        df = df.merge(prev_df, on=["customerID", "int_date"], how=how)
        for f in set(prev_df.columns.values.tolist()) - set(["customerID", "int_date"]):
            df[f] = df[f].astype(np.float16)
        print("after join", len(df))
        return df


In [34]:
def make_data():
    train_df, prev_dfs, features, prod_features = load_data()

    for i, prev_df in enumerate(prev_dfs):
        with Timer("join train with prev%s" % (i+1)):
            how = "inner" if i == 0 else "left"
            train_df = join_with_prev(train_df, prev_df, how=how)
            
    for prod in products:
        print()
        print(prod)
        for begin, end in [(2,3),(2,5)]:
            prods = ["%s_prev%s" % (prod, i) for i in range(begin,end+1)]
            mp_df = train_df.as_matrix(columns=prods)
            print(prods)

            minf = "%s_min_%s_%s"%(prod,begin,end)
            train_df[minf] = np.nanmin(mp_df, axis=1).astype(np.int8)

            maxf = "%s_max_%s_%s"%(prod,begin,end)
            train_df[maxf] = np.nanmax(mp_df, axis=1).astype(np.int8)

            features += (minf,maxf,)
            
    for prod in products:
        print()
        print(prod)
        for begin, end in [(2,3),(2,5)]:
            prods = ["%s_prev%s" % (prod, i) for i in range(begin,end+1)]
            mp_df = train_df.as_matrix(columns=prods)
            print(prods)

            minf = "%s_min_%s_%s"%(prod,begin,end)
            train_df[minf] = np.nanmin(mp_df, axis=1).astype(np.int8)

            maxf = "%s_max_%s_%s"%(prod,begin,end)
            train_df[maxf] = np.nanmax(mp_df, axis=1).astype(np.int8)

            features += (minf,maxf,)
            
    with Timer("Remove unused columns"):
        leave_columns = ["customerID", "int_date", "transactionDate"] + list(products) + list(features)
        #assert len(leave_columns) == len(set(leave_columns))
        train_df = train_df[leave_columns]

    return train_df, features, prod_features


In [31]:
def make_submission(f, Y_test, C):
    Y_ret = []
    with Timer("make submission"):
        f.write("customerID,products\n".encode('utf-8'))
        for c, y_test in zip(C, Y_test):
            y_prods = [(y,p,ip) for y,p,ip in zip(y_test, products, range(len(products)))]
            y_prods = sorted(y_prods, key=lambda a: a[0], reverse=True)[:20]
            Y_ret.append([ip for y,p,ip in y_prods])
            y_prods = [p for y,p,ip in y_prods]
            f.write(("%s,%s\n" % (int(c), " ".join(y_prods))).encode('utf-8'))
    return Y_ret


In [32]:
def train_predict(all_df, features, prod_features, str_date, cv):
    
    
    train_df['transactionDate'] =  pd.to_datetime(train_df['transactionDate'], format='%Y-%m-%d')
    Y = train_df['transactionDate'].dt.year
    M = train_df['transactionDate'].dt.month
    D = train_df['transactionDate'].dt.day
    int_date = ((Y) - 2017) * 12 + (M)
    train_df["int_date"] = int_date

    
    test_date = date_to_int(str_date)
    train_df = all_df[all_df.int_date < test_date]
    test_df = pd.DataFrame(all_df[all_df.int_date == test_date])
    print(sorted(set(train_df.columns.values.tolist())))
    print(len(train_df.columns.values.tolist()), len(set(train_df.columns.values.tolist())))
    print(len(features),len(set(features)))

    X = []
    Y = []
    for i, prod in enumerate(products):
        prev = prod + "_prev1"
        prX = train_df[(train_df[prod] == 1) & (train_df[prev] == 0)]
        prY = np.zeros(prX.shape[0], dtype=np.int8) + i
        X.append(prX)
        Y.append(prY)
        print(prod, prX.shape)


    XY = pd.concat(X)
    Y = np.hstack(Y)
    XY["y"] = Y
    XY["url"] = np.zeros(len(XY), dtype=np.int8)

    del train_df
    del all_df


    XY["customerID_transactionDate"] = XY["customerID"].astype(str) + XY["transactionDate"]
    uniqs, counts = np.unique(XY["customerID_transactionDate"], return_counts=True)
    weights = np.exp(1/counts - 1)
    print(np.unique(counts, return_counts=True))
    print(np.unique(weights, return_counts=True))
    wdf = pd.DataFrame()
    wdf["customerID_transactionDate"] = uniqs
    wdf["counts"] = counts
    wdf["weight"] = weights
    print("before merge", len(XY))
    XY = XY.merge(wdf, on="customerID_transactionDate")
    print("after merge", len(XY))

    print(XY.shape)
    
    mask = np.random.rand(len(XY)) < 0.8
    XY_train = XY[mask]
    XY_validate = XY[~mask]

    with Timer("prepare test data"):
        test_df["y"] = test_df["customerID"]
        test_df["url"] = np.zeros(len(test_df), dtype=np.int8)
        test_df["weight"] = np.ones(len(test_df), dtype=np.int8)
        Y_prev = test_df.as_matrix(columns=prod_features)
        C = test_df.as_matrix(columns=["customerID"])
        for prod in products:
            prev = prod + "_prev1"
            padd = prod + "_add"
            test_df[padd] = test_df[prod] - test_df[prev]
        test_add_mat = test_df.as_matrix(columns=[prod + "_add" for prod in products])
        test_add_list = [list() for i in range(len(C))]
        assert test_add_mat.shape == (len(C), len(products))
        count = 0
        for c in range(len(C)):
            for p in range(len(products)):
                if test_add_mat[c,p] > 0:
                    test_add_list[c].append(p)
                    count += 1
                    
    if cv:
        max_map20 = mapk(test_add_list, test_add_list, 20, 0.0)
        map20coef = float(len(test_add_list)) / float(sum([int(bool(a)) for a in test_add_list]))
        print("Max MAP@20", str_date, max_map20, max_map20*map20coef)
        
    
    with Timer("LightGBM"):
        Y_test_lgbm = engines.lightgbm(XY_train, XY_validate, test_df, features, XY_all = XY,
            restore = (str_date == "2017-06-28")
        )
        test_add_list_lightgbm = make_submission(io.BytesIO() if cv else gzip.open("tmp/%s.lightgbm.csv.gz" % str_date, "wb"),
                                                  Y_test_lgbm - Y_prev, C)
        if cv:
            map20lightgbm = mapk(test_add_list, test_add_list_lightgbm, 20, 0.0)
            print("LightGBMlib MAP@20", str_date, map20lightgbm, map20lightgbm*map20coef)
            
    with Timer("XGBoost"):
        Y_test_xgb = engines.xgboost(XY_train, XY_validate, test_df, features, XY_all = XY,
            restore = (str_date == "2017-06-28")
        )
        test_add_list_xgboost = make_submission(io.BytesIO() if cv else gzip.open("tmp/%s.xgboost.csv.gz" % str_date, "wb"),
                                                Y_test_xgb - Y_prev, C)
        if cv:
            map20xgboost = mapk(test_add_list, test_add_list_xgboost, 20, 0.0)
            print("XGBoost MAP@20", str_date, map20xgboost, map20xgboost*map20coef)
            
    Y_test = np.sqrt(np.multiply(Y_test_xgb, Y_test_lgbm))

    test_add_list_xl = make_submission(io.BytesIO() if cv else gzip.open("tmp/%s.xgboost-lightgbm.csv.gz" % str_date, "wb"),
                                       Y_test - Y_prev, C)
    if cv:
        map20xl = mapk(test_add_list, test_add_list_xl, 20, 0.0)
        print("XGBoost+LightGBM MAP@20", str_date, map7xl, map20xl*map20coef)


In [37]:
if __name__ == "__main__":
    if True:
        all_df, features, prod_features = make_data()
        with Timer("save data"):
            #all_df.to_pickle("tmp/cv_data.pickle")
            all_df.to_csv("cv_data.csv", index=False)
            pickle.dump((features, prod_features), open("tmp/cv_meta.pickle", "wb"))
    else:
        with Timer("restore data"):
            all_df = pd.read_pickle("tmp/cv_data.pickle")
            (features, prod_features) = pickle.load(open("tmp/cv_meta.pickle", "rb"))


    train_predict(all_df, features, prod_features, "2017-05-28", cv=True)
    train_predict(all_df, features, prod_features, "2017-06-28", cv=False)


load train csv...
load train csv: cpu 3.75, time 3.75

fill products NA...
fill products NA: cpu 0.49, time 0.49

apply transforms...
Gender Gender_male
Gender Gender_female
Gender Gender_no_gender
apply transforms: cpu 3.17, time 3.17

make prev1 DF...
make prev1 DF: cpu 0.32, time 0.33

make prev2 DF...
make prev2 DF: cpu 0.28, time 0.28

join train with prev1...
join inner...
before join 584334
after join 1062419
join inner: cpu 0.74, time 0.74

join train with prev1: cpu 0.75, time 0.75

join train with prev2...
join left...
before join 1062419
after join 10112124
join left: cpu 13.22, time 13.22

join train with prev2: cpu 13.23, time 13.24


300776411.0
['300776411.0_prev2', '300776411.0_prev3']




['300776411.0_prev2', '300776411.0_prev3', '300776411.0_prev4', '300776411.0_prev5']

300776410.0
['300776410.0_prev2', '300776410.0_prev3']
['300776410.0_prev2', '300776410.0_prev3', '300776410.0_prev4', '300776410.0_prev5']

108037568.0
['108037568.0_prev2', '108037568.0_prev3']
['108037568.0_prev2', '108037568.0_prev3', '108037568.0_prev4', '108037568.0_prev5']

300785148.0
['300785148.0_prev2', '300785148.0_prev3']
['300785148.0_prev2', '300785148.0_prev3', '300785148.0_prev4', '300785148.0_prev5']

108100382.0
['108100382.0_prev2', '108100382.0_prev3']
['108100382.0_prev2', '108100382.0_prev3', '108100382.0_prev4', '108100382.0_prev5']

300785150.0
['300785150.0_prev2', '300785150.0_prev3']
['300785150.0_prev2', '300785150.0_prev3', '300785150.0_prev4', '300785150.0_prev5']

300840018.0
['300840018.0_prev2', '300840018.0_prev3']
['300840018.0_prev2', '300840018.0_prev3', '300840018.0_prev4', '300840018.0_prev5']

108100362.0
['108100362.0_prev2', '108100362.0_prev3']
['108100362.0



['300776411.0_prev2', '300776411.0_prev3', '300776411.0_prev4', '300776411.0_prev5']

300776410.0
['300776410.0_prev2', '300776410.0_prev3']
['300776410.0_prev2', '300776410.0_prev3', '300776410.0_prev4', '300776410.0_prev5']

108037568.0
['108037568.0_prev2', '108037568.0_prev3']
['108037568.0_prev2', '108037568.0_prev3', '108037568.0_prev4', '108037568.0_prev5']

300785148.0
['300785148.0_prev2', '300785148.0_prev3']
['300785148.0_prev2', '300785148.0_prev3', '300785148.0_prev4', '300785148.0_prev5']

108100382.0
['108100382.0_prev2', '108100382.0_prev3']
['108100382.0_prev2', '108100382.0_prev3', '108100382.0_prev4', '108100382.0_prev5']

300785150.0
['300785150.0_prev2', '300785150.0_prev3']
['300785150.0_prev2', '300785150.0_prev3', '300785150.0_prev4', '300785150.0_prev5']

300840018.0
['300840018.0_prev2', '300840018.0_prev3']
['300840018.0_prev2', '300840018.0_prev3', '300840018.0_prev4', '300840018.0_prev5']

108100362.0
['108100362.0_prev2', '108100362.0_prev3']
['108100362.0

AttributeError: 'str' object has no attribute 'dt'