## SQLite Database for Reproducibility

This notebook extracts sample data from the live database into a local SQLite file. The sample includes the tables used for training, testing, and online serving.

In [1]:
import sqlite3

In [2]:
# Connection to the local SQLite file; every sample table below is written through it.
conn = sqlite3.connect('data/db.db')

In [13]:
# Cursor on the SQLite connection, used for the CREATE TABLE statements further down.
c = conn.cursor()

Connect to live db to extract and transfer data

In [3]:
import sqlalchemy as db
from sqlalchemy import create_engine
import psycopg2
import pandas as pd
import pickle

In [8]:
import os

# The connection string contains the database password, so it must not be stored in
# the notebook. Export it before starting the kernel, e.g.
#   DATABASE_URI=postgresql+psycopg2://<user>:<password>@<host>:<port>/person_recommender
# Use the `postgresql+psycopg2` scheme: newer SQLAlchemy releases no longer accept
# the old `postgres` dialect name.
DATABASE_URI = os.environ.get("DATABASE_URI")
if not DATABASE_URI:
    raise RuntimeError(
        "Set the DATABASE_URI environment variable to the live database connection string."
    )

# Engine for all reads from the live database.
engine = create_engine(DATABASE_URI)
meta = db.MetaData(engine)

## Pickling

I need to pickle every field that contains an array/list, because these values are stored as blobs in SQLite:
- clicked_before
- text_vec
- anbieterid_enc_user
- anbietermarktplatz_enc_user
- warengruppe_enc_user
- text_vec_user

In [47]:
def pickle_data(data):
    """Return a copy of ``data`` whose list-valued columns are pickled to bytes.

    Each value in the known list-valued columns is replaced by ``pickle.dumps(value)``
    so it can be stored in a SQLite blob column. Columns from the list that are not
    present in ``data`` are skipped; all other columns are left untouched.

    Args:
        data: DataFrame to convert. It is not modified.

    Returns:
        A new DataFrame with the same index and columns as ``data``.
    """
    df = data.copy()
    columns = ['clicked_before', 'text_vec', 'anbieterid_enc_user', 'anbietermarktplatz_enc_user', 'warengruppe_enc_user', 'text_vec_user']
    for column in columns:
        # Explicit membership test instead of catching KeyError: the old label lookup
        # (df.loc[i, column]) raised KeyError for any index other than 0..n-1, and the
        # except clause then silently left the column unpickled.
        if column not in df.columns:
            continue
        # Iterate over the values themselves so the result does not depend on the index.
        df[column] = [pickle.dumps(value) for value in df[column]]
    return df

In [48]:
def unpickle_data(data):
    """Return a copy of ``data`` whose pickled columns are restored to Python objects.

    Inverse of ``pickle_data``: each value in the known list-valued columns is
    replaced by ``pickle.loads(value)``. Columns from the list that are not present
    in ``data`` are skipped; all other columns are left untouched.

    Args:
        data: DataFrame holding pickled bytes in the list-valued columns. It is not
            modified.

    Returns:
        A new DataFrame with the same index and columns as ``data``.
    """
    df = data.copy()
    columns = ['clicked_before', 'text_vec', 'anbieterid_enc_user', 'anbietermarktplatz_enc_user', 'warengruppe_enc_user', 'text_vec_user']
    for column in columns:
        # Explicit membership test instead of catching KeyError: the old label lookup
        # (df.loc[i, column]) raised KeyError for any index other than 0..n-1, and the
        # except clause then silently left the column as raw bytes.
        if column not in df.columns:
            continue
        # SECURITY: pickle.loads can execute arbitrary code. Only use this on database
        # files produced by this project, never on files from an untrusted source.
        df[column] = [pickle.loads(blob) for blob in df[column]]
    return df

### Training data

In [9]:
# Pull a 25,000-row sample of the encoded training table from the live database.
train = pd.read_sql(
    "SELECT * from target_training_enc limit 25000",
    con=engine,
)

In [10]:
train.head()

Unnamed: 0,index,datum_click,anbieter_artikelnummer,userid,clicked_before,pick,days_online_std,days_online_log_std,month_enc,anbietermarktplatz_enc,...,minve_log_std,usermkt_enc,anbieterid_enc_user,anbietermarktplatz_enc_user,warengruppe_enc_user,text_vec_user,preis_std_user,minve_std_user,preis_log_std_user,minve_log_std_user
0,2353009,2018-01-30 12:55:31,0035042810021768,1576653,"[00694944SC16045, 00694944EA6303, 007021001545...",0.0,1.399935,1.157657,1,1,...,1.664599,8,"[30, 70, 70, 70, 70, 70, 70, 93, 93, 93, 93, 9...","[1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, ...","[191, 32, 175, 139, 195, 53, 32, 82, 82, 50, 5...","[-0.218513947958127, -0.184354413151741, -0.53...",-0.100367,-0.008907,-1.04866,-0.985059
1,2353010,2018-01-30 12:55:32,003666136806,883068,"[00583832CH1004, 00583832CH1005, 003666138301,...",0.0,3.116889,1.532133,1,1,...,-1.06371,7,"[197, 197, 344, 344, 344, 344, 344]","[1, 1, 2, 2, 2, 2, 2]","[93, 93, 168, 168, 168, 168, 168]","[0.153136591560074, 0.00332540699413845, -0.05...",-0.091585,-0.014588,-0.733798,-2.530091
2,2353011,2018-01-30 12:55:37,0034309252378,1603793,[],1.0,-0.796577,-1.108962,1,1,...,1.461386,3,[],[],[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.0,0.0,0.807366,-0.011449
3,2353012,2018-01-30 12:55:44,00364442SCH-375b,623029,"[0012354691202, 0028237059490, 0033513018660, ...",0.0,1.835737,1.274403,1,1,...,0.067817,1,"[5, 70, 124, 157, 172]","[1, 1, 1, 1, 1]","[222, 149, 99, 59, 172]","[-0.122849941253662, -0.46886682510376, -0.401...",-0.117866,0.066932,-2.064024,2.105503
4,2353013,2018-01-30 12:55:44,004770784041 MIX,1501596,"[001895759005104, 001895759005108, 00189575900...",1.0,-0.22951,0.317311,1,8,...,0.399369,1,"[2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 29,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 189, 5, 182, 156, 156, 182, 156, 156, 182,...","[0.336847525388002, -0.332346421033144, -0.109...",-0.067205,0.029587,-0.122289,1.323163


In [11]:
train_pickled = pickle_data(train)

In [12]:
train_pickled.head()

Unnamed: 0,index,datum_click,anbieter_artikelnummer,userid,clicked_before,pick,days_online_std,days_online_log_std,month_enc,anbietermarktplatz_enc,...,minve_log_std,usermkt_enc,anbieterid_enc_user,anbietermarktplatz_enc_user,warengruppe_enc_user,text_vec_user,preis_std_user,minve_std_user,preis_log_std_user,minve_log_std_user
0,2353009,2018-01-30 12:55:31,0035042810021768,1576653,b'\x80\x03]q\x00(X\x0f\x00\x00\x0000694944SC16...,0.0,1.399935,1.157657,1,1,...,1.664599,8,b'\x80\x03]q\x00(K\x1eKFKFKFKFKFKFK]K]K]K]K]K]...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K\xbfK K\xafK\x8bK\xc3K5K KRK...,b'\x80\x03]q\x00(G\xbf\xcb\xf8C\xda\x19\x99\xa...,-0.100367,-0.008907,-1.04866,-0.985059
1,2353010,2018-01-30 12:55:32,003666136806,883068,b'\x80\x03]q\x00(X\x0e\x00\x00\x0000583832CH10...,0.0,3.116889,1.532133,1,1,...,-1.06371,7,b'\x80\x03]q\x00(K\xc5K\xc5MX\x01MX\x01MX\x01M...,b'\x80\x03]q\x00(K\x01K\x01K\x02K\x02K\x02K\x0...,b'\x80\x03]q\x00(K]K]K\xa8K\xa8K\xa8K\xa8K\xa8e.',b'\x80\x03]q\x00(G?\xc3\x99\xfa\xd6I$\x89G?k=\...,-0.091585,-0.014588,-0.733798,-2.530091
2,2353011,2018-01-30 12:55:37,0034309252378,1603793,b'\x80\x03]q\x00.',1.0,-0.796577,-1.108962,1,1,...,1.461386,3,b'\x80\x03]q\x00.',b'\x80\x03]q\x00.',b'\x80\x03]q\x00.',b'\x80\x03]q\x00(G\x00\x00\x00\x00\x00\x00\x00...,0.0,0.0,0.807366,-0.011449
3,2353012,2018-01-30 12:55:44,00364442SCH-375b,623029,b'\x80\x03]q\x00(X\r\x00\x00\x000012354691202q...,0.0,1.835737,1.274403,1,1,...,0.067817,1,b'\x80\x03]q\x00(K\x05KFK|K\x9dK\xace.',b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01e.',b'\x80\x03]q\x00(K\xdeK\x95KcK;K\xace.',b'\x80\x03]q\x00(G\xbf\xbfs\x17\xff\xff\xff\xf...,-0.117866,0.066932,-2.064024,2.105503
4,2353013,2018-01-30 12:55:44,004770784041 MIX,1501596,b'\x80\x03]q\x00(X\x0f\x00\x00\x00001895759005...,1.0,-0.22951,0.317311,1,8,...,0.399369,1,b'\x80\x03]q\x00(K\x02K\x02K\x02K\x08K\x08K\x0...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K\x05K\xbdK\x05K\xb6K\x9cK\x9...,b'\x80\x03]q\x00(G?\xd5\x8e\xe8\xecQ\xeb~G\xbf...,-0.067205,0.029587,-0.122289,1.323163


Create Table

In [14]:
train_pickled.dtypes

index                                   int64
datum_click                    datetime64[ns]
anbieter_artikelnummer                 object
userid                                 object
clicked_before                         object
pick                                  float64
days_online_std                       float64
days_online_log_std                   float64
month_enc                               int64
anbietermarktplatz_enc                  int64
anbieterid_enc                          int64
warengruppe_enc                         int64
text_vec                               object
preis_std                             float64
minve_std                             float64
preis_log_std                         float64
minve_log_std                         float64
usermkt_enc                             int64
anbieterid_enc_user                    object
anbietermarktplatz_enc_user            object
warengruppe_enc_user                   object
text_vec_user                     

In [15]:
# Create the SQLite table for the training sample.
# - The columns that hold pickled lists are declared as blob.
# - [pick] is declared integer although the DataFrame column is float64 (values 0.0 / 1.0).
# - The statement is a plain CREATE TABLE, so it raises if the table already exists.
c.execute('''CREATE TABLE target_training_enc
             ([index] integer PRIMARY KEY, [datum_click] datetime, [anbieter_artikelnummer] text, 
             [userid] text, [clicked_before] blob, [pick] integer, [days_online_std] real, [month_enc] integer, 
             [anbietermarktplatz_enc] integer, [anbieterid_enc] integer, [warengruppe_enc] integer, [text_vec] blob, 
             [preis_std] real, [minve_std] real, [usermkt_enc] integer, [anbieterid_enc_user] blob, 
             [anbietermarktplatz_enc_user] blob, [warengruppe_enc_user] blob, [text_vec_user] blob, [preis_std_user] real, 
             [minve_std_user] real, [days_online_log_std] real, [preis_log_std] real, [preis_log_std_user] real, 
             [minve_log_std] real, [minve_log_std_user] real)''')


<sqlite3.Cursor at 0x7f808be0cc70>

In [16]:
# Append the pickled training sample to the SQLite table created above.
train_pickled.to_sql(
    name="target_training_enc",
    con=conn,
    if_exists="append",
    index=False,
)

Test extracting

In [17]:
# Round-trip check: read 1,000 of the stored rows back from SQLite.
train_pickled = pd.read_sql(
    "SELECT * from target_training_enc limit 1000",
    con=conn,
)

In [18]:
# Restore the list-valued columns of the rows read back from SQLite.
# NOTE(review): the name `test` is reused further down for the test-set sample.
test = unpickle_data(train_pickled)

In [19]:
test.head()

Unnamed: 0,index,datum_click,anbieter_artikelnummer,userid,clicked_before,pick,days_online_std,month_enc,anbietermarktplatz_enc,anbieterid_enc,...,anbietermarktplatz_enc_user,warengruppe_enc_user,text_vec_user,preis_std_user,minve_std_user,days_online_log_std,preis_log_std,preis_log_std_user,minve_log_std,minve_log_std_user
0,2353009,2018-01-30 12:55:31,0035042810021768,1576653,"[00694944SC16045, 00694944EA6303, 007021001545...",0,1.399935,1,1,184,...,"[1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, ...","[191, 32, 175, 139, 195, 53, 32, 82, 82, 50, 5...","[-0.218513947958127, -0.184354413151741, -0.53...",-0.100367,-0.008907,1.157657,-1.115653,-1.04866,1.664599,-0.985059
1,2353010,2018-01-30 12:55:32,003666136806,883068,"[00583832CH1004, 00583832CH1005, 003666138301,...",0,3.116889,1,1,197,...,"[1, 1, 2, 2, 2, 2, 2]","[93, 93, 168, 168, 168, 168, 168]","[0.153136591560074, 0.00332540699413845, -0.05...",-0.091585,-0.014588,1.532133,0.759179,-0.733798,-1.06371,-2.530091
2,2353011,2018-01-30 12:55:37,0034309252378,1603793,[],1,-0.796577,1,1,177,...,[],[],"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.0,0.0,-1.108962,-1.423027,0.807366,1.461386,-0.011449
3,2353012,2018-01-30 12:55:44,00364442SCH-375b,623029,"[0012354691202, 0028237059490, 0033513018660, ...",0,1.835737,1,1,195,...,"[1, 1, 1, 1, 1]","[222, 149, 99, 59, 172]","[-0.122849941253662, -0.46886682510376, -0.401...",-0.117866,0.066932,1.274403,-0.667262,-2.064024,0.067817,2.105503
4,2353013,2018-01-30 12:55:44,004770784041 MIX,1501596,"[001895759005104, 001895759005108, 00189575900...",1,-0.22951,1,8,261,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 189, 5, 182, 156, 156, 182, 156, 156, 182,...","[0.336847525388002, -0.332346421033144, -0.109...",-0.067205,0.029587,0.317311,-0.434223,-0.122289,0.399369,1.323163


Same for test data

In [20]:
# Pull a 5,000-row sample of the encoded test table from the live database.
test = pd.read_sql(
    "SELECT * from target_testing_enc limit 5000",
    con=engine,
)

In [21]:
test_pickled = pickle_data(test)

In [22]:
test_pickled.head()

Unnamed: 0,index,datum_click,anbieter_artikelnummer,userid,clicked_before,pick,days_online_std,days_online_log_std,month_enc,anbietermarktplatz_enc,...,minve_log_std,usermkt_enc,anbieterid_enc_user,anbietermarktplatz_enc_user,warengruppe_enc_user,text_vec_user,preis_std_user,minve_std_user,preis_log_std_user,minve_log_std_user
0,140290,2019-07-08 19:52:34,00101045TY37011,56506,b'\x80\x03]q\x00(X\r\x00\x00\x000010104517000q...,0.0,-0.801828,-1.157062,7,7,...,-1.06371,1,b'\x80\x03]q\x00(K.K.K.K.K.K.K.K?K?K?K?K?K?K?K...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K\tKSKSK\xc2K\x9eK\tK\xc3K\xc...,b'\x80\x03]q\x00(G\xbf\xda\xf5b\x9e\x99\x99\x9...,-0.086228,-0.012997,-0.573079,-1.893238
1,140291,2019-07-08 19:52:34,0070854200000116,835762,b'\x80\x03]q\x00(X\x11\x00\x00\x0000481348MJ-K...,1.0,0.400565,0.772045,7,8,...,-1.06371,4,b'\x80\x03]q\x00(K\x03K\x05K\rK\x0eK;K=K>K>K>K...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,"b""\x80\x03]q\x00(KKK\xe2KKK\xc8K\x0eK\x9aK]K]K...","b""\x80\x03]q\x00(G\xbf\xb0Y\x853\n=rG?\xa6\xc6...",-0.072686,-0.002887,-0.237186,-0.256595
2,140292,2019-07-08 19:52:35,0034309254061,851967,b'\x80\x03]q\x00(X\x0e\x00\x00\x00006594839112...,1.0,-0.835082,-1.585571,7,1,...,1.175003,3,"b""\x80\x03]q\x00(K\x02K\xa8K\xa8K\xa8K\xa8K\xa...",b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K\x05K\x14K\x14K\x14K\x14K\x1...,b'\x80\x03]q\x00(G?\xd8\xbf=\x06\x14z\xd9G\xbf...,-0.032945,-0.008406,0.431412,-0.906739
3,140293,2019-07-08 19:52:40,00762238V0101154,1678581,b'\x80\x03]q\x00.',0.0,-0.824581,-1.418454,7,2,...,-1.06371,5,b'\x80\x03]q\x00.',b'\x80\x03]q\x00.',b'\x80\x03]q\x00.',b'\x80\x03]q\x00(G\x00\x00\x00\x00\x00\x00\x00...,0.0,0.0,0.807366,-0.011449
4,140294,2019-07-08 19:52:41,00101045MEK3N000360,1525541,b'\x80\x03]q\x00(X\x15\x00\x00\x00004470993867...,0.0,-0.521795,-0.086512,7,7,...,-1.06371,6,b'\x80\x03]q\x00(K K K.K.K.K.K.K.K=K?K?K?K?K?K...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K]K\xbdK2K\x0eK\xc3K\x94K\x94...,b'\x80\x03]q\x00(G\xbf\xd3\xb9\xd8S=p\xa3G\xbf...,-0.093543,0.035829,-0.797766,1.490141


In [23]:
# New cursor on the same SQLite connection; `c` already holds one from the top of the
# notebook, so this only replaces it.
c = conn.cursor()

In [24]:
# Create the SQLite table for the test sample; the column list is the same as for
# target_training_enc. The columns that hold pickled lists are declared as blob.
# The statement is a plain CREATE TABLE, so it raises if the table already exists.
c.execute('''CREATE TABLE target_testing_enc
             ([index] integer PRIMARY KEY, [datum_click] datetime, [anbieter_artikelnummer] text, 
             [userid] text, [clicked_before] blob, [pick] integer, [days_online_std] real, [month_enc] integer, 
             [anbietermarktplatz_enc] integer, [anbieterid_enc] integer, [warengruppe_enc] integer, [text_vec] blob, 
             [preis_std] real, [minve_std] real, [usermkt_enc] integer, [anbieterid_enc_user] blob, 
             [anbietermarktplatz_enc_user] blob, [warengruppe_enc_user] blob, [text_vec_user] blob, [preis_std_user] real, 
             [minve_std_user] real, [days_online_log_std] real, [preis_log_std] real, [preis_log_std_user] real, 
             [minve_log_std] real, [minve_log_std_user] real)''')


<sqlite3.Cursor at 0x7f8096573c70>

In [25]:
# Append the pickled test sample to the SQLite table created above.
test_pickled.to_sql(
    name="target_testing_enc",
    con=conn,
    if_exists="append",
    index=False,
)

Ids to filter out

In [26]:
# Load the complete list of user ids to filter out (no row limit on this query).
filter_ids = pd.read_sql(
    "SELECT * from training_user_filter",
    con=engine,
)

In [27]:
filter_ids.head()

Unnamed: 0,userid
0,1559225
1,1608456
2,1657316
3,1575782
4,1587975


In [28]:
# Write the filter ids to SQLite. No CREATE TABLE is issued for this table in the notebook.
filter_ids.to_sql(
    name="training_user_filter",
    con=conn,
    if_exists="append",
    index=False,
)

### Serving Data

Raw items – only an intermediate step in the creation of the data

In [29]:
# Pull a 50,000-row sample of the raw item features from the live database.
items_raw = pd.read_sql(
    "SELECT * from item_features_raw limit 50000",
    con=engine,
)

In [30]:
items_raw.head()

Unnamed: 0,anbieter_artikelnummer,anbieterid,anbietermarktplatz,warengruppe,text_clean,preis_euro,stueck_pro_ve,erstanlagedatum,datum_last_click
0,00382446LEY101408,382446,HU,Partyartikel_5246,"Suction Cup 3D Disney , Cars Suction Cup 3D Di...",0.807967,1.0,2015-04-01 15:09:48,2020-01-21 16:17:31
1,0017115816.06.58,171158,IT,Geschenkverpackung_5086,Wholesaler of checked fabric tape Checkered fa...,2.79,1.0,2020-01-08 10:30:43,2020-01-22 00:00:49
2,0000403434036,4034,DE,Lichterketten_5114,Christmas LED fairy lights made of copper wire...,2.1,20.0,2016-11-29 10:31:47,NaT
3,000042316333,4231,DE,Reiseartikel_5199,Luggage strap - New -. 4 different colors pack...,1.43,80.0,2016-11-04 11:23:29,NaT
4,00382446LEY101495,382446,HU,Haushaltswaren_5240,"Cutlery Set - 2 Piece Disney Verdas, Cars Plas...",0.538645,1.0,2015-05-10 08:33:45,2020-01-21 16:17:31


In [31]:
# Write the raw item sample to SQLite. No CREATE TABLE is issued for this table in the notebook.
items_raw.to_sql(
    name="item_features_raw",
    con=conn,
    if_exists="append",
    index=False,
)

Items encoded

In [32]:
# Pull a 50,000-row sample of the encoded items from the live database.
items = pd.read_sql(
    "SELECT * from item_enc limit 50000",
    con=engine,
)

In [33]:
items.head()

Unnamed: 0,anbieter_artikelnummer,anbieterid_enc,anbietermarktplatz_enc,warengruppe_enc,text_vec,preis_std,minve_std,preis_log_std,minve_log_std,erstanlagedatum
0,00744013Cayley,501,8,81,"[0.0112834237515926, -0.117268852889538, 0.997...",-0.019027,-0.015527,0.951033,-1.06371,2018-05-10 17:47:30
1,00744013Christelle,501,8,33,"[0.158726245164871, -0.114233806729317, 0.7278...",-0.058951,-0.015527,0.618703,-1.06371,2018-06-17 12:19:51
2,00744013Clarissa,501,8,182,"[0.35150557756424, -0.203590989112854, 0.48789...",0.014842,-0.015527,1.151531,-1.06371,2018-06-17 12:23:37
3,00744013Dagna,501,8,43,"[-0.23165936768055, 0.0307953357696533, 0.5446...",-0.037405,-0.015527,0.815526,-1.06371,2018-04-20 23:57:34
4,00744013Hathor,501,8,100,"[0.11736374348402, 0.111603811383247, 0.895092...",-0.046042,-0.015527,0.742547,-1.06371,2018-05-03 18:04:14


In [34]:
items_pickled = pickle_data(items)

In [35]:
items_pickled.head()

Unnamed: 0,anbieter_artikelnummer,anbieterid_enc,anbietermarktplatz_enc,warengruppe_enc,text_vec,preis_std,minve_std,preis_log_std,minve_log_std,erstanlagedatum
0,00744013Cayley,501,8,81,b'\x80\x03]q\x00(G?\x87\x1b\xc3\x7f\xff\xff\xe...,-0.019027,-0.015527,0.951033,-1.06371,2018-05-10 17:47:30
1,00744013Christelle,501,8,33,b'\x80\x03]q\x00(G?\xc4Q$?\xff\xff\xf8G\xbf\xb...,-0.058951,-0.015527,0.618703,-1.06371,2018-06-17 12:19:51
2,00744013Clarissa,501,8,182,b'\x80\x03]q\x00(G?\xd6\x7f\x11@\x00\x00\tG\xb...,0.014842,-0.015527,1.151531,-1.06371,2018-06-17 12:23:37
3,00744013Dagna,501,8,43,b'\x80\x03]q\x00(G\xbf\xcd\xa7\x03\xa0\x00\x00...,-0.037405,-0.015527,0.815526,-1.06371,2018-04-20 23:57:34
4,00744013Hathor,501,8,100,b'\x80\x03]q\x00(G?\xbe\x0b\x8c\xdf\xff\xff\xe...,-0.046042,-0.015527,0.742547,-1.06371,2018-05-03 18:04:14


In [36]:
# Create the SQLite table for the encoded items; [text_vec] holds pickled lists (blob).
# "PRIMARY KEY" must be written as two words: SQLite reads the misspelt "PRIMARY_KEY"
# as part of the column's type name, which left the table without any primary key.
# NOTE(review): with the key in place, duplicate anbieter_artikelnummer values in the
# sample will make the insert below fail - confirm the column is unique in the source.
c.execute('''CREATE TABLE item_enc
             ([anbieter_artikelnummer] text PRIMARY KEY, [anbieterid_enc] integer, [anbietermarktplatz_enc] integer,
             [warengruppe_enc] integer, [text_vec] blob, [preis_std] real, [minve_std] real, [preis_log_std] real, [minve_log_std] real, [erstanlagedatum] datetime  )''')

<sqlite3.Cursor at 0x7f8096573c70>

In [37]:
# Append the pickled item sample to the SQLite table created above.
items_pickled.to_sql(
    name="item_enc",
    con=conn,
    if_exists="append",
    index=False,
)

Test

In [41]:
# Round-trip check: read every stored item row back from SQLite.
items_pickled = pd.read_sql(
    "SELECT * FROM item_enc",
    con=conn,
)

In [42]:
items_un = unpickle_data(items_pickled)

In [43]:
# Show the round-tripped frame, not the original `items`; otherwise this check never
# looks at the data that went through SQLite and unpickling.
items_un.head()

Unnamed: 0,anbieter_artikelnummer,anbieterid_enc,anbietermarktplatz_enc,warengruppe_enc,text_vec,preis_std,minve_std,preis_log_std,minve_log_std,erstanlagedatum
0,00744013Cayley,501,8,81,"[0.0112834237515926, -0.117268852889538, 0.997...",-0.019027,-0.015527,0.951033,-1.06371,2018-05-10 17:47:30
1,00744013Christelle,501,8,33,"[0.158726245164871, -0.114233806729317, 0.7278...",-0.058951,-0.015527,0.618703,-1.06371,2018-06-17 12:19:51
2,00744013Clarissa,501,8,182,"[0.35150557756424, -0.203590989112854, 0.48789...",0.014842,-0.015527,1.151531,-1.06371,2018-06-17 12:23:37
3,00744013Dagna,501,8,43,"[-0.23165936768055, 0.0307953357696533, 0.5446...",-0.037405,-0.015527,0.815526,-1.06371,2018-04-20 23:57:34
4,00744013Hathor,501,8,100,"[0.11736374348402, 0.111603811383247, 0.895092...",-0.046042,-0.015527,0.742547,-1.06371,2018-05-03 18:04:14


Users

In [49]:
# Pull a 10,000-row sample of the encoded users from the live database.
users = pd.read_sql(
    "SELECT * from user_enc limit 10000",
    con=engine,
)

In [50]:
users_pickled = pickle_data(users)

In [51]:
# Preview only the first rows instead of displaying the whole 10,000-row frame.
users_pickled.head(10)

Unnamed: 0,userid,datum_click,clicked_before,usermkt_enc,anbieterid_enc_user,anbietermarktplatz_enc_user,warengruppe_enc_user,text_vec_user,preis_std_user,minve_std_user,minve_log_std_user,preis_log_std_user
0,1617611,2019-02-14 20:30:24,b'\x80\x03]q\x00(X\x0c\x00\x00\x0000345941BB03...,4,b'\x80\x03]q\x00(K\xb1K\xb6K\xf4e.',b'\x80\x03]q\x00(K\x01K\x05K\x01e.',b'\x80\x03]q\x00(K~K!K\x8ae.',"b'\x80\x03]q\x00(G?\xda\xb4\xb5""\xaa\xaa\xb1G\...",-0.095440,0.021158,1.058279,-0.862858
1,1601360,2019-02-14 20:35:04,b'\x80\x03]q\x00X\r\x00\x00\x000004335013486q\...,7,b'\x80\x03]q\x00K a.',b'\x80\x03]q\x00K\x01a.',b'\x80\x03]q\x00Kqa.',b'\x80\x03]q\x00(G?\xed\x17\x08\x00\x00\x00\x0...,-0.085694,0.002542,0.172416,-0.558084
2,854652,2019-02-14 20:35:57,b'\x80\x03]q\x00(X\x10\x00\x00\x0000382446CTL6...,8,b'\x80\x03]q\x00(K\xd5K\xd5M=\x01M=\x01M\xa4\x...,b'\x80\x03]q\x00(K\x05K\x05K\x08K\x08K\x01e.',b'\x80\x03]q\x00(K\x13K\x13K\x13K\x13K\x13e.',b'\x80\x03]q\x00(G\xbf\xcb\r\\\xe6ff]G\xbf\xc3...,-0.000721,-0.015527,-3.127772,0.800185
3,1638264,2019-02-14 20:38:00,b'\x80\x03]q\x00(X\r\x00\x00\x0000690052Z121Zq...,8,b'\x80\x03]q\x00(K\x02K\x02K\x02K\x02K\x02K\x0...,b'\x80\x03]q\x00(K\x01K\x01K\x01K\x01K\x01K\x0...,b'\x80\x03]q\x00(K\x05K\x05K\x05K\x05K\x05K\x0...,b'\x80\x03]q\x00(G?\xdb\x14\xe1z\x00\x00\x01G\...,-0.106602,0.015169,0.831912,-1.328800
4,1650757,2019-01-20 18:42:37,b'\x80\x03]q\x00(X\x0c\x00\x00\x00000618246558...,7,b'\x80\x03]q\x00(K.M\xe6\x01M\x05\x02e.',b'\x80\x03]q\x00(K\x01K\x07K\x04e.',b'\x80\x03]q\x00(K=K\xc3K e.',b'\x80\x03]q\x00(G\xbf\xe2\xad\xefX\x00\x00\x0...,-0.091952,-0.009504,-0.480622,0.108621
5,1646805,2019-02-14 21:08:19,b'\x80\x03]q\x00(X\x0e\x00\x00\x000069462538.3...,3,b'\x80\x03]q\x00(K\xb1K\xd5M}\x01M\xae\x01M\xa...,b'\x80\x03]q\x00(K\x01K\x05K\x04K\x01K\x01K\x0...,b'\x80\x03]q\x00(K~K\x97K~KXKXKRK`e.',b'\x80\x03]q\x00(G?\xe1-^\xe0\x00\x00\x04G\xbf...,-0.082324,-0.000039,-0.014511,-0.467182
6,1635714,2018-10-20 19:50:37,b'\x80\x03]q\x00(X\x0e\x00\x00\x00003644421014...,8,b'\x80\x03]q\x00(K\x0eK\x0eK?K]KgK{K{K{K|K\xb1...,b'\x80\x03]q\x00(K\x01K\x01K\x07K\x06K\x07K\x0...,b'\x80\x03]q\x00(K\xaeK\xaeK\x9dK\xaeK\xaeK\x0...,b'\x80\x03]q\x00(G\xbf9\xfbn\x8b\xa2\xe0\x98G\...,-0.121471,0.086415,0.616961,-1.106562
7,1531696,2018-10-20 20:45:07,b'\x80\x03]q\x00(X\x10\x00\x00\x0000757137GTB5...,6,b'\x80\x03]q\x00(K\xb1K\xb1M\x08\x01M\x14\x02e.',b'\x80\x03]q\x00(K\x01K\x01K\x08K\x07e.',b'\x80\x03]q\x00(KqK\x00KKK\x00e.',b'\x80\x03]q\x00(G\xbf\xcd\x91)\xf7\x7f\xff\xf...,0.045416,0.013219,0.178045,-0.562482
8,1510136,2019-02-15 10:27:06,b'\x80\x03]q\x00(X\x10\x00\x00\x00003504281001...,4,b'\x80\x03]q\x00(K\xb8K\xb8M\x05\x02e.',b'\x80\x03]q\x00(K\x01K\x01K\x04e.',b'\x80\x03]q\x00(K<K<K<e.',b'\x80\x03]q\x00(G\xbf\xc2\x1f\xf5\x03\xff\xff...,-0.027047,-0.011694,-1.532166,0.507073
9,735670,2018-10-20 21:45:20,b'\x80\x03]q\x00(X\x0e\x00\x00\x0000288952CN51...,1,b'\x80\x03]q\x00(K\x9fMo\x01e.',b'\x80\x03]q\x00(K\x04K\x05e.',b'\x80\x03]q\x00(K3KKe.',b'\x80\x03]q\x00(G\xbf\xd1\xcb\x84\x93\xff\xff...,0.119194,-0.014706,-0.820592,1.416474


In [53]:
# Create the SQLite table for the encoded users, keyed by [userid].
# The columns that hold pickled lists are declared as blob.
# The statement is a plain CREATE TABLE, so it raises if the table already exists.
c.execute('''CREATE TABLE user_enc
             ([userid] text PRIMARY KEY, [datum_click] datetime, [clicked_before] blob, [usermkt_enc] integer, 
             [anbieterid_enc_user] blob, [anbietermarktplatz_enc_user] blob, [warengruppe_enc_user] blob, 
             [text_vec_user] blob, [preis_std_user] real, [minve_std_user] real, 
             [preis_log_std_user] real, [minve_log_std_user] real)''')


<sqlite3.Cursor at 0x7f8096573c70>

In [54]:
# Append the pickled user sample to the SQLite table created above.
users_pickled.to_sql(
    name="user_enc",
    con=conn,
    if_exists="append",
    index=False,
)