In [1]:
import pandas as pd
import sqlite3

In [2]:
con = sqlite3.connect('../data/checking-logs.sqlite')

In [3]:
SQL_DROP = ''' DROP TABLE IF EXISTS datamart ; '''
SQL_CREATE = ''' CREATE TABLE IF NOT EXISTS datamart
( 
  uid text NULL,
  labname text NULL,
  first_commit_ts datetime64,
  first_view_ts datetime64
) '''
SQL_INSERT = ''' INSERT into datamart (uid, labname, first_commit_ts, first_view_ts)
select c.uid as uid, c.labname ,min(c."timestamp") as first_commit_ts, p.first_view_ts as first_view_ts from checker c
left join (select uid, min(datetime) as first_view_ts from pageviews group by uid) as p  on c.uid = p.uid 
where c.status = 'ready' and c.numTrials = 1 and c.labname in ('laba04', 'laba04s', 'laba05', 'laba06', 'laba06s', 'project1') and c.uid like 'user_%'
GROUP BY c.uid, c.labname; '''
SQL_SELECT = ''' select * from datamart '''
pd.io.sql.execute(SQL_DROP, con)
pd.io.sql.execute(SQL_CREATE, con)
pd.io.sql.execute(SQL_INSERT, con)
new_table = pd.io.sql.read_sql(SQL_SELECT, con, index_col='uid')
new_table['first_view_ts'] = new_table['first_view_ts'].apply(pd.to_datetime)
new_table['first_commit_ts'] = new_table['first_commit_ts'].apply(pd.to_datetime)

new_table



Unnamed: 0_level_0,labname,first_commit_ts,first_view_ts
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
user_1,laba04,2020-04-26 17:06:18.462708,2020-04-26 21:53:59.624136
user_1,laba04s,2020-04-26 17:12:11.843671,2020-04-26 21:53:59.624136
user_1,laba05,2020-05-02 19:15:18.540185,2020-04-26 21:53:59.624136
user_1,laba06,2020-05-17 16:26:35.268534,2020-04-26 21:53:59.624136
user_1,laba06s,2020-05-20 12:23:37.289724,2020-04-26 21:53:59.624136
...,...,...,...
user_8,laba04s,2020-04-19 10:22:35.761944,NaT
user_8,laba05,2020-05-02 13:28:07.705193,NaT
user_8,laba06,2020-05-16 17:56:15.755553,NaT
user_8,laba06s,2020-05-16 20:01:07.900727,NaT


In [4]:
test_df = new_table[new_table.first_view_ts.notna()]
test_df

Unnamed: 0_level_0,labname,first_commit_ts,first_view_ts
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
user_1,laba04,2020-04-26 17:06:18.462708,2020-04-26 21:53:59.624136
user_1,laba04s,2020-04-26 17:12:11.843671,2020-04-26 21:53:59.624136
user_1,laba05,2020-05-02 19:15:18.540185,2020-04-26 21:53:59.624136
user_1,laba06,2020-05-17 16:26:35.268534,2020-04-26 21:53:59.624136
user_1,laba06s,2020-05-20 12:23:37.289724,2020-04-26 21:53:59.624136
user_1,project1,2020-05-14 20:56:08.898880,2020-04-26 21:53:59.624136
user_10,laba04,2020-04-25 08:24:52.696624,2020-04-18 12:19:50.182714
user_10,laba04s,2020-04-25 08:37:54.604222,2020-04-18 12:19:50.182714
user_10,laba05,2020-05-01 19:27:26.063245,2020-04-18 12:19:50.182714
user_10,laba06,2020-05-19 11:39:28.885637,2020-04-18 12:19:50.182714


In [5]:
control_df = new_table[new_table.first_view_ts.isna()]
control_df['first_view_ts'] = new_table['first_view_ts'].mean(skipna = True)
control_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0_level_0,labname,first_commit_ts,first_view_ts
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
user_11,laba05,2020-05-03 21:06:55.970293,2020-04-27 00:40:05.761783552
user_11,project1,2020-05-03 23:45:33.673409,2020-04-27 00:40:05.761783552
user_12,laba04,2020-04-18 17:07:51.767358,2020-04-27 00:40:05.761783552
user_12,laba04s,2020-04-26 15:42:38.070593,2020-04-27 00:40:05.761783552
user_12,laba05,2020-05-03 08:39:25.174316,2020-04-27 00:40:05.761783552
...,...,...,...
user_8,laba04s,2020-04-19 10:22:35.761944,2020-04-27 00:40:05.761783552
user_8,laba05,2020-05-02 13:28:07.705193,2020-04-27 00:40:05.761783552
user_8,laba06,2020-05-16 17:56:15.755553,2020-04-27 00:40:05.761783552
user_8,laba06s,2020-05-16 20:01:07.900727,2020-04-27 00:40:05.761783552


In [6]:
control_df.to_sql('control', con, if_exists='replace')
test_df.to_sql('test', con, if_exists='replace')
con.commit()
con.close()
