# Test DB

In [None]:
import psycopg2
from sqlalchemy import create_engine
import pandas as pd

In [None]:
db_string = "postgresql://postgres:postgres@postgres/postgres"
db = create_engine(db_string)

In [None]:
# Custom notebook magic commands for loading sql.
from IPython.core.magic import register_line_cell_magic
def create_df_sql_magic(magic_name, conn):
  custom_func = sql_df
  custom_func.__name__ = magic_name
  register_line_cell_magic(custom_func)
create_df_sql_magic('sql_df', db)

In [None]:
def query_df(line_query, cell_query=None, conn=db):
    if cell_query==None:
      return pd.read_sql(line_query, conn)
    return pd.read_sql(cell_query, conn)

In [None]:
%%sql_df
SELECT * FROM channel LIMIT 5;

In [None]:
%%sql_df
SELECT * FROM message LIMIT 5;

In [None]:
%%sql_df
SELECT * FROM reply LIMIT 5;

In [None]:
%%sql_df
SELECT * FROM channel where channel_id='CSW4Z94SJ';

In [None]:
%%sql_df
SELECT * FROM reply JOIN channel on reply.channel_id=channel.channel_id LIMIT 10;

In [None]:
%%sql_df
SELECT * FROM message
LEFT JOIN reply on reply.thread_ts=message.ts
WHERE message.channel_id='CFBBHV7AT'
ORDER BY message.ts
LIMIT 5;

In [124]:
ds_gen_parents_n_replies_query = \
'''
SELECT 
    message.text AS p_text, message.reply_count, message.user_id as p_id, message.ts,
    reply.text AS c_text, reply.user_id as c_id
FROM message
LEFT JOIN reply on reply.thread_ts=message.ts
WHERE message.channel_id='CFBBHV7AT' AND message.reply_count > 0
ORDER BY message.ts, reply.ts;
'''
ds_gen_parents_n_replies_df = query_df(ds_gen_parents_n_replies_query)

In [125]:
ds_gen_parents_n_replies_df.head()

Unnamed: 0,p_text,reply_count,p_id,ts,c_text,c_id
0,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,Any resources for getting started with data sc...,UBVRP2ZFB
1,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,<http://learnjsdata.com>,UCC6UCZM4
2,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,<https://ml5js.org>,UCC6UCZM4
3,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,<@UBVRP2ZFB> i use a python library which use...,UAMEM2PGC
4,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,that's really just a visualization comment tho,UAMEM2PGC


In [126]:
ds_gen_parents_n_replies_df['replies'] = ds_gen_parents_n_replies_df.c_text.shift(-1)

In [127]:
ds_gen_parents_n_replies_df.head()

Unnamed: 0,p_text,reply_count,p_id,ts,c_text,c_id,replies
0,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,Any resources for getting started with data sc...,UBVRP2ZFB,<http://learnjsdata.com>
1,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,<http://learnjsdata.com>,UCC6UCZM4,<https://ml5js.org>
2,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,<https://ml5js.org>,UCC6UCZM4,<@UBVRP2ZFB> i use a python library which use...
3,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,<@UBVRP2ZFB> i use a python library which use...,UAMEM2PGC,that's really just a visualization comment tho
4,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,that's really just a visualization comment tho,UAMEM2PGC,Whatcha wanna know? My knowledge is open sourc...


In [128]:
df = ds_gen_parents_n_replies_df

In [129]:
df = df.groupby('ts', as_index=False).apply(lambda x: x.iloc[:-1])

In [130]:
df.head()

Unnamed: 0,Unnamed: 1,p_text,reply_count,p_id,ts,c_text,c_id,replies
0,0,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,Any resources for getting started with data sc...,UBVRP2ZFB,<http://learnjsdata.com>
0,1,Any resources for getting started with data sc...,2,UBVRP2ZFB,1549849000.0,<http://learnjsdata.com>,UCC6UCZM4,<https://ml5js.org>
1,3,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,<@UBVRP2ZFB> i use a python library which use...,UAMEM2PGC,that's really just a visualization comment tho
1,4,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,that's really just a visualization comment tho,UAMEM2PGC,Whatcha wanna know? My knowledge is open sourc...
1,5,<@UBVRP2ZFB> i use a python library which use...,5,UAMEM2PGC,1551123000.0,Whatcha wanna know? My knowledge is open sourc...,UBVRP2ZFB,thanks!


In [None]:
query = '''SELECT * FROM reply
JOIN channel on reply.channel_id=channel.channel_id
WHERE channel.channel_name='ds_general';'''
ds_general = query_df(query)

In [None]:
from datetime import datetime

def unix_to_datetime(a_series):
    x = a_series.astype('float')
    x = a_series.apply(datetime.fromtimestamp)
    return x

ds_general.thread_ts = unix_to_datetime(ds_general.thread_ts)
ds_general.ts = unix_to_datetime(ds_general.ts)

In [None]:
ds_general['text_len'] = ds_general.text.apply(len)

In [None]:
ds_general.text_len.describe()

In [None]:
ds_general.shape

In [None]:
ds_general.head()