In [None]:
import os

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [None]:
# Connection string for the Postgres database. It can be overridden through
# the DB_STRING environment variable so that real credentials never have to be
# committed with the notebook; the fallback is the original local default.
db_string = os.environ.get(
    "DB_STRING", "postgresql://postgres:postgres@postgres/postgres"
)
# db_string = "postgresql://postgres:postgres@postgres/dev4slack"
db = create_engine(db_string)

def query_df(line_query, cell_query=None, conn=db):
    """Run a SQL query and return the result as a pandas DataFrame.

    The two query parameters mirror the (line, cell) calling convention of
    IPython line/cell magics: when ``cell_query`` is supplied it is the query
    that gets executed, otherwise ``line_query`` is used.

    Parameters
    ----------
    line_query : query to run when no ``cell_query`` is given.
    cell_query : optional query that takes precedence over ``line_query``.
    conn : connection handed to ``pd.read_sql``; defaults to the module-level
        ``db`` engine (bound when this function is defined).

    Returns
    -------
    pandas.DataFrame with the query result.
    """
    # Identity check: ``== None`` would dispatch to ``__eq__``, which query
    # objects are free to override.
    query = line_query if cell_query is None else cell_query
    return pd.read_sql(query, conn)

# Custom notebook magic commands for loading sql.
from IPython.core.magic import register_line_cell_magic
def create_df_sql_magic(magic_name, conn):
    """Register an IPython line/cell magic that runs SQL into a DataFrame.

    Parameters
    ----------
    magic_name : name under which the magic is registered (the function's
        ``__name__`` is set to this before registration).
    conn : connection the registered magic passes to ``pd.read_sql``.

    Returns
    -------
    None. The magic is registered as a side effect.
    """
    # Bind the factory's ``conn`` as the default so the registered magic
    # queries the connection it was created for, rather than always falling
    # back to the module-level ``db``.
    def sql_df(line_query, cell_query=None, conn=conn):
        # Used as a cell magic, the cell body is the query; used as a line
        # magic, the rest of the line is.
        query = line_query if cell_query is None else cell_query
        return pd.read_sql(query, conn)

    # register_line_cell_magic takes the magic's name from ``__name__``.
    sql_df.__name__ = magic_name
    register_line_cell_magic(sql_df)
# Make the SQL helper available as the %sql_df / %%sql_df notebook magic.
create_df_sql_magic(magic_name='sql_df', conn=db)

In [None]:
# One row per (parent message, reply) pair for channel CB6GPKRPT, restricted to
# parents that have at least one reply. Rows come back ordered by parent
# timestamp and then by reply timestamp, which the later shift step relies on.
hired_query = '''
SELECT 
    message.text AS p_text, message.reply_count, message.user_id as p_id, message.ts,
    reply.text AS c_text, reply.user_id as c_id
FROM message
LEFT JOIN reply on reply.thread_ts=message.ts
WHERE message.channel_id='CB6GPKRPT' AND message.reply_count > 0
ORDER BY message.ts, reply.ts;
'''
df = query_df(line_query=hired_query)

In [None]:
# Pair each row with the reply text of the row that follows it.
df['replies'] = df['c_text'].shift(periods=-1)

# The shift runs over the whole frame, so the last row of every thread picked
# up text from the next thread (or NaN at the very end). Drop that last row
# per parent timestamp; threads with a single row disappear entirely.
df = (
    df.groupby(by='ts', as_index=False)
      .apply(lambda thread: thread.iloc[:-1])
)
df.shape

In [None]:
# Keep one row per distinct parent text, and only the text column itself.
df = df.drop_duplicates(subset=['p_text'])[['p_text']]

# Retain only posts longer than 1000 characters.
mask = df['p_text'].str.len() > 1000
df = df.loc[mask]
df.shape

In [None]:
# Exclude every post whose text contains the phrase 'Happy Friday'.
mask = df['p_text'].str.contains('Happy Friday')
df = df.loc[~mask]
df.shape

In [None]:
# Preview the first rows of the filtered posts.
df.head()

In [None]:
# Reference example of a long post, stored verbatim (including its line break).
example = """Good Evening Lambda Fam! I will keep this as short as possible as to save space for other future new hires! Just wanted to say wayyy back July of 2018 when WEBPT2 started, I made it a goal to be able post in here. Many many nights of working late, and grinding, little by little I kept learning new things.There were times when quitting would have been much easier, but it’s never been a part of who I am and it’s never been a part of who you are either. Keep plowing forward relentlessly. I started job searching late November, fell flat on my face a couple times in interviews since I wasn’t familiar with the technical interview format, didn’t know what to expect. When this happened, I went back and figured out what went wrong, and how I can improve.I recently accepted a React Dev job in Dublin, OH and I’m excited, but I realize this is just the start of my learning journey. There is much more to learn on my goal to becoming a senior web developer one day. Always stay curious, never stop learning.I just want to thank all of the Lambda Staff, a few that stand out are @dan.frehner, you’re an amazing teacher, loved every JS and React lesson we had. @Elissa thanks for explaining CS topics so well and making them very understandable. @Diandra Ryan-Mas thank you for all the songs and excellent back end and testing lessons. All the TL’s that served in WebPT2 @KingAtoki @Julian and so many more.  Thanks to Lambda for believing in me, and taking a chance on me out of the 1000's of applicants to the program. I am forever grateful.Lastly, thanks to all the career folks, @Meaghan Barber @Kelsey @Austin Lieberman and everyone else! You all are amazing.Lastly, thanks to all the amazing students in WebPT12 for all the kind words. We have had so much fun in after-hours stretching our learning together. Thanks @Keiran Kozlowski for being so supportive, and @Michael and @KingAtoki once again for being amazing SL’s in that section. 
TL for the two units I did was some of the most fun I’ve had professionally. I can’t wait to see everyone in this cohort light up the hired board real soon!!"""

In [None]:
# Character count of the reference post.
len(example)

In [None]:
# pip install scikit-learn

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# vec = TfidfVectorizer()
# example_vec = vec.fit_transform([example])
# df['vecs'] = vec.transform(df.p_text)

# cosine_similarity(df.vecs, pd.DataFrame(len(df) * example_vec[0]))

In [None]:
# Write the filtered posts as CSV to a file named 'hired' (relative path, no
# file extension); the index is written too, since index=False is not passed.
df.to_csv('hired')

In [None]:
# Dimensions (rows, columns) of the final frame.
df.shape

In [None]:
import random

# Plain Python list of the post texts, ready for random sampling.
choices = df['p_text'].tolist()

In [None]:
# Print one randomly chosen post from `choices` (unseeded, so it varies per run).
print(random.choice(choices))