In [1]:
import keras
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tweepy
import time
import random

import glob
import numpy as np
import pandas as pd
from nltk.tokenize import TweetTokenizer, casual_tokenize
from keras.models import load_model
import keras.preprocessing.text as kpt
from keras.preprocessing.text import text_to_word_sequence, one_hot, Tokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def sample(preds, temperature=0.2):
    """Draw one class index from a temperature-rescaled probability vector.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from that distribution with NumPy's global RNG
    (``np.random.multinomial``), so results are reproducible after
    ``np.random.seed``.

    Parameters
    ----------
    preds : array-like of float
        Non-negative class probabilities (e.g. one row of model output).
    temperature : float, optional
        Positive scaling factor. Values below 1 sharpen the distribution,
        values above 1 flatten it. Defaults to 0.2, the value that used to be
        hard-coded here.

    Returns
    -------
    numpy.int64
        Index of the sampled class.

    Raises
    ------
    ValueError
        If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {!r}'.format(temperature))

    preds = np.asarray(preds).astype('float64')
    # A zero probability legitimately maps to -inf (it can never be drawn);
    # silence the divide-by-zero warning np.log would otherwise emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest exponent is exactly 0. Mathematically a no-op for the
    # softmax, but it stops every term underflowing to 0 (and the division
    # becoming 0/0) when the temperature is small.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def generate_tweets(model, corpus, char_to_idx, idx_to_char, n_tweets=10,
                    seq_length=None, n_generated=100):
    """Generate text samples by seeding the model with corpus excerpts.

    For each sample a random space character in ``corpus`` is chosen as the
    start of a seed window. The window is one-hot encoded, fed to
    ``model.predict``, one character is drawn with ``sample`` and appended,
    and the window slides forward by one character. This repeats
    ``n_generated`` times.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x, verbose=0)`` that returns, for an input
        of shape ``(1, seq_length, len(char_to_idx))``, a batch whose first
        element is a probability vector over characters.
    corpus : str
        Text to draw seed windows from.
    char_to_idx : dict
        Maps each character of ``corpus`` to its one-hot column.
    idx_to_char : dict
        Inverse of ``char_to_idx``.
    n_tweets : int, optional
        Number of samples to generate.
    seq_length : int, optional
        Seed window length. Defaults to the module-level ``MAX_SEQ_LENGTH``.
    n_generated : int, optional
        Number of characters sampled after the seed (previously fixed at 100).

    Returns
    -------
    list of str
        Each entry is the seed window followed by the generated characters.

    Raises
    ------
    ValueError
        If samples are requested but ``corpus`` contains no space to seed from.
    """
    if seq_length is None:
        seq_length = MAX_SEQ_LENGTH
    # One one-hot column per known character.
    n_chars = len(char_to_idx)

    # Seeds always start on a space so a sample never begins mid-word.
    spaces_in_corpus = np.array([idx for idx, char in enumerate(corpus) if char == ' '])
    if n_tweets > 0 and spaces_in_corpus.size == 0:
        raise ValueError('corpus contains no space characters to seed from')

    tweets = []
    for _tweet_no in range(n_tweets):
        begin = np.random.choice(spaces_in_corpus)
        # NOTE: a start position closer than seq_length to the end of the corpus
        # yields a shorter seed; the unused tail of x then stays all-zero.
        sequence = corpus[begin:begin + seq_length]
        tweet = u'' + sequence
        for _step in range(n_generated):
            x = np.zeros((1, seq_length, n_chars))
            for t, char in enumerate(sequence):
                x[0, t, char_to_idx[char]] = 1.0

            preds = model.predict(x, verbose=0)[0]
            next_idx = sample(preds)
            next_char = idx_to_char[next_idx]

            tweet += next_char
            # Slide the window: drop the oldest character, append the new one.
            sequence = sequence[1:] + next_char
        tweets.append(tweet)
    return tweets


In [3]:
# Load every exported tweet CSV, build the lower-cased character corpus, and
# derive the character <-> index lookup tables used for one-hot encoding.
path = 'data/'
# NOTE: this changes the process working directory. Later cells open
# 'botwan_v2.h5' and 'access.csv' by bare file name and therefore rely on it;
# it also means this cell cannot be re-run without restarting the kernel.
os.chdir(path)
result = glob.glob('*.csv')

# Read all files first and concatenate once: growing a frame with
# DataFrame.append inside a loop is quadratic and the method no longer exists
# in pandas >= 2.0.
frames = [pd.read_csv(csv_file, index_col=0) for csv_file in result]
df_tweets = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

df_tweets['time'] = pd.to_datetime(df_tweets['time'])
df_tweets = df_tweets.sort_values('time').reset_index(drop=True)

# Keep only tweets that do not start with '@'.
df_noat = df_tweets.loc[df_tweets['Tweet text'].str.find('@') != 0].copy()
df_noat['text_lower'] = df_noat['Tweet text'].str.lower()
nw_words = [casual_tokenize(tweet, reduce_len=True, strip_handles=True) for tweet in df_noat['text_lower']]
crystal_corpus = [word for sub in nw_words for word in sub if 'http' not in word]
crystal_chars = [char for word in crystal_corpus for char in word]

# Re-assemble the tokens into one long string: every token is followed by a
# space, tokens containing 'http' are skipped, and ' ` ' is appended after each
# tweet as an end-of-tweet delimiter.
text_parts = []
for tweet in nw_words:
    for word in tweet:
        if 'http' not in word:
            text_parts.append(word + ' ')
    text_parts.append(' ` ')

text = "".join(text_parts)

# Count characters in a single pass and build the lookup tables once, after
# counting (they used to be rebuilt for every character of the corpus).
# Characters are indexed in order of first appearance in `text`.
chars_in_corpus = Counter(text)
lexicon = list(chars_in_corpus.keys())
char_to_index = {char: i for (i, char) in enumerate(lexicon)}
index_to_char = {i: char for (i, char) in enumerate(lexicon)}

In [4]:
# Sizes derived from the corpus and lexicon built in the previous cell.
CORPUS_LENGTH, N_CHARS = len(text), len(lexicon)

# Seed-window length in characters; also the time dimension of the one-hot
# input handed to the model in generate_tweets.
MAX_SEQ_LENGTH = 140

# Not referenced anywhere in this notebook -- presumably left over from the
# training setup; TODO confirm before removing.
SEQ_STEP = 2
N_SEQS = None

In [5]:
# Load the trained Keras model. The path is relative, so it is resolved against
# the working directory set by the earlier os.chdir('data/').
model = load_model('botwan_v2.h5')

In [6]:
# Seed NumPy's global RNG so the np.random draws in sample() and
# generate_tweets() repeat across runs. This does not affect the stdlib
# `random` module used later when picking reply texts.
np.random.seed(78)

In [7]:
# Generate 69 raw samples: each is a seed window taken from `text` followed by
# model-sampled characters.
tweets = generate_tweets(model, text, char_to_index, index_to_char, n_tweets=69)

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# Display the raw generated samples (seed text plus sampled continuation).
tweets

[" in bark in the park matches  ` oh my god  ` :(  ` downloading twitter data is so ugh  ` i'm about fire sauce  ` the p in pvalue is silent  $7😨$7\U0001f923🌯$$🍔$😦$$🍔$75$75$$🍔$75$8😨$75🎶$😨🎶$757$757🌯$😦$🎶🌯$7\U0001f923🌯$7\U0001f923🌯$$🍔$7\U0001f9235$$🍔$😨$7\U0001f923🌯$$🍔$7\U0001f923🌯$75🎶$7😦$75$$🍔$🎶5$$",
 " twitter is root for ya !  ` looking to leave academia , anyone ?  ` whoa !  ` it's #pugchat ! !  ` interesting  ` my tl is filled with brig$7😨$$🍔$75$😦🎶$$🍔$7😨8$😦$$🍔$$🍔$7\U0001f923🌯$$🍔$75$$🍔$7😨$75$75🎶$j5$7😨$$🍔$758$757$75$$🍔$😨8$😦🎶$$🍔$$🍔$😨🎶$$🍔$😦$758$7😨",
 ' only 16 % ( n = 48 ) reported being in a boot camp or something like it  ` as for some of the measures ... they are somewhat narrow : resea$7😦$$🍔$75$$🍔$75$8🌯$$🍔$75$75$$🍔$75🎶$75$$🍔$7😨7$75$8😨7$😨$$🍔$😨$😦$75f$75$$🍔$😦🎶$8😨$7\U0001f923😦$$🍔$75$75$$🍔$8🌯$$🍔$7',
 '  ` raisel making me shout louder than library voice at brewriver rn hot damn  ` samwell out here calling for the cidatel to do more replica$7\U0001f9235$75f$7\U0001f923🌯$$🍔$75$$🍔$7😨$-5$$🍔$75

In [38]:
# Junk marker: the clean-up cell below truncates a generated fragment at the
# first occurrence of this substring.
e_100 = '$7'

In [49]:
# Split every raw sample into individual tweets on the '`' end-of-tweet
# delimiter and cut each one off at the junk marker `e_100`.
#
# str.partition returns the whole string as its first element when the marker
# is absent, so one expression covers both the "marker found" and "marker
# missing" cases -- and the marker is now read from `e_100` only, instead of
# being repeated as a separate '$7' literal that could drift out of sync.
#
# NOTE: fragments can be empty or whitespace-only (e.g. text between two
# adjacent delimiters); they are kept here, so filter them before posting.
new_tweets = [
    tweet.partition(e_100)[0]
    for subtweets in tweets
    for tweet in subtweets.split('`')
]

In [50]:
# Display the cleaned-up tweet fragments.
new_tweets

[' in bark in the park matches  ',
 ' oh my god  ',
 ' :(  ',
 ' downloading twitter data is so ugh  ',
 " i'm about fire sauce  ",
 ' the p in pvalue is silent  ',
 ' twitter is root for ya !  ',
 ' looking to leave academia , anyone ?  ',
 ' whoa !  ',
 " it's #pugchat ! !  ",
 ' interesting  ',
 ' my tl is filled with brig',
 ' only 16 % ( n = 48 ) reported being in a boot camp or something like it  ',
 ' as for some of the measures ... they are somewhat narrow : resea',
 '  ',
 ' raisel making me shout louder than library voice at brewriver rn hot damn  ',
 ' samwell out here calling for the cidatel to do more replica',
 ' are the bee\'s knees ! " anyone ?  ',
 ' grape soda tastes like carbonated melted grape otter pop  ',
 ' tfw you run all cells but see something you',
 ' back  ',
 ' bunts everywhere . triples everywhere .  ',
 ' 28 hits  ',
 ' fierce pitching right here  ',
 ' factory resetting my phone cc  ',
 " that didn't fi$$🍔$$🍔",
 ' . rts appreciated  ',
 " whoa . i'm a tw

# To do: 
* Automated tweets (on a timer?)  
* Reply/interactive tweeting  
* Go back and find out why the model emits runs of junk characters (e.g. `$7...`) after the seed text — is it training on weird stuff?  
* Mine more tweets from other "Nick Wan-esque" tweeters (???)  

load from:  
../twitch/botwan/data/botwan_v2.h5

In [None]:
# Read the four Twitter credentials from 'access.csv' (resolved against the
# current working directory). The file has no header row and its first column
# is used as the index; the rows labelled 0-3 hold, in order, the two values
# passed to OAuthHandler and the two values passed to set_access_token.
access = pd.read_csv('access.csv', header=None, index_col=0)
credentials = [access.loc[row].values[0] for row in range(4)]

auth = tweepy.OAuthHandler(credentials[0], credentials[1])
auth.set_access_token(credentials[2], credentials[3])
api = tweepy.API(auth)

In [165]:
# Poll the bot's mentions and answer accounts the bot follows with a randomly
# chosen generated tweet -- at most one reply per author per polling cycle.
REPLY_CYCLES = 90
MENTIONS_PER_FETCH = 10
REPLY_INTERVAL_SECONDS = 8 * 60 * 60  # 28800 s between polls

# Highest mention id handled so far. Passing it back as `since_id` makes each
# later poll return only newer mentions; without it every cycle re-fetched the
# same last mentions and replied to them again.
last_seen_id = None

for cycle in range(REPLY_CYCLES):
    fetch_kwargs = {'count': MENTIONS_PER_FETCH}
    if last_seen_id is not None:
        fetch_kwargs['since_id'] = last_seen_id
    timeline = api.mentions_timeline(**fetch_kwargs)

    if timeline:
        last_seen_id = max(tweet.id for tweet in timeline)

        # NOTE(review): a single api.friends call presumably returns only one
        # page of followed accounts -- confirm against the tweepy docs and
        # paginate if the bot follows more accounts than fit on a page.
        botwan_follows = api.friends('bot__wan')
        follow_list = {followee.screen_name for followee in botwan_follows}

        # Authors of the fetched mentions that the bot follows.
        reply_list = {tweet.author.screen_name for tweet in timeline} & follow_list

        for reply_tweet in timeline:
            author = reply_tweet.author.screen_name
            if author in reply_list:
                api.update_status(
                    status='@' + author + ' ' + random.choice(new_tweets),
                    in_reply_to_status_id=reply_tweet.id,
                )
                # Only the first mention encountered per author gets a reply.
                reply_list.remove(author)

    # No point blocking for another interval after the final poll.
    if cycle < REPLY_CYCLES - 1:
        time.sleep(REPLY_INTERVAL_SECONDS)