In [1]:
import GetOldTweets3 as got

In [2]:
# Account whose timeline is queried
twitter_user_name = 'LambdaSchool'
# Upper bound on tweets fetched -- kept very small for efficiency during testing
count = 20

In [3]:
# Build the criteria object that describes the query:
# which account to read and the maximum number of tweets to return.
tweetGetter = (
    got.manager.TweetCriteria()
    .setUsername(twitter_user_name)
    .setMaxTweets(count)
)

In [4]:
# Execute the query described by tweetGetter and keep the resulting tweets.
# NOTE(review): presumably this performs network requests -- confirm before re-running often.
retrieved_tweets = got.manager.TweetManager.getTweets(tweetGetter)

In [5]:
# type(retrieved_tweets)

In [6]:
# Keep the second retrieved tweet as a sample object for trying out the helpers below.
test_tweet = retrieved_tweets[1]
# type(test_tweet.urls)
# 
# dir (test_tweet)

In [7]:
def tweet_to_dict(twt):
    """Copy the fields of a tweet object into a plain dict.

    Every key is the name of the attribute it was read from, and keys
    appear in the order listed in the body.  Notes on selected fields:
    'favorites' -- a count of 'likes'
    'hashtags'  -- a string holding a space-separated series of hashtags
    'mentions'  -- a string holding a space-separated series of ats (@s)
    'urls'      -- a string holding a space-separated series of URLs
    """
    field_names = (
        'date', 'favorites', 'formatted_date', 'geo', 'hashtags', 'id',
        'mentions', 'permalink', 'replies', 'retweets', 'text', 'to',
        'urls', 'username',
    )
    # Attribute name and dict key are always identical, so look them up by name.
    return {name: getattr(twt, name) for name in field_names}

In [8]:
def munge_date(dt):
    """Break a datetime.datetime object into a dict of calendar and clock fields.

    No timezone conversion is applied; every value is read from dt as given.

    Args:
        dt: a datetime.datetime (any object exposing year, month, day,
            hour, minute and weekday() works).

    Returns:
        dict with these keys, in this order:
        'year', 'month', 'day' -- copied from dt
        'day_of_week'          -- dt.weekday(): 0 is Monday, 6 is Sunday
        'hour', 'minute'       -- copied from dt
        'minute_of_day'        -- count of minutes from midnight
    """
    return {
        'year': dt.year,
        'month': dt.month,
        'day': dt.day,
        'day_of_week': dt.weekday(),
        'hour': dt.hour,
        'minute': dt.minute,
        'minute_of_day': (60 * dt.hour) + dt.minute,
    }
# Spot-check munge_date on the sample tweet's timestamp.
munge_date(test_tweet.date)

{'year': 2020,
 'month': 4,
 'day': 11,
 'day_of_week': 5,
 'hour': 22,
 'minute': 32,
 'minute_of_day': 1352}

In [9]:
def munge_twt(twt):
    """Build one flat record for a tweet.

    Starts from the fields produced by tweet_to_dict(twt) and then adds the
    date parts produced by munge_date(twt.date); on a key clash the date
    part wins.
    """
    record = dict(tweet_to_dict(twt))
    record.update(munge_date(twt.date))
    return record

# Spot-check munge_twt on the sample tweet.
munge_twt(test_tweet)

{'date': datetime.datetime(2020, 4, 11, 22, 32, 2, tzinfo=datetime.timezone.utc),
 'favorites': 24,
 'formatted_date': 'Sat Apr 11 22:32:02 +0000 2020',
 'geo': '',
 'hashtags': '#LambdaSchool',
 'id': '1249102938540093440',
 'mentions': '@CapitalOne @amazon @Infosys @eBay @Cisco',
 'permalink': 'https://twitter.com/LambdaSchool/status/1249102938540093440',
 'replies': 1,
 'retweets': 3,
 'text': 'Of the 284 full-time #LambdaSchool students that graduated in H1 2019, the job placement rate was 71%. These graduates have been hired by over 150 employers, including @CapitalOne, @amazon, @Infosys, @eBay, @Cisco and more. Details: https://bit.ly/39htNvO',
 'to': None,
 'urls': 'https://bit.ly/39htNvO',
 'username': 'LambdaSchool',
 'year': 2020,
 'month': 4,
 'day': 11,
 'day_of_week': 5,
 'hour': 22,
 'minute': 32,
 'minute_of_day': 1352}

In [10]:
def remove_duplicates_from_list(l):
    """Return a new list containing each distinct element of l exactly once.

    Elements keep the order of their first appearance, so the result is the
    same on every run (a round trip through set() gives an arbitrary order).

    Args:
        l: an iterable of hashable elements; it is not modified.

    Returns:
        list of the unique elements, in first-seen order.
    """
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(l))

def mapcar(func, lst):
    """Apply func to every element of lst and return the results as a list.

    Named after Lisp's mapcar; results are in the same order as lst.
    """
    return [func(item) for item in lst]

def flatten_lists_one_layer(ls):
    """Join a list of lists into one list, removing exactly one level of nesting.

    Args:
        ls: an iterable whose elements are themselves iterables (typically
            a list of lists).  Elements of the inner iterables are not
            flattened any further.

    Returns:
        A new list with the inner elements in their original order; an
        empty list when ls is empty.
    """
    # One linear pass; repeated list concatenation would copy the
    # accumulator on every step.
    return [item for sublist in ls for item in sublist]

def join_list_of_dicts(dicts):
    """Merge a sequence of dicts into a single new dict.

    Dicts are processed in order; when a key occurs more than once the
    value from the last dict containing it is kept.
    """
    merged = {}
    for mapping in dicts:
        for key, val in mapping.items():
            merged[key] = val
    return merged
 

In [11]:
def list_of_tweets_hashtags(tweet_object):
    """Return the tweet's hashtags as a list of strings.

    NAIVE: relies on tweet_object.hashtags already being a single
    whitespace-delimited string of hashtags.
    """
    raw_hashtags = tweet_object.hashtags
    return raw_hashtags.split()

def list_of_tweets_mentions(tweet_object):
    """Return the tweet's mentions as a list of strings.

    NAIVE: relies on tweet_object.mentions already being a single
    whitespace-delimited string of @phrases.
    """
    raw_mentions = tweet_object.mentions
    return raw_mentions.split()

# Every distinct hashtag used across the retrieved tweets.
all_hashtags = remove_duplicates_from_list(
    flatten_lists_one_layer(
        [list_of_tweets_hashtags(tweet) for tweet in retrieved_tweets]))

# Every distinct @mention used across the retrieved tweets.
all_mentions = remove_duplicates_from_list(
    flatten_lists_one_layer(
        [list_of_tweets_mentions(tweet) for tweet in retrieved_tweets]))


In [31]:
import pandas as pd
# Turn every retrieved tweet into one flat record, then load the records
# into a DataFrame indexed by tweet id, leaving out the columns named in
# useless_columns.
index_col_name = 'id'
useless_columns = ['date', 'formatted_date', 'permalink', 'username', 'hour', 'minute', 'geo']

recs = [munge_twt(tweet) for tweet in retrieved_tweets]

df = pd.DataFrame.from_records(
    recs,
    index=index_col_name,
    exclude=useless_columns,
)

In [33]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0_level_0,favorites,hashtags,mentions,replies,retweets,text,to,urls,year,month,day,day_of_week,minute_of_day
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1249389342663065600,15,,,0,0,Happy Easter everyone These maybe difficult ti...,,,2020,4,12,6,1050
1249102938540093440,24,#LambdaSchool,@CapitalOne @amazon @Infosys @eBay @Cisco,1,3,Of the 284 full-time #LambdaSchool students th...,,https://bit.ly/39htNvO,2020,4,11,5,1352
1249029951602425857,11,#datascience #tech,,1,0,"Shout out to Ofer Barav, #datascience student ...",,https://bit.ly/2xcMwv4,2020,4,11,5,1062
1248781591553298432,10,#C2CPodcast,,0,2,“When you develop and take care of the [studen...,bevylabs,https://twitter.com/bevylabs/status/1248725936...,2020,4,11,5,75
1248719916955549701,39,#job #tech,@businessinsider,5,5,.@businessinsider lists Lambda School in the 1...,,https://bit.ly/2UUJeWS,2020,4,10,4,1270


In [39]:
# Scratch experiment: build an empty DataFrame whose column names come from
# splitting a string on whitespace.
s = "hello there"
cols = s.split()
foo = pd.DataFrame(columns=cols)

In [40]:
# Display the frame: column headers only, since no rows were supplied.
foo

Unnamed: 0,hello,there
