In [57]:
# import necessary libraries
import pandas as pd
import numpy as np
import requests
import os
import json
from timeit import default_timer as timer
# to query twitter api
import tweepy

In [58]:
# Read the WeRateDogs Twitter archive CSV into a DataFrame.
twitArchEnhanc = pd.read_csv(filepath_or_buffer='twitter-archive-enhanced.csv')

In [59]:
# Clean an independent deep copy so the raw archive frame loaded above
# stays untouched.
twitArchEnhanc_clean = twitArchEnhanc.copy(deep=True)

In [60]:
# Code:
# Turn the literal placeholder string 'None' into a real missing value (NaN).
# The match is an exact, whole-cell comparison restricted to the name and
# dog-stage columns: an unanchored regex applied to the entire frame would
# also blank out any other string cell that merely *contains* "None"
# (for example a tweet's text), because a regex hit with a non-string
# replacement replaces the whole cell.
columns = ['name', 'doggo', 'floofer', 'pupper', 'puppo']
twitArchEnhanc_clean[columns] = twitArchEnhanc_clean[columns].replace('None', np.nan)
# Test:
# no placeholder strings may remain in the cleaned columns
assert not twitArchEnhanc_clean[columns].eq('None').any().any()
twitArchEnhanc_clean[columns].sample(15)

Unnamed: 0,name,doggo,floofer,pupper,puppo
569,Ronnie,,,,
1147,Sprout,,,,
1214,Michelangelope,,,,
151,Zooey,,,,
1328,Lucy,,,,
1814,,,,pupper,
1885,,,,,
1991,,,,pupper,
1224,,,,,
694,Oakley,,,,


In [61]:
# Code:
# Collapse the four dog-stage columns into a single categorical `dog_type`
# column. Relies on missing stages already being NaN (not the string 'None').
cols = ['doggo', 'floofer', 'pupper', 'puppo']
# concatenate original dog type columns so we can have just one:
# each row's non-null stage labels are comma-joined; rows with no stage give ''
dog_type = twitArchEnhanc_clean[cols].apply(lambda x: ','.join(x.dropna()), axis=1)
# for the tweets where multiple dog types were recorded, change categorization
# to 'multiple' (a comma in the joined value means more than one stage was set,
# so every combination is covered rather than a hard-coded list)
dog_type = dog_type.mask(dog_type.str.contains(',', regex=False), 'multiple')
# replace empty strings with NaN (exact literal match, no regex needed)
dog_type = dog_type.replace('', np.nan)
# change new dog type col to category dtype and store it on the frame;
# astype returns a new Series, so the result must be assigned to take effect,
# and assigning the column avoids in-place edits on a column selection
twitArchEnhanc_clean["dog_type"] = dog_type.astype('category')
# drop original dog type cols
twitArchEnhanc_clean = twitArchEnhanc_clean.drop(columns=cols)
# Test:
assert twitArchEnhanc_clean["dog_type"].dtype.name == 'category'
assert not set(cols) & set(twitArchEnhanc_clean.columns)
twitArchEnhanc_clean.sample(10)

Unnamed: 0,tweet_id,in_reply_to_status_id,in_reply_to_user_id,timestamp,source,text,retweeted_status_id,retweeted_status_user_id,retweeted_status_timestamp,expanded_urls,rating_numerator,rating_denominator,name,dog_type
965,750429297815552001,,,2016-07-05 20:41:01 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Arnie. He's a Nova Scotian Fridge Floo...,,,,https://twitter.com/dog_rates/status/750429297...,12,10,Arnie,
1110,733822306246479872,,,2016-05-21 00:50:46 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Moose. He's a Polynesian Floofer. Dapp...,,,,https://twitter.com/dog_rates/status/733822306...,10,10,Moose,floofer
48,882992080364220416,,,2017-07-06 15:58:11 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Rusty. He wasn't ready for the first p...,,,,https://twitter.com/dog_rates/status/882992080...,13,10,Rusty,
187,856282028240666624,,,2017-04-23 23:01:59 +0000,"<a href=""http://twitter.com/download/iphone"" r...","This is Cermet, Paesh, and Morple. They are ab...",,,,https://twitter.com/dog_rates/status/856282028...,14,10,Cermet,
1831,676215927814406144,,,2015-12-14 01:43:35 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Herm. He just wants to be like the oth...,,,,https://twitter.com/dog_rates/status/676215927...,9,10,Herm,
143,864197398364647424,,,2017-05-15 19:14:50 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Paisley. She ate a flower just to prov...,,,,https://twitter.com/dog_rates/status/864197398...,13,10,Paisley,
1804,676946864479084545,,,2015-12-16 02:08:04 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This pups goal was to get all four feet as clo...,,,,https://twitter.com/dog_rates/status/676946864...,12,10,,
2032,671763349865160704,,,2015-12-01 18:50:38 +0000,"<a href=""http://twitter.com/download/iphone"" r...",Say hello to Mark. He's a good dog. Always rea...,,,,https://twitter.com/dog_rates/status/671763349...,9,10,Mark,
764,777953400541634568,,,2016-09-19 19:31:59 +0000,"<a href=""http://twitter.com/download/iphone"" r...",RT @dog_rates: Meet Gerald. He's a fairly exot...,7.681934e+17,4196984000.0,2016-08-23 21:09:14 +0000,https://twitter.com/dog_rates/status/768193404...,8,10,Gerald,doggo
1957,673583129559498752,,,2015-12-06 19:21:47 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Sandy. She loves her spot by the tree....,,,,https://twitter.com/dog_rates/status/673583129...,11,10,Sandy,
