# Case Study: Movie Data Analysis

In [None]:
!ls -la ./movielens

In [None]:
!cat ./movielens/movies.csv

In [None]:
!cat ./movielens/movies.csv | wc -l

In [None]:
!head -5 ./movielens/movies.csv

In [None]:
!head -5 ./movielens/ratings.csv

# Use Pandas to read Movies

In [1]:
import pandas as pd

In [2]:
movie_data = pd.read_csv('./movielens/movies.csv', sep=',')

In [3]:
movie_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
tags = pd.read_csv('./movielens/tags.csv')
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,18,4141,Mark Waters,1240597180
1,65,208,dark hero,1368150078
2,65,353,dark hero,1368150079
3,65,521,noir thriller,1368149983
4,65,592,dark hero,1368150078


In [5]:
ratings = pd.read_csv('./movielens/ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [6]:
del ratings['timestamp']
del tags['timestamp']

# Data Structures

### Series

In [7]:
row_0 = tags.iloc[0]
row_0

userId              18
movieId           4141
tag        Mark Waters
Name: 0, dtype: object

In [8]:
row_0['userId']

18

In [9]:
'rating' in row_0

False

In [10]:
row_0.name

0

In [11]:
row_0.name = 'first_row'

In [12]:
row_0

userId              18
movieId           4141
tag        Mark Waters
Name: first_row, dtype: object

# DataFrames

In [13]:
tags.head()

Unnamed: 0,userId,movieId,tag
0,18,4141,Mark Waters
1,65,208,dark hero
2,65,353,dark hero
3,65,521,noir thriller
4,65,592,dark hero


In [14]:
tags.index

RangeIndex(start=0, stop=465564, step=1)

In [15]:
tags.columns

Index(['userId', 'movieId', 'tag'], dtype='object')

In [16]:
tags.iloc[[0, 11, 2000]]

Unnamed: 0,userId,movieId,tag
0,18,4141,Mark Waters
11,65,1783,noir thriller
2000,910,68554,conspiracy theory


# Descriptive Statistics

In [17]:
ratings['rating'].describe()

count    2.000026e+07
mean     3.525529e+00
std      1.051989e+00
min      5.000000e-01
25%      3.000000e+00
50%      3.500000e+00
75%      4.000000e+00
max      5.000000e+00
Name: rating, dtype: float64

In [18]:
ratings['rating'].mean()

3.5255285642993797

In [19]:
ratings['rating'].min(), ratings['rating'].max(), ratings['rating'].std()

(0.5, 5.0, 1.0519889192942424)

In [20]:
ratings['rating'].mode()

0    4.0
dtype: float64

In [21]:
ratings.corr()

Unnamed: 0,userId,movieId,rating
userId,1.0,-0.00085,0.001175
movieId,-0.00085,1.0,0.002606
rating,0.001175,0.002606,1.0


In [22]:
filter_1 = ratings['rating'] > 5

In [23]:
filter_1.any()

False

In [24]:
filter_2 = ratings['rating'] > 0

In [25]:
filter_2.all()

True

# Data Cleaning: Handling missing data

In [26]:
movie_data.shape

(27278, 3)

In [27]:
movie_data.isnull().any()

movieId    False
title      False
genres     False
dtype: bool

In [28]:
ratings.shape

(20000263, 3)

In [29]:
ratings.isnull().any()

userId     False
movieId    False
rating     False
dtype: bool

In [30]:
tags.shape

(465564, 3)

In [31]:
tags.isnull().any()

userId     False
movieId    False
tag         True
dtype: bool

In [32]:
null_ = tags['tag'].isnull()
null_

0         False
1         False
2         False
3         False
4         False
          ...  
465559    False
465560    False
465561    False
465562    False
465563    False
Name: tag, Length: 465564, dtype: bool

In [33]:
null_tags = tags[null_]
null_tags

Unnamed: 0,userId,movieId,tag
373276,116460,123,
373277,116460,346,
373281,116460,1184,
373288,116460,1785,
373289,116460,2194,
373291,116460,2691,
373299,116460,4103,
373301,116460,4473,
373303,116460,4616,
373319,116460,7624,


In [34]:
null_tag_movies = null_tags['movieId']
pd.DataFrame(movie_data, index=null_tag_movies)

Unnamed: 0_level_0,movieId,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
123,125.0,Flirting With Disaster (1996),Comedy
346,350.0,"Client, The (1994)",Drama|Mystery|Thriller
1184,1210.0,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Sci-Fi
1785,1869.0,Black Dog (1998),Action|Thriller
2194,2279.0,Urban Legend (1998),Horror|Thriller
2691,2777.0,Cobra (1925),Drama
4103,4197.0,Real Life (1979),Comedy
4473,4568.0,Best of the Best (1989),Action
4616,4711.0,Theremin: An Electronic Odyssey (1993),Documentary
7624,8024.0,"Thing Called Love, The (1993)",Comedy|Drama|Romance


In [35]:
tags = tags.dropna()

In [36]:
tags.isnull().any()

userId     False
movieId    False
tag        False
dtype: bool

In [37]:
tags.shape

(465548, 3)

# Slicing out columns

In [44]:
tags['tag'].head()

0      Mark Waters
1        dark hero
2        dark hero
3    noir thriller
4        dark hero
Name: tag, dtype: object

In [45]:
movie_data[['title', 'genres']].head()

Unnamed: 0,title,genres
0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,Jumanji (1995),Adventure|Children|Fantasy
2,Grumpier Old Men (1995),Comedy|Romance
3,Waiting to Exhale (1995),Comedy|Drama|Romance
4,Father of the Bride Part II (1995),Comedy


In [46]:
tag_counts = tags['tag'].value_counts()
tag_counts[:10]

sci-fi             3384
based on a book    3281
atmospheric        2917
comedy             2779
action             2657
surreal            2427
BD-R               2334
twist ending       2323
funny              2072
dystopia           1991
Name: tag, dtype: int64

In [47]:
ratings.iloc[:10]

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5
5,1,112,3.5
6,1,151,4.0
7,1,223,4.0
8,1,253,4.0
9,1,260,4.0


# Filters for selected rows

In [49]:
is_highly_rated = ratings['rating'] >= 4.0

In [50]:
ratings[is_highly_rated][-5:]

Unnamed: 0,userId,movieId,rating
20000256,138493,66762,4.5
20000257,138493,68319,4.5
20000258,138493,68954,4.5
20000259,138493,69526,4.5
20000261,138493,70286,5.0


In [51]:
is_animation = movie_data['genres'].str.contains('Animation')

In [52]:
movie_data[is_animation].head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
12,13,Balto (1995),Adventure|Animation|Children
47,48,Pocahontas (1995),Animation|Children|Drama|Musical|Romance
236,239,"Goofy Movie, A (1995)",Animation|Children|Comedy|Romance
241,244,Gumby: The Movie (1995),Animation|Children


# Group by and Aggregate

In [53]:
ratings_count = ratings[['movieId', 'rating']].groupby('rating').count()
ratings_count

Unnamed: 0_level_0,movieId
rating,Unnamed: 1_level_1
0.5,239125
1.0,680732
1.5,279252
2.0,1430997
2.5,883398
3.0,4291193
3.5,2200156
4.0,5561926
4.5,1534824
5.0,2898660


In [54]:
average_rating = ratings[['movieId', 'rating']].groupby('movieId').mean()
average_rating.head()

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
1,3.92124
2,3.211977
3,3.15104
4,2.861393
5,3.064592


In [55]:
movie_count = ratings[['movieId', 'rating']].groupby('movieId').count()
movie_count.head()

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
1,49695
2,22243
3,12735
4,2756
5,12161


# Merge DataFrames

In [56]:
tags.head()

Unnamed: 0,userId,movieId,tag
0,18,4141,Mark Waters
1,65,208,dark hero
2,65,353,dark hero
3,65,521,noir thriller
4,65,592,dark hero


In [57]:
movie_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [58]:
merged_movie = movie_data.merge(tags, on='movieId', how='inner')
merged_movie

Unnamed: 0,movieId,title,genres,userId,tag
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1644,Watched
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1741,computer animation
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1741,Disney animated feature
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1741,Pixar animation
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1741,TÃ©a Leoni does not star in this movie
...,...,...,...,...,...
465543,131258,The Pirates (2014),Adventure,28906,bandits
465544,131258,The Pirates (2014),Adventure,28906,Korea
465545,131258,The Pirates (2014),Adventure,28906,mutiny
465546,131258,The Pirates (2014),Adventure,28906,pirates


# Combine aggregation, merging and filters to get useful analytics

In [59]:
avg_ratings = ratings.groupby('movieId', as_index='False').mean()
del avg_ratings['userId']
avg_ratings.head()

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
1,3.92124
2,3.211977
3,3.15104
4,2.861393
5,3.064592


In [60]:
box_office = movie_data.merge(avg_ratings, on='movieId', how='inner')
box_office.head()

Unnamed: 0,movieId,title,genres,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92124
1,2,Jumanji (1995),Adventure|Children|Fantasy,3.211977
2,3,Grumpier Old Men (1995),Comedy|Romance,3.15104
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,2.861393
4,5,Father of the Bride Part II (1995),Comedy,3.064592


In [61]:
is_highly_rated = box_office['rating'] >= 4.0
box_office[is_highly_rated][-5:]

Unnamed: 0,movieId,title,genres,rating
26737,131250,No More School (2000),Comedy,4.0
26738,131252,Forklift Driver Klaus: The First Day on the Jo...,Comedy|Horror,4.0
26739,131254,Kein Bund für's Leben (2007),Comedy,4.0
26740,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,4.0
26743,131262,Innocence (2014),Adventure|Fantasy|Horror,4.0


In [62]:
is_comedy = box_office['genres'].str.contains('Comedy')
box_office[is_comedy][:5]

Unnamed: 0,movieId,title,genres,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.92124
2,3,Grumpier Old Men (1995),Comedy|Romance,3.15104
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,2.861393
4,5,Father of the Bride Part II (1995),Comedy,3.064592
6,7,Sabrina (1995),Comedy|Romance,3.366484


In [63]:
box_office[is_comedy & is_highly_rated][-5:]

Unnamed: 0,movieId,title,genres,rating
26736,131248,Brother Bear 2 (2006),Adventure|Animation|Children|Comedy|Fantasy,4.0
26737,131250,No More School (2000),Comedy,4.0
26738,131252,Forklift Driver Klaus: The First Day on the Jo...,Comedy|Horror,4.0
26739,131254,Kein Bund für's Leben (2007),Comedy,4.0
26740,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,4.0


# Vectorized String Operations

In [64]:
movie_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Split 'genres' into multiple columns

In [65]:
movie_genres = movie_data['genres'].str.split('|', expand=True)

In [66]:
movie_genres[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Adventure,Animation,Children,Comedy,Fantasy,,,,,
1,Adventure,Children,Fantasy,,,,,,,
2,Comedy,Romance,,,,,,,,
3,Comedy,Drama,Romance,,,,,,,
4,Comedy,,,,,,,,,
5,Action,Crime,Thriller,,,,,,,
6,Comedy,Romance,,,,,,,,
7,Adventure,Children,,,,,,,,
8,Action,,,,,,,,,
9,Action,Adventure,Thriller,,,,,,,


### Add a new column for comedy genre flag

In [67]:
movie_genres['isComedy'] = movie_data['genres'].str.contains('Comedy')

In [68]:
movie_genres[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,isComedy
0,Adventure,Animation,Children,Comedy,Fantasy,,,,,,True
1,Adventure,Children,Fantasy,,,,,,,,False
2,Comedy,Romance,,,,,,,,,True
3,Comedy,Drama,Romance,,,,,,,,True
4,Comedy,,,,,,,,,,True
5,Action,Crime,Thriller,,,,,,,,False
6,Comedy,Romance,,,,,,,,,True
7,Adventure,Children,,,,,,,,,False
8,Action,,,,,,,,,,False
9,Action,Adventure,Thriller,,,,,,,,False


### Extract year from title e.g (1995)

In [69]:
movie_data['year'] = movie_data['title'].str.extract(".*\((.*)\).*", expand=True)

In [70]:
movie_data.tail()

Unnamed: 0,movieId,title,genres,year
27273,131254,Kein Bund für's Leben (2007),Comedy,2007
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,2002
27275,131258,The Pirates (2014),Adventure,2014
27276,131260,Rentun Ruusu (2001),(no genres listed),2001
27277,131262,Innocence (2014),Adventure|Fantasy|Horror,2014


# Parsing Timestamps

In [71]:
tags = pd.read_csv('./movielens/tags.csv', sep=',')

In [72]:
tags.dtypes

userId        int64
movieId       int64
tag          object
timestamp     int64
dtype: object

### Unix time/ POSIX time/ epoch time records time in seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970 

In [73]:
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,18,4141,Mark Waters,1240597180
1,65,208,dark hero,1368150078
2,65,353,dark hero,1368150079
3,65,521,noir thriller,1368149983
4,65,592,dark hero,1368150078


In [74]:
tags['parsed_time'] = pd.to_datetime(tags['timestamp'], unit='s')

In [75]:
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp,parsed_time
0,18,4141,Mark Waters,1240597180,2009-04-24 18:19:40
1,65,208,dark hero,1368150078,2013-05-10 01:41:18
2,65,353,dark hero,1368150079,2013-05-10 01:41:19
3,65,521,noir thriller,1368149983,2013-05-10 01:39:43
4,65,592,dark hero,1368150078,2013-05-10 01:41:18


In [76]:
tags['parsed_time'].dtypes

dtype('<M8[ns]')

### Selecting rows based on timestamps

In [77]:
greater_than_t = tags['parsed_time'] > '2015-02-01'
selected_rows = tags[greater_than_t]

In [78]:
selected_rows.head()

Unnamed: 0,userId,movieId,tag,timestamp,parsed_time
301,318,260,1970s,1424472169,2015-02-20 22:42:49
302,318,260,fantasy,1424472169,2015-02-20 22:42:49
303,318,260,sci-fi,1424472169,2015-02-20 22:42:49
304,318,115149,Action,1424534310,2015-02-21 15:58:30
305,318,115149,Revenge,1424534283,2015-02-21 15:58:03


In [79]:
tags.shape, selected_rows.shape

((465564, 5), (12130, 5))

### Sorting data using timestamps

In [80]:
tags.sort_values(by='parsed_time')[:10]

Unnamed: 0,userId,movieId,tag,timestamp,parsed_time
333932,100371,2788,monty python,1135429210,2005-12-24 13:00:10
333927,100371,1732,coen brothers,1135429236,2005-12-24 13:00:36
333924,100371,1206,stanley kubrick,1135429248,2005-12-24 13:00:48
333923,100371,1193,jack nicholson,1135429371,2005-12-24 13:02:51
333939,100371,5004,peter sellers,1135429399,2005-12-24 13:03:19
333922,100371,47,morgan freeman,1135429412,2005-12-24 13:03:32
333921,100371,47,brad pitt,1135429412,2005-12-24 13:03:32
333936,100371,4011,brad pitt,1135429431,2005-12-24 13:03:51
333937,100371,4011,guy ritchie,1135429431,2005-12-24 13:03:51
333920,100371,32,bruce willis,1135429442,2005-12-24 13:04:02


In [81]:
selected_rows.sort_values(by='parsed_time')[:10]

Unnamed: 0,userId,movieId,tag,timestamp,parsed_time
431961,130446,113565,christianity,1422749043,2015-02-01 00:04:03
431964,130446,113565,cult,1422749051,2015-02-01 00:04:11
431965,130446,113565,drama,1422749058,2015-02-01 00:04:18
431966,130446,113565,fake documentary,1422749066,2015-02-01 00:04:26
431967,130446,113565,found footage,1422749072,2015-02-01 00:04:32
431958,130446,113565,born-again,1422749081,2015-02-01 00:04:41
431970,130446,113565,Jonestown,1422749090,2015-02-01 00:04:50
431969,130446,113565,horror,1422749095,2015-02-01 00:04:55
431963,130446,113565,community,1422749110,2015-02-01 00:05:10
431962,130446,113565,closed community,1422749129,2015-02-01 00:05:29


# Average movie ratings over time

### Are movie ratings related to the year of launch

In [82]:
average_rating = ratings[['movieId', 'rating']].groupby('movieId', as_index=False).mean()
average_rating.tail()

Unnamed: 0,movieId,rating
26739,131254,4.0
26740,131256,4.0
26741,131258,2.5
26742,131260,3.0
26743,131262,4.0


In [83]:
joined = movie_data.merge(average_rating, on='movieId', how='inner')
joined.head()
joined.corr()

Unnamed: 0,movieId,rating
movieId,1.0,-0.090369
rating,-0.090369,1.0


In [84]:
joined.head()

Unnamed: 0,movieId,title,genres,year,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995,3.92124
1,2,Jumanji (1995),Adventure|Children|Fantasy,1995,3.211977
2,3,Grumpier Old Men (1995),Comedy|Romance,1995,3.15104
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,1995,2.861393
4,5,Father of the Bride Part II (1995),Comedy,1995,3.064592


In [85]:
yearly_average = joined[['year', 'rating']].groupby('year', as_index=False).mean()
yearly_average[:10]

Unnamed: 0,year,rating
0,1891,3.0
1,1893,3.375
2,1894,3.071429
3,1895,3.125
4,1896,3.183036
5,1898,3.85
6,1899,3.625
7,1900,3.166667
8,1901,5.0
9,1902,3.738189


In [86]:
d = {'one' : pd.Series([100.,200.], index=['apple','orange']),
    'two' : pd.Series([111.,211], index=['apple','orange'])}
df = pd.DataFrame(d)

In [87]:
df

Unnamed: 0,one,two
apple,100.0,111.0
orange,200.0,211.0


In [88]:
rating_mask = joined.rating  > 4
year_mask = joined.year > '2010'

In [89]:
test_joined = joined[rating_mask & year_mask]
test_joined = test_joined.reset_index()
test_joined

Unnamed: 0,index,movieId,title,genres,year,rating
0,18140,91007,I Want to Be a Soldier (2011),Drama,2011,4.500000
1,18268,91529,"Dark Knight Rises, The (2012)",Action|Adventure|Crime|IMAX,2012,4.000210
2,18484,92259,Intouchables (2011),Comedy|Drama,2011,4.132396
3,18550,92535,Louis C.K.: Live at the Beacon Theater (2011),Comedy,2011,4.062349
4,18939,94466,Black Mirror (2011),Drama|Sci-Fi,2011,4.182990
...,...,...,...,...,...,...
133,26544,130219,The Dark Knight (2011),Action|Crime|Drama|Thriller,2011,4.500000
134,26547,130294,Drevo (2014),Drama,2014,4.500000
135,26561,130374,India's Daughter (2015),Documentary,2015,4.500000
136,26604,130524,Flying Home (2014),Romance,2014,4.500000


In [90]:
import numpy as np
titles = np.array(test_joined.title)
for index, title in np.ndenumerate(titles):
    test_joined.loc[index, 'hashtags'] = "#" + title.split("(")[0].replace(" ", "")
    

In [91]:
test_joined

Unnamed: 0,index,movieId,title,genres,year,rating,hashtags
0,18140,91007,I Want to Be a Soldier (2011),Drama,2011,4.500000,#IWanttoBeaSoldier
1,18268,91529,"Dark Knight Rises, The (2012)",Action|Adventure|Crime|IMAX,2012,4.000210,"#DarkKnightRises,The"
2,18484,92259,Intouchables (2011),Comedy|Drama,2011,4.132396,#Intouchables
3,18550,92535,Louis C.K.: Live at the Beacon Theater (2011),Comedy,2011,4.062349,#LouisC.K.:LiveattheBeaconTheater
4,18939,94466,Black Mirror (2011),Drama|Sci-Fi,2011,4.182990,#BlackMirror
...,...,...,...,...,...,...,...
133,26544,130219,The Dark Knight (2011),Action|Crime|Drama|Thriller,2011,4.500000,#TheDarkKnight
134,26547,130294,Drevo (2014),Drama,2014,4.500000,#Drevo
135,26561,130374,India's Daughter (2015),Documentary,2015,4.500000,#India'sDaughter
136,26604,130524,Flying Home (2014),Romance,2014,4.500000,#FlyingHome


In [92]:
movie_str = "Toy Story (1995)"

In [93]:
movie_str.split("(")[0]

'Toy Story '

# Twitter API access

In [108]:
# Dependencies

import pickle
import os
from pprint import pprint
import config
import time

In [95]:
if not os.path.exists('secret_twitter_credentials.pkl'):
    Twitter={}
    Twitter['Consumer Key'] = config.consumer_key
    Twitter['Consumer Secret'] = config.consumer_secret
    Twitter['Access Token'] = config.access_token
    Twitter['Access Token Secret'] = config.access_token_secret
    with open('secret_twitter_credentials.pkl','wb') as f:
        pickle.dump(Twitter, f)
else:
    Twitter=pickle.load(open('secret_twitter_credentials.pkl','rb'))

In [96]:
import twitter

auth = twitter.oauth.OAuth(Twitter['Access Token'],
                           Twitter['Access Token Secret'],
                           Twitter['Consumer Key'],
                           Twitter['Consumer Secret'])

twitter_api = twitter.Twitter(auth=auth)

### Where on Earth ID number.. this helps to find location based on ID


In [97]:
WORLD_WOE_ID = 1
US_WOE_ID = 23424977
LOCAL_WOE_ID=2357024 # Atlanta WOEID

In [98]:
world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
us_trends = twitter_api.trends.place(_id=US_WOE_ID)
local_trends = twitter_api.trends.place(_id=LOCAL_WOE_ID)

In [121]:
def twitter_pull():
    status_list = []
    for hashtag in test_joined.hashtags:
        time.sleep(5)
        statuses={}
        topic = hashtag
        number=1
        search_results = twitter_api.search.tweets(q=topic, count=number)
        statuses["user_name"] = search_results['statuses'][0]['user']['screen_name']
        statuses["comment"] = search_results['statuses'][0]['text']
        status_list.append(dict(statuses))
    return status_list

In [122]:
twitter_pull()

IndexError: list index out of range

In [120]:
statuses[0]['user']['screen_name']

'regos_istvan'

In [166]:
hashtags = np.array(test_joined.hashtags)
try:
    for hashtag in np.nditer(hashtags):    
        statuses={}
        topic = hashtag
        number=1
        search_results = twitter_api.search.tweets(q=topic, count=number)
        screen_name = search_results["statuses"][0]['user']['screen_name']
except: 
    print('Something went wrong')

Something went wrong


In [160]:
topic = test_joined.hashtags[2].split(',')[0]
search_results = twitter_api.search.tweets(q=topic, count=number)

In [161]:
search_results["statuses"][0]['user']['screen_name']

'regos_istvan'

In [162]:
print(hashtags)

['#IWanttoBeaSoldier' '#DarkKnightRises,The' '#Intouchables'
 '#LouisC.K.:LiveattheBeaconTheater' '#BlackMirror' '#Samsara'
 '#UnderAfricanSkies' '#Hunt,The' '#TwoRabbits' '#Guilty'
 '#TerryPratchett:ChoosingtoDie' "#ThisAin'tCalifornia"
 '#Starrystarrynight' '#ChronicleofMyMother' '#DjangoUnchained'
 '#EyeoftheStorm,The' '#SunKissed' '#K-11' '#SimpleLife,A' '#TepeninArdi'
 '#DayIsDone' '#FrozenPlanet' '#Abendland'
 '#ShiningNight:APortraitofComposerMortenLauridsen' '#Smitty'
 '#Shepard&Dark' '#AmericanWinter' '#DeadManandBeingHappy,The'
 '#DonosdePortugal' '#HolidaysbytheSea' '#StarsinShorts' '#OnlytheYoung'
 '#LouisC.K.:OhMyGod' '#MissYouCanDoIt' '#SomethingRealandGood'
 '#TT3D:ClosertotheEdge' '#Motivation,The' '#StoryofLuke,The'
 '#CrimsonPetalandtheWhite,The' '#Linotype:TheFilm'
 '#McKennaShootsfortheStars' '#OneSmallHitch' '#B-Side' '#OnlyDaughter'
 '#Mía' '#Precision:TheMeasureofAllThings'
 '#ShockandAwe:TheStoryofElectricity' '#StuckBetweenStations' '#ReelRock7'
 '#BadFucking' 

In [159]:
for hashtag in np.nditer(hashtags):
    print(hashtag)

TypeError: Iterator operand or requested dtype holds references, but the REFS_OK flag was not enabled

In [157]:
x

NameError: name 'x' is not defined