In [30]:
import glob
import json
import os
import time
from datetime import datetime

import pandas as pd
import twitter

### 1. Authorization

- **Step 1** Save your credentials in a JSON file using the format below (JSON requires double quotes around keys and values).
- **Step 2** Name the JSON file "creds.json" and save it in the directory where this Jupyter notebook is located.

```
{"consumer_key":        "CONSUMER_KEY",
 "consumer_key_secret": "CONSUMER_SECRET",
 "access_token":        "ACCESS_TOKEN",
 "access_token_secret": "ACCESS_TOKEN_SECRET"}
```

In [31]:
# Load the Twitter API credentials. The context manager closes the file
# handle as soon as the JSON has been parsed instead of leaking it.
with open('./creds.json', 'r') as creds_file:
    twitter_keys = json.load(creds_file)

In [32]:
# Build the python-twitter client from the four values stored in `twitter_keys`.
api = twitter.Api(
    consumer_key=twitter_keys['consumer_key'],
    consumer_secret=twitter_keys['consumer_key_secret'],
    access_token_key=twitter_keys['access_token'],
    access_token_secret=twitter_keys['access_token_secret'],
)

### 2. Function: get_tweets()
- Search for tweets with `keyword` and `geo_list`
- Saves the result in a csv
- Note: Create a directory where this notebook is located and name it "geo_tweets".
____
**Parameters**
- `keyword`: *string* | keyword to search for
- `geo_list`: *list of strings* | *"latitude,longitude"* | list of geographic coordinates to search for

In [34]:
# Column order of every csv written by get_tweets().
_TWEET_COLUMNS = ["created_at", "user_id", "user_location", "user_name",
                  "location_type", "location", "coordinate",
                  "hashtags", "text"]


def _tweet_to_record(tweet):
    """Flatten one search result into a dict keyed by the csv column names."""
    # A tweet may carry no place information; fall back to empty dicts so the
    # three place-derived columns become missing values instead of raising.
    place = tweet.place or {}
    bounding_box = place.get("bounding_box") or {}
    return {
        "created_at": tweet.created_at,
        # NOTE(review): this is the tweet's own id, stored under the existing
        # "user_id" column name. Kept unchanged so previously written csv
        # files stay comparable; confirm whether the author's id was intended.
        "user_id": tweet.id,
        "user_location": tweet.user.location,
        "user_name": tweet.user.screen_name,
        "location_type": place.get("place_type"),
        "location": place.get("full_name"),
        "coordinate": bounding_box.get("coordinates"),
        "hashtags": [tag.text for tag in (tweet.hashtags or [])],
        "text": tweet.text,
    }


def get_tweets(keyword, geo_list):
    """Search tweets for `keyword` around each geocode and save one csv per geocode.

    For every entry of `geo_list`, the module-level `api` client is queried via
    `GetSearch` and the results are written to
    "./geo_tweets/<keyword>_<n>_<mmddyy>.csv", where <n> is the 1-based position
    of the geocode in `geo_list`. The "./geo_tweets" directory is created when
    it does not exist yet.

    Parameters
    ----------
    keyword : str
        Term to search for; also used as the csv file-name prefix.
    geo_list : list of str
        Geocode strings, each forwarded unchanged as the `geocode` argument of
        `api.GetSearch`.
        NOTE(review): the Twitter search API presumably expects
        "latitude,longitude,radius" -- confirm whether a radius is required.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    os.makedirs("./geo_tweets", exist_ok=True)
    total = len(geo_list)

    for prog, geo in enumerate(geo_list, start=1):
        # Search for tweets
        tweets = api.GetSearch(term=keyword, geocode=geo)

        # Build the dataframe in one go; `columns=` keeps the header stable
        # even when the search returned nothing.
        records = [_tweet_to_record(tweet) for tweet in tweets]
        result = pd.DataFrame(records, columns=_TWEET_COLUMNS)

        # Save the dataframe as csv
        stamp = datetime.now().strftime("_%m%d%y")
        result.to_csv(f"./geo_tweets/{keyword}_{prog}{stamp}.csv", index=False)

        # Notify the progress
        print(f"{prog}/{total} finished")

        # Wait 5 seconds between requests to avoid reaching the rate limit;
        # no need to wait once the last request has been made.
        if prog < total:
            time.sleep(5)

### 3. Test run

In [56]:
# Geographical coordinate example: each entry is a "latitude,longitude" string
geo_list = ["34.086139,-118.480466", "35.086139,-119.480466"]

In [37]:
# Search "fire" tweets for each coordinate; one csv per coordinate is written to ./geo_tweets/
get_tweets("fire", geo_list)

1/2 finished
2/2 finished


### 4. Function: merge_by_keyword()
- Merge all csv files generated from **get_tweets()** by keyword.
- Save the merged files into a dataframe.
- If `save_csv` is True, it also saves the dataframe in a csv file.
- Note: Create a directory in 'geo_tweets' and name it 'merged'.
---
**Parameters**
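- `keyword`: *string* | keyword used in **get_tweets()**; the csv files saved for it are merged
- `save_csv`: *boolean* | if True, it saves the merged dataframe as a csv file in 'geo_tweets/merged'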


In [49]:
def merge_by_keyword(keyword, save_csv = False):
    """Merge the per-geocode csv files of `keyword` into a single dataframe.

    Reads every "./geo_tweets/<keyword>_*.csv" file (sorted by path so the row
    order is reproducible) and concatenates them with a fresh 0..n-1 index.

    Parameters
    ----------
    keyword : str
        File-name prefix of the csv files to merge. It is matched literally,
        followed by "_", so a keyword that is merely a prefix of another one
        (e.g. "fire" vs "fireworks") does not pick up the other's files.
    save_csv : bool, default False
        If True, the merged dataframe is also written to
        "./geo_tweets/merged/<keyword>_<mmddyy>.csv" (the directory is created
        when missing).

    Returns
    -------
    pandas.DataFrame
        The merged rows; an empty dataframe (and no csv written) when no file
        matches `keyword`.
    """
    # glob.escape keeps characters such as "*" or "[" in the keyword from
    # being interpreted as wildcards.
    pattern = "./geo_tweets/" + glob.escape(keyword) + "_*.csv"
    paths = sorted(glob.glob(pattern))

    # pd.concat raises on an empty list; hand back an empty frame instead.
    if not paths:
        return pd.DataFrame()

    merged_df = pd.concat([pd.read_csv(path) for path in paths],
                          ignore_index=True)

    if save_csv:
        os.makedirs("./geo_tweets/merged", exist_ok=True)
        stamp = datetime.now().strftime("_%m%d%y")
        merged_df.to_csv(f"./geo_tweets/merged/{keyword}{stamp}.csv", index=False)
    return merged_df

### 5. Test run

In [53]:
# Merge the "fire" csv files from ./geo_tweets/ and also save the result under ./geo_tweets/merged/
df = merge_by_keyword("fire", save_csv= True)

In [54]:
# Preview only the first rows instead of dumping the whole merged dataframe.
df.head()

Unnamed: 0,created_at,user_id,user_location,user_name,location_type,location,coordinate,hashtags,text
0,Mon Oct 28 19:17:07 +0000 2019,1188897501278900224,Los Angeles,abc7robhayes,neighborhood,"Brentwood, Los Angeles","[[[-118.528736, 34.041356], [-118.457499, 34.0...",[],Before/After the Getty Fire. \n\nThis is 1510 ...
1,Wed Oct 30 07:50:37 +0000 2019,1189449512961495040,"Los Angeles, CA",jintakhan,poi,Mount St. Mary's College Campus Center,"[[[-118.48168318584761, 34.08396202249944], [-...",[],Up at Mount St. Mary’s University. The wind is...
2,Tue Oct 29 16:31:26 +0000 2019,1189218195116158976,santa monica . maine . boston,runawaykat,neighborhood,"Brentwood, Los Angeles","[[[-118.528736, 34.041356], [-118.457499, 34.0...",[],"los angeles renters, i just asked my leasing o..."


In [55]:
# Read the merged csv back from disk. The "_110119" suffix is the mmddyy date
# on which merge_by_keyword(..., save_csv=True) wrote the file; adjust it to
# the day the merge was actually run.
pd.read_csv("./geo_tweets/merged/fire_110119.csv")

Unnamed: 0,created_at,user_id,user_location,user_name,location_type,location,coordinate,hashtags,text
0,Mon Oct 28 19:17:07 +0000 2019,1188897501278900224,Los Angeles,abc7robhayes,neighborhood,"Brentwood, Los Angeles","[[[-118.528736, 34.041356], [-118.457499, 34.0...",[],Before/After the Getty Fire. \n\nThis is 1510 ...
1,Wed Oct 30 07:50:37 +0000 2019,1189449512961495040,"Los Angeles, CA",jintakhan,poi,Mount St. Mary's College Campus Center,"[[[-118.48168318584761, 34.08396202249944], [-...",[],Up at Mount St. Mary’s University. The wind is...
2,Tue Oct 29 16:31:26 +0000 2019,1189218195116158976,santa monica . maine . boston,runawaykat,neighborhood,"Brentwood, Los Angeles","[[[-118.528736, 34.041356], [-118.457499, 34.0...",[],"los angeles renters, i just asked my leasing o..."
