In [None]:
import datetime
import sys
import pandas as pd
import numpy as np
import re
from datetime import date
today = date.today()

from textblob import TextBlob
from nltk.corpus import stopwords
from collections import Counter
import nltk
#nltk.download('stopwords')


## Sets up the YAML file for Twitter credentials

In [None]:
import os

import yaml

# The consumer key/secret are read from the environment so that real
# credentials never have to be typed into (and saved with) the notebook.
# When the variables are unset the values fall back to empty strings.
config = dict(
    search_tweets_api = dict(
        account_type = 'premium',
        endpoint = 'https://api.twitter.com/1.1/tweets/search/fullarchive/label.json',
        consumer_key = os.environ.get('SEARCHTWEETS_CONSUMER_KEY', ''),
        consumer_secret = os.environ.get('SEARCHTWEETS_CONSUMER_SECRET', '')
    )
)

# Write the settings to the YAML file; any existing file is overwritten.
with open('twitter_keys_fullarchive.yaml', 'w') as config_file:
    yaml.dump(config, config_file, default_flow_style=False)

## Loads credentials 

In [None]:
from searchtweets import load_credentials

# Read the "search_tweets_api" section of the YAML credentials file.
premium_search_args = load_credentials("twitter_keys_fullarchive.yaml",
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

# Show what was loaded without echoing secrets into the saved cell output.
# NOTE(review): the key names below are assumed from searchtweets' credential
# fields -- confirm against the installed version.
SENSITIVE_KEYS = {'bearer_token', 'consumer_key', 'consumer_secret', 'password'}
print({key: ('<redacted>' if key in SENSITIVE_KEYS and value else value)
       for key, value in premium_search_args.items()})

## Establishes payload rule

In [None]:
from searchtweets import gen_rule_payload

# Query string handed to the search API.
hashtag = "#stockmarket OR #federalreserve"

# Page size and date window for the search request.
rule_options = dict(
    results_per_call=100,
    from_date="2019-09-25 07:15",
    to_date="2019-12-04 23:11",
)
rule = gen_rule_payload(hashtag, **rule_options)

## Combines credentials and payload rule 


In [None]:
from searchtweets import ResultStream

# Build the stream from the search rule and the loaded credentials,
# capped at 5000 results.
rs = ResultStream(
    max_results=5000,
    rule_payload=rule,
    **premium_search_args
)
print(rs)

## Connects to the API and writes the tweets to a .jsonl file for distribution

In [None]:
import json

# Write every streamed tweet to the file as one JSON document per line.
# NOTE(review): the file is opened in append mode, so re-running this cell
# adds to whatever the file already contains -- confirm that is intended.
with open('twitter_premium_api_demo.jsonl', 'a', encoding='utf-8') as f:
    n = 0  # stays 0 if the stream yields nothing
    for n, tweet in enumerate(rs.stream(), start=1):
        # Progress marker every 100 tweets.
        if n % 100 == 0:
            print('{0}: {1}'.format(str(n), tweet['created_at']))
        f.write(json.dumps(tweet) + '\n')
print('done')

## Opens .jsonl file and loads all tweet data into a dataframe 

In [None]:
# Prefer the public pandas.json_normalize (pandas >= 1.0); older releases only
# expose it under pandas.io.json, which newer releases no longer provide.
try:
    json_normalize = pd.json_normalize
except AttributeError:
    from pandas.io.json import json_normalize

# Read the JSON Lines file with the same encoding it was written with,
# skipping blank lines (json.loads would raise on them).
with open('twitter_premium_api_demo.jsonl', 'r', encoding='utf-8') as json_file:
    json_list = [line for line in json_file if line.strip()]

tweet_records = [json.loads(json_str) for json_str in json_list]

# Flatten all tweets in a single call instead of appending one-row frames in
# a loop: DataFrame.append is quadratic here and no longer exists in
# pandas 2.x. The result has a unique 0..n-1 index, whereas the appended
# frame labelled every row 0.
json_df = json_normalize(tweet_records) if tweet_records else pd.DataFrame()


## Creates a new DataFrame with the full text of every tweet type and the creation date

In [None]:
# Columns kept from the flattened tweet frame.
tweet_text_columns = [
    'retweeted_status.retweeted',
    'retweeted_status.extended_tweet.full_text',
    'extended_tweet.full_text',
    'created_at',
]
new_df = json_df[tweet_text_columns].copy()

# Rows whose nested flag equals False take the retweeted status' full text;
# every other row (including a NaN flag, which compares unequal to False)
# takes the tweet's own extended full text.
# NOTE(review): rows lacking the chosen extended_tweet column end up NaN here;
# confirm whether a fallback to the plain text fields is wanted.
uses_retweet_text = new_df['retweeted_status.retweeted'] == False
new_df['final_tweet'] = np.where(
    uses_retweet_text,
    new_df['retweeted_status.extended_tweet.full_text'],
    new_df['extended_tweet.full_text'],
)

## Defines functions for parsing tweet timestamps and extracting the date

In [None]:
def date_strip(col):
    """Parse a timestamp string such as ``"Wed Sep 25 07:15:00 +0000 2019"``.

    Args:
        col: Timestamp text in the layout ``%a %b %d %H:%M:%S %z %Y``.

    Returns:
        datetime.datetime: Timezone-aware datetime carrying the parsed offset.

    Raises:
        ValueError: If ``col`` does not match the expected layout.
    """
    timestamp_layout = "%a %b %d %H:%M:%S %z %Y"
    return datetime.datetime.strptime(col, timestamp_layout)

def actual_date(col):
    """Extract the calendar date from a value whose text contains ``YYYY-MM-DD``.

    Args:
        col: Any object. It is converted with ``str()`` before searching, so
            datetimes and ISO-like strings both work.

    Returns:
        datetime.date: The first ``YYYY-MM-DD`` occurrence in ``str(col)``.

    Raises:
        ValueError: If ``str(col)`` contains no ``YYYY-MM-DD`` pattern, or the
            matched digits do not form a valid calendar date.
    """
    # Raw string: '\d' inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    match = re.search(r'\d{4}-\d{2}-\d{2}', str(col))
    if match is None:
        # Fail with a clear message instead of AttributeError on None.group().
        raise ValueError('no YYYY-MM-DD date found in {!r}'.format(col))
    return datetime.datetime.strptime(match.group(), '%Y-%m-%d').date()
    
    
# Parse each raw created_at value into a datetime.
parsed_timestamps = new_df['created_at'].apply(date_strip)
new_df['date_time'] = parsed_timestamps

# Reduce each parsed timestamp to its calendar date.
calendar_dates = new_df['date_time'].apply(actual_date)
new_df['final_date'] = calendar_dates
