# Getting tweets about Korean Dramas
A few years ago, I started watching Korean dramas. While I have never posted about them online, I was interested in analyzing tweets that combine the hashtag #netflix with a K-drama hashtag such as #kdrama. The tweets were collected with the [Twitter Search API](https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/overview).

In [None]:
import json
import tweepy
import emojis
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Twitter API credentials. These are placeholders: substitute your own keys
# and keep the real values out of version control.
consumer_key = 'xxxxxxxxxxxxx'
consumer_secret = 'xxxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxx'

# Authenticate with OAuth and build the API client used for every search below.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# The search term you want to find. Percent-decoded it reads:
#   #netflix (#kdrama OR #koreandrama OR #kdramas)
# NOTE(review): the string is already percent-encoded; confirm that tweepy
# does not encode it a second time when building the request.
query = '%23netflix%20(%23kdrama%20OR%20%23koreandrama%20OR%20%23kdramas)'

# Keyword arguments shared by every search call.
search_params = {
    'q': query,
    'result_type': 'recent',
    'count': 100,
    'lang': 'en',
    'include_entities': True,
}

# Calling the search function with our parameters. The first call takes the
# most recent page; each later call passes a hard-coded max_id to reach
# further back (presumably IDs noted from earlier result pages - verify
# before re-running, since they go stale).
results_1 = api.search(**search_params)
results_2 = api.search(max_id=1355164636417409035, **search_params)
results_3 = api.search(max_id=1354550627926482944, **search_params)
results_4 = api.search(max_id=1354174810851577865, **search_params)
results_5 = api.search(max_id=1354043302891319298, **search_params)

In [None]:
def json_to_dict(result_set):
    """Convert a collection of status objects into a list of plain dictionaries.

    Each status' raw ``_json`` payload is serialized with ``json.dumps`` and
    parsed back with ``json.loads``, so every returned dictionary is an
    independent copy of the payload rather than a reference to it.

    Args:
        result_set: Iterable of objects exposing a ``_json`` attribute. Any
            iterable is accepted (lists, generators, ...) because the items
            are consumed by plain iteration, without ``len()`` or indexing.

    Returns:
        list: One dictionary per status, in iteration order. Empty input
        yields an empty list.
    """
    return [json.loads(json.dumps(status._json)) for status in result_set]

In [None]:
# Turn every page of search results into a list of plain tweet dictionaries,
# keeping one list per page.
tweets_dict_1, tweets_dict_2, tweets_dict_3, tweets_dict_4, tweets_dict_5 = map(
    json_to_dict,
    (results_1, results_2, results_3, results_4, results_5),
)

In [None]:
def get_tweets_information(tweets_dict):
    """Flatten tweet dictionaries into a column-oriented dictionary of lists.

    Args:
        tweets_dict: Iterable of tweet dictionaries. Each one is read for the
            keys ``created_at``, ``id``, ``text``, ``entities.hashtags``,
            ``user.id``, ``user.location`` and ``lang``; ``retweeted_status``
            is optional.

    Returns:
        dict: Maps each column name to a list holding one entry per tweet.
        ``retweeted`` is True when the tweet carries a ``retweeted_status``;
        in that case ``retweeted_text`` and ``retweeted_hashtags`` come from
        that nested status, otherwise both are ``np.nan``.
    """
    column_names = (
        'date_created', 'tweet_text',
        'tweet_id', 'hashtags',
        'user_id', 'location',
        'retweeted', 'lang',
        'retweeted_text', 'retweeted_hashtags',
    )
    columns = {name: [] for name in column_names}

    for status in tweets_dict:
        # Fields present on every tweet.
        row = {
            'date_created': status['created_at'],
            'tweet_id': status['id'],
            'tweet_text': status['text'],
            'hashtags': [tag['text'] for tag in status['entities']['hashtags']],
            'user_id': status['user']['id'],
            'location': status['user']['location'],
            'lang': status['lang'],
        }

        # Retweet-only fields; plain tweets get NaN placeholders so that
        # every column ends up with the same length.
        row['retweeted'] = 'retweeted_status' in status
        if row['retweeted']:
            source = status['retweeted_status']
            row['retweeted_text'] = source['text']
            row['retweeted_hashtags'] = [
                tag['text'] for tag in source['entities']['hashtags']
            ]
        else:
            row['retweeted_text'] = np.nan
            row['retweeted_hashtags'] = np.nan

        for name, value in row.items():
            columns[name].append(value)

    return columns

In [None]:
# Flatten each page of tweets into column lists and load every page into its
# own DataFrame. The first page keeps the un-suffixed names (new_tweets_dict,
# df); it must read tweets_dict_1, since no plain `tweets_dict` variable exists.
new_tweets_dict = get_tweets_information(tweets_dict_1)
df = pd.DataFrame.from_dict(new_tweets_dict)
new_tweets_dict_2 = get_tweets_information(tweets_dict_2)
df2 = pd.DataFrame.from_dict(new_tweets_dict_2)
new_tweets_dict_3 = get_tweets_information(tweets_dict_3)
df3 = pd.DataFrame.from_dict(new_tweets_dict_3)
new_tweets_dict_4 = get_tweets_information(tweets_dict_4)
df4 = pd.DataFrame.from_dict(new_tweets_dict_4)
new_tweets_dict_5 = get_tweets_information(tweets_dict_5)
df5 = pd.DataFrame.from_dict(new_tweets_dict_5)

In [None]:
# Stack the five per-page frames row-wise under a single fresh index, then
# write the combined table to disk without the index column.
df = pd.concat(
    objs=[df, df2, df3, df4, df5],
    axis='index',
    ignore_index=True,
)
df.to_csv(path_or_buf='kdrama_tweets', index=False)