In [102]:
import numpy as np
import pandas as pd
import sys

# tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# pymongo
import config
import ppp
import pymongo
from pymongo import MongoClient
import dnspython
import urllib.parse
import regex
import re

In [2]:
# Twitter API
access_token = config.access_token
access_token_secret = config.access_token_secret
consumer_key = config.consumer_key
consumer_secret = config.consumer_secret

## SQLITE

In [27]:
import sqlite3

In [28]:
conn = sqlite3.connect('test.db')
c = conn.cursor()

In [29]:
c.execute("create table coffee (main_id varchar(40), id varchar(20), name varchar(50), followers integer null, following integer null)")

<sqlite3.Cursor at 0x7f125d4397a0>

## JSON to SQLite

In [30]:
import json
with open("data/test-out (2)", "r") as f1:
    for line in f1:
        try:
            data = json.loads(line)
            c.execute("insert into coffee values (?, ?, ?, ?, ?)",
                      [data['id_str'], data['user']['id'], data['user']['screen_name'], data['user']['followers_count'], data['user']['friends_count']])
        except:
            continue

In [156]:
cursor = conn.execute("select name from coffee where main_id == '1253319701473587209'")
rows = cursor.fetchall()

In [157]:
for row in rows:
    print(row)

('kristiavalenzu1',)


## JSON to MongoDB

In [64]:
# setting mongodb config
username = urllib.parse.quote_plus(ppp.username)
password = urllib.parse.quote_plus(ppp.password)
cluster = MongoClient('mongodb+srv://%s:%s@cluster0-tlu5n.mongodb.net/test?retryWrites=true&w=majority' % (username, password))

In [65]:
# defining cluster and collection
db = cluster["coffee_twitter"]
collection = db["tweets"]

In [11]:
# reading json and pushing tweet contents to MongoDB database
with open("data/test-out (2)", "r") as f1:
    for line in f1:
        try:
            data = json.loads(line)
            idx = data['id_str']
            full_tweet = data['retweeted_status']['extended_tweet']['full_text']
            lang = data['lang']
            retweeted = data['retweeted']
            reply_count = data['reply_count']
            retweet_count = data['retweet_count']
            
            post = {"id": idx, "full_tweet": full_tweet, "lang": lang, "retweeted": retweeted, "reply_count": reply_count,
                    "retweet_count": retweet_count}
            
            collection.insert_one(post)
        except:
            continue

## Find Information

In [6]:
# query
results = collection.find({"id": "1253317430220337152"})
print(results)

<pymongo.cursor.Cursor object at 0x7f125d3b2550>


In [7]:
for r in results:
    print(r['full_tweet'] + "\n***")

Man, this quarantine has changed a lot.  My eating is better, my alcohol intake is at all-time lows as of a week or so ago (see: none).  BUT... my black coffee intake is at an all-time high.
***


In [50]:
# query text
results = collection.find( { "$text": { "$search": "starbucks" } } )
print(results)

<pymongo.cursor.Cursor object at 0x7f125d337668>


In [46]:
# query exact phrase
results = collection.find( { "$text": { "$search": "\"black rifle\"" } } )
print(results)

<pymongo.cursor.Cursor object at 0x7f125d337588>


In [52]:
# query term exclusion
results = collection.find( { "$text": { "$search": "-black sugar" } } )
print(results)

<pymongo.cursor.Cursor object at 0x7f125d335128>


In [38]:
# query
results = collection.find({"lang": "ja"})
print(results)

<pymongo.cursor.Cursor object at 0x7f125d337198>


In [53]:
for r in results:
    print(r['full_tweet'] + "\n***")

I typically brew my coffee with milk and sugar all at the same time lol. Btw I will drink all of this alone in about an hour or 2 https://t.co/Q9xn6A2qKN
***
I typically brew my coffee with milk and sugar all at the same time lol. Btw I will drink all of this alone in about an hour or 2 https://t.co/Q9xn6A2qKN
***
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
***
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
***
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
***
hey guys, so if you a

In [74]:
# extract start with "hey" regex
results = collection.find( { "full_tweet": { "$regex": "^hey" } } )
print(results)

<pymongo.cursor.Cursor object at 0x7f1254304438>


In [75]:
for r in results:
    print(r['full_tweet'])

hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?
hey guys, so if you add milk/creamer/sugar/etc to coffee do they count as condiments because you add them to cover to give it flavour? if so, is coffee also a condiment because you add it to water to give it flavour?


In [149]:
def find_tweet_hashtags(hasht = None, user = None, find = 'hash'):
    if hasht is not None:
        look = "((#" + hasht + "))"
    if user is not None:
        look = "((@" + user + "))"
    results = collection.find( { "full_tweet": { "$regex": look } } )
    
    for r in results:
        hashtag = r['full_tweet']
        if find == 'hash':
            x = re.findall(r"(#\w+)", hashtag)
        if find == 'user':
            x = re.findall(r"(@\w+)", hashtag)
        if find == 'full_tweet':
            x = hashtag
        print(x)


In [150]:
find_tweet_hashtags(hasht = "dalgona", find = 'hash')

['#dalgonacoffeechallenge', '#Dalgonacoffee', '#FluffyCoffee', '#coffee', '#whippedcoffee', '#Louisiana', '#Acadiana', '#Lafayette']
['#dalgonacoffeechallenge', '#Dalgonacoffee', '#FluffyCoffee', '#coffee', '#whippedcoffee', '#Louisiana', '#Acadiana', '#Lafayette']
['#dalgonacoffeechallenge', '#Dalgonacoffee', '#FluffyCoffee', '#coffee', '#whippedcoffee', '#Louisiana', '#Acadiana', '#Lafayette']
['#dalgonacoffeechallenge', '#Dalgonacoffee', '#FluffyCoffee', '#coffee', '#whippedcoffee', '#Louisiana', '#Acadiana', '#Lafayette']


In [151]:
find_tweet_hashtags(user = "coffee", find = 'user')

['@lumberjack', '@coffeebreak']
['@lumberjack', '@coffeebreak']


In [152]:
find_tweet_hashtags(hasht = "java", find = 'full_tweet')

What do you believe to be the number one requirement to be a successful developer? 🤔

i am doubt full about the answer being mostly coffee🙄😂☕

#100DaysOfCode #html #IoT #CodeNewbie #javascript #react #301DaysOfCode #GameDev
What do you believe to be the number one requirement to be a successful developer? 🤔

i am doubt full about the answer being mostly coffee🙄😂☕

#100DaysOfCode #html #IoT #CodeNewbie #javascript #react #301DaysOfCode #GameDev
What do you believe to be the number one requirement to be a successful developer? 🤔

i am doubt full about the answer being mostly coffee🙄😂☕

#100DaysOfCode #html #IoT #CodeNewbie #javascript #react #301DaysOfCode #GameDev
What do you believe to be the number one requirement to be a successful developer? 🤔

i am doubt full about the answer being mostly coffee🙄😂☕

#100DaysOfCode #html #IoT #CodeNewbie #javascript #react #301DaysOfCode #GameDev
What do you believe to be the number one requirement to be a successful developer? 🤔

i am doubt full ab

### Mongo + SQLite function

In [206]:
def find_by_word(word = None, tweet = None):

    results = collection.find({ "$text": { "$search": word } })
    
    idx = []
    lang = []
    user_id = []
    full_text = []
    for r in results:
        idx.append(r['id'])
        lang.append(r['lang'])
        full_text.append(r['full_tweet'])
        
        #sql
        cursor = conn.execute("select name from coffee where main_id == {}".format(r['id']))
        rows = cursor.fetchall()
        
        for row in rows:
            user_id.append(row)
    
    for i, val in enumerate((idx)):
        print(idx[i], ":::", lang[i], ":::", user_id[i])
        if tweet is not None:
            print(":::", full_text[i], "***\n")

In [207]:
find_by_word(word = 'chai')

1253338334488756224 ::: in ::: ('Hanniballad',)
1253337426048585729 ::: in ::: ('Hanniballad',)
1253336524176756738 ::: in ::: ('Mbusih',)
1253336073985380357 ::: in ::: ('Mbusih',)
1253335602537193474 ::: in ::: ('florswaggle',)
1253338334488756224 ::: in ::: ('florswaggle',)
1253337426048585729 ::: in ::: ('Ddaaante',)
1253336524176756738 ::: in ::: ('Ddaaante',)
1253336073985380357 ::: in ::: ('AishaWanjiku_',)
1253335602537193474 ::: in ::: ('AishaWanjiku_',)
1253327849966104576 ::: in ::: ('Hanniballad',)
1253327849966104576 ::: in ::: ('Hanniballad',)


In [208]:
find_by_word(word = 'java', tweet = True)

1253330996243558407 ::: en ::: ('bigevilbeard',)
::: How about some java with your java this morning? ☕️ Learn to code, get inspired and connect with #softwaredevelopers through @CiscoDevNet: https://t.co/3CAKsrR64Y #CiscoCert https://t.co/oiMoKQcLut ***

1253330996243558407 ::: en ::: ('bigevilbeard',)
::: How about some java with your java this morning? ☕️ Learn to code, get inspired and connect with #softwaredevelopers through @CiscoDevNet: https://t.co/3CAKsrR64Y #CiscoCert https://t.co/oiMoKQcLut ***

1253317504069500929 ::: en ::: ('bigevilbeard',)
::: UPCOMING "BEERS"
FRI: Flashlight Mango Hard Seltzer
MON: Flashlight Key Lime Hard Seltzer w/ Lemon &amp; Coconut
Next FRI? Terra Java Coffee Baltic Porter
2 weeks out: Scioto Coast Double IPA
In the Tank: Southern Terminus Mexican Lager
In the Tank: Beta Flash NE IPA (Batch #23) https://t.co/ysHztgMocB ***



In [153]:
# query text
results = collection.find( { "$text": { "$search": "starbucks" } } )
for r in results:
    print(r['id'])

1253335854606503943
1253335854606503943
1253331701201113089
1253331701201113089
1253323123706884096
1253323123706884096
1253325778881675265
1253325778881675265
1253320864377929729
1253320864377929729
1253337848524050433
1253335803125620739
1253337848524050433
1253335803125620739
1253335545041707008
1253335545041707008
1253330401579331591
1253330180686282759
1253330401579331591
1253330180686282759
1253331613762457602
1253331613762457602
1253331572398280712
1253331572398280712
1253322273508880386
1253322273508880386
1253338461597097990
1253335014982344704
1253338461597097990
1253334095867703302
1253331695056515075
1253335014982344704
1253334095867703302
1253331695056515075
1253325805490339841
1253325805490339841
1253322116679659521
1253321836361850880
1253320164222722049
1253322116679659521
1253321836361850880
1253320164222722049
1253331508061855744
1253331508061855744
1253321796750721024
1253321796750721024


In [210]:
def find_user_lang(lang = None):
    # query
    results = collection.find({"lang": lang})
    
    idx = []
    full_tweet = []
    user_id = []
    for r in results:
        full_tweet.append(r['full_tweet'])
        idx.append(r['id'])
        
        #sql
        cursor = conn.execute("select name from coffee where main_id == {}".format(r['id']))
        rows = cursor.fetchall()
        
        for row in rows:
            user_id.append(row)
            
    for i, val in enumerate(idx):
        print(user_id[i], "->  ", full_tweet[i])

In [211]:
find_user_lang(lang = 'ja')

('jdabc0507s2',) ->   SAZA COFFEEさんとのコラボButterfly Blendを差し入れし、さっそく試飲しているところを盗撮、…いや、物陰から撮影Ꙭ

なにを撮りたかったかというと、素敵なスウェットで美味しいコーヒーを入れるイメージ動画です🎞

TKによるオンラインコーヒーをどうぞ…！☕️ https://t.co/0HBA9BN2ED
('KAN55805962',) ->   今日もフォリオにおります♪

#ソメイティ #someity #オリンピック #olympic #クーマーイーツ #kumaeats #カフェフォリオ #caffeefoglio #ラテアート #latteart #代官山 #daikanyama #coffee #cafe #cafelatte #cuppccino #art #picture #illustlation #japan #漫画 #animation #アニメ #tokyo #movie https://t.co/4h5zkgzqWs
('KAN55805962',) ->   SAZA COFFEEさんとのコラボButterfly Blendを差し入れし、さっそく試飲しているところを盗撮、…いや、物陰から撮影Ꙭ

なにを撮りたかったかというと、素敵なスウェットで美味しいコーヒーを入れるイメージ動画です🎞

TKによるオンラインコーヒーをどうぞ…！☕️ https://t.co/0HBA9BN2ED
('Sayo_490_Ling',) ->   Dalgona Coffee it is this week! 
You guys requested no blender🤦🏻‍♂️
So...yeah let’s see if my arm can take that lol
이번주는 트렌디한 달고나 커피를 만들어보겠습니다!^^
今週は話題になっているダルゴナコーヒーを作ってみまーす！

#KEVINSKITCHEN 👨‍🍳
TWITTER + IG LIVE 
SATURDAY 12pm KST https://t.co/r3kxCVOLHe
('ACbelivev',) ->   Dalgona Coffee it is this week! 
You guys requested n