# Data Analysis
## Tweet data

In [1]:
import sqlite3
import pandas as pd
from afinn import Afinn
import json

import sys
sys.path.insert(0, "../src")
import tjson

# Open the SQLite file that sits one directory above the notebook.
# `db` is the connection; `c` is the cursor the query cells below execute on.
db = sqlite3.connect("../tweets.db")
c = db.cursor()

In [2]:
# Read the whole `tweets` table into memory.
rows = c.execute("SELECT * FROM tweets").fetchall()

# Column labels are assigned positionally, so this relies on the table's
# column order being id, date, json, filter.
tweets = pd.DataFrame(rows, columns=['id', 'date', 'json', 'filter']).assign(
    # Parse the stored timestamp strings into pandas datetimes.
    date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d %H:%M:%S')
)

In [3]:
# Scorer used to turn each tweet's text into a numeric sentiment score.
afinn = Afinn()

# Extract the text from each tweet's JSON once and reuse it for both the
# score and the `text` column; previously tjson.tweet_text ran twice per row.
# NOTE(review): assumes tjson.tweet_text is a pure function of its input — confirm.
tweet_texts = tweets['json'].apply(tjson.tweet_text)

# Columns are added in the same order as before:
# sentiment_score, text, retweet, symbols.
tweets['sentiment_score'] = tweet_texts.apply(afinn.score)
tweets['text'] = tweet_texts
tweets['retweet'] = tweets['json'].apply(tjson.is_retweet)
tweets['symbols'] = tweets['json'].apply(tjson.tweet_symbols)

In [4]:
# Raise the per-cell display limit to 3000 characters so long tweet texts
# are not cut off in the rendered tables (applies to later cells as well).
pd.set_option('display.max_colwidth', 3000)

# Five randomly chosen tweets whose sentiment score is strictly positive.
tweets.loc[
    tweets['sentiment_score'] > 0,
    ['sentiment_score', 'text', 'retweet'],
].sample(n=5)


Unnamed: 0,sentiment_score,text,retweet
418,2.0,"RT @Aruwba: 💥FLASH SALE!💥\n Come join my uncensored snap, with fresh daily posts! \n🚨Get it now for ONLY $25 FOR LIFE🚨\n\n⬇️Click here⬇️ \nhttps…",True
582,4.0,#Free #Bedava\n\n3 #CZE Token #Airdrop\n\nWorth $15(Değeri 15$)\n\nhttps://t.co/H0bRtNrcKy\n\n#airdrop #cryptocurrency #Blockchain #Free #bitcoin #bitcoinCash #XRP #dash #Bytecoin #blackcoin #emercoin #ethereum #Reddcoin $ETH $XRP $BTC $DASH #ICO #BTC #TRX #bounty #XRP #LTC #Airdrops,False
77,5.0,"Looking for honest and profitable trading calls, recommendations and advice, join:\n\nhttps://t.co/yLJvy1pghP\n\n$BTC $ETH $ETC $BCH $LTC $XRP $DASH $XLM $XMR $ZEC $ADA $SYS $NEO $LUX $OMG $POWR $VTC $XEM $LSK $DGB $DOGE $XVG $VEN $ICX $ZCL $DRGN $WTC $IC $TRX $QTUM $LSK $ZRX 94907",False
140,3.0,Im giving 10x coin for hold now.\n\nhttps://t.co/1gZiI7TmBj\n\n$STORJ $ETH $BTC $BTS $ENRG $BLOCK $VTC $FCT $MTL $GAS $BAT $MAID $EFL $BURST $ARK $FUN $NXT $DCT $XAS $XEM $KMD $UBQ $BCH $CVC $GNT $XZC $PIVX $XVG $ZEC $ZCL \n472R1obsl8 https://t.co/2V1efky0R4,False
596,2.0,"RT @DAR_crypto: DAR Top Cryptocurrency News - Feb 27th, 2018 - https://t.co/QmsOjQ3KHN #Bitcoin $BTC #CircleFinancial #Poloniex @BoostVC…",True


In [5]:
# Spot-check five random rows: retweet flag, text, extracted symbols and the
# filter value stored with each tweet.
tweets.loc[:, ['retweet', 'text', 'symbols', 'filter']].sample(n=5)

Unnamed: 0,retweet,text,symbols,filter
288,False,@money2020 can payment be made with #bitcoin? $btc #Money2020 #cryptocurrency @monaco_card,"[{'text': 'btc', 'indices': [46, 50]}]",$btc
473,False,5 min #RSI Signals:\n\n$USDT - $BTC: 24.2\n$USDT - $NXT: 24.77\n$USDT - $NEO: 25.64\n$BTC - $BCC: 26.49\n$USDT - $XVG: 27.45\n\n$XRP $AMP $OMG,"[{'text': 'USDT', 'indices': [21, 26]}, {'text': 'BTC', 'indices': [29, 33]}, {'text': 'USDT', 'indices': [40, 45]}, {'text': 'NXT', 'indices': [48, 52]}, {'text': 'USDT', 'indices': [60, 65]}, {'text': 'NEO', 'indices': [68, 72]}, {'text': 'BTC', 'indices': [80, 84]}, {'text': 'BCC', 'indices': [87, 91]}, {'text': 'USDT', 'indices': [99, 104]}, {'text': 'XVG', 'indices': [107, 111]}, {'text': 'XRP', 'indices': [120, 124]}, {'text': 'AMP', 'indices': [125, 129]}, {'text': 'OMG', 'indices': [130, 134]}]",$btc
370,False,"For people who are confused, @BittrexExchange will be supporting $ZCL $BTC airdrop for $BTCP. There wont be maket for $BTCP in Bittrex, bt you will be able to withdraw them to supported exchanges.\n\nSuch a confusing statement and that too in the 11th hour.\n\nhttps://t.co/CUbjkPcHOM https://t.co/8qFg8kmk5v","[{'text': 'ZCL', 'indices': [65, 69]}, {'text': 'BTC', 'indices': [70, 74]}, {'text': 'BTCP', 'indices': [87, 92]}]",$btc
523,True,"RT @NoSleepCrypto: Another $BTC H&amp;S chart for you all. A break of the neckline suggests a fib target of 15K, with a measured move target of…","[{'text': 'BTC', 'indices': [27, 31]}]",$btc
174,True,RT @TraderCobb: $BTC ### I AM VERY EXCITED TO ANNOUNCE ###....... The https://t.co/9LT8ldTQU0 website is ready for launch. All courses and…,"[{'text': 'BTC', 'indices': [16, 20]}]",$btc


## Processed data at different time resolutions

In [8]:
# Read the whole `vart_10min` table into memory.
rows = c.execute("SELECT * FROM vart_10min").fetchall()

# Labels are assigned positionally: a `date` column followed by f1 ... f13.
# NOTE(review): `date` is left as returned by SQLite (no pd.to_datetime), unlike the tweets frame — confirm this is intended.
varT = pd.DataFrame(
    rows,
    columns=[
        'date',
        'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7',
        'f8', 'f9', 'f10', 'f11', 'f12', 'f13',
    ],
)

In [9]:
# Show the last five rows of the 10-minute table.
varT.tail(n=5)

Unnamed: 0,date,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13
84,2018-02-28 08:30:00,0,0,0,0,0,0,0,0,0,0,0,0,0
85,2018-02-28 08:40:00,108,15,5,58,49,12,2,45,28,11,69,17,41
86,2018-02-28 08:50:00,167,18,18,67,55,17,4,82,46,16,105,27,50
87,2018-02-28 09:00:00,197,31,16,65,48,4,1,112,56,28,113,30,48
88,2018-02-28 09:10:00,111,17,16,53,35,18,8,57,44,10,57,26,27
