# Data Analysis
## Tweet data

In [41]:
import sqlite3
import pandas as pd
from afinn import Afinn
import json

# Open the SQLite database that holds the collected data and create the
# cursor that the query cells below use. The path is relative, so it is
# resolved against the kernel's current working directory.
DB_PATH = '../tweets.db'
db = sqlite3.connect(DB_PATH)
c = db.cursor()

In [42]:
# Pull every row of the `tweets` table and label the four columns by hand;
# SELECT * returns them positionally, so this list must match the table's
# column order.
tweet_columns = ['id', 'date', 'json', 'filter']
rows = c.execute("SELECT * FROM tweets").fetchall()

tweets = pd.DataFrame(data=rows, columns=tweet_columns)

# Parse the 'date' column into pandas datetimes using its fixed layout.
tweets['date'] = pd.to_datetime(tweets['date'], format='%Y-%m-%d %H:%M:%S')

In [43]:
# Module-level Afinn scorer, constructed with no arguments; sentiment_score()
# below calls its .score() method.
afinn = Afinn()

def tweet_text(data):
    """Return the text of a tweet given its raw JSON document.

    When the decoded tweet has a 'retweeted_status' key, the 'text' of that
    nested status is returned instead of the tweet's own top-level 'text'.

    Parameters
    ----------
    data
        JSON document for one tweet, in any form accepted by ``json.loads``.

    Returns
    -------
    The value stored under 'text' (of the nested retweeted status if present,
    otherwise of the tweet itself).

    Raises
    ------
    ValueError
        If ``data`` is not valid JSON (raised by ``json.loads``).
    KeyError
        If the selected object has no 'text' key.
    """
    # Decode once and reuse the result; parsing the same string again for
    # each lookup is wasted work when this runs over a whole column.
    tweet = json.loads(data)
    if 'retweeted_status' in tweet:
        return tweet['retweeted_status']['text']
    return tweet['text']

def sentiment_score(data):
    """Score one tweet's text with the module-level Afinn instance.

    ``data`` is the tweet's raw JSON document; the text is extracted with
    ``tweet_text`` and passed to ``afinn.score``, whose result is returned.
    """
    text = tweet_text(data)
    return afinn.score(text)



In [44]:
# Extract each tweet's text once and derive both new columns from it, so the
# JSON of every row is not decoded a second time just to score it.
# Functions are passed to .apply directly; a lambda wrapper adds nothing.
tweet_texts = tweets['json'].apply(tweet_text)

# Assign in the same order as before ('sentiment_score', then 'text') so the
# resulting column layout of `tweets` is unchanged.
tweets['sentiment_score'] = tweet_texts.apply(afinn.score)
tweets['text'] = tweet_texts

In [45]:
# Widen string columns in the rendered table so more of each tweet is shown.
pd.set_option('display.max_colwidth', 144)

# Show the score and text of every tweet with a negative sentiment score.
tweets.loc[tweets['sentiment_score'] < 0, ['sentiment_score', 'text']]


Unnamed: 0,sentiment_score,text
1,-2.0,TimeToken $TTK Airdrop #1 has arrived! We are hosting a crazy $25 airdrop! \nYou will only receive $5 for filling ou… https://t.co/S73I80oOBh
3,-1.0,#crypto mkt cap winners last 2 hours\n\n$BTC $OMG $DGD $XLM $DASH $ETC $LSK $BTS \nLowest fees in trading… https://t.co/ZGilOWgpWB
13,-4.0,"$BTC Update: Retested bear trend resistance at $10.8-10.9k, seems to have rejected but BTC has a habit of faking mo… https://t.co/wjLV3SWh7p"
17,-2.0,U can still get 14000 https://t.co/n2lwMcjQMo for free = no risk. The bridge is aiming to be completed by 8th Marc… https://t.co/gv7SQq6zd0
36,-2.0,"dont miss out on registering on Binance, before they close registrations again\n\n https://t.co/fxrPmg6hbe\n\n $BTC… https://t.co/aEhPtVGS9s"
43,-7.0,It is tempting to cut your losses and go all-in on one coin to regain what is lost. Just for a short term. What do… https://t.co/sT2PeC6YOZ
46,-1.0,Block: 511295 \nSize: 1060.98 kb \nFee: 2.23$/kb \nPrice: 10807.1$ | 68385.71¥ \n#Bitcoin $BTC #BTC $XBT #XBT… https://t.co/HvjQppH8XQ
53,-1.0,"“Bitcoin, for us, is not stopping at buying and selling,” Square CEO Jack Dorsey said. $BTC $SQ https://t.co/FhLKfKOz9C"
65,-4.0,"Craig Wright, who claimed that he created #SatoshiNakamot is being sued for stealing $5 billion in #bitcoin from a… https://t.co/OuOH0nDZ09"
66,-2.0,"If this is your first time experience of a $BTC correction, make sure you note down all the mistakes you did, inspe… https://t.co/UEbZLAD9nj"


## Processed data at different time resolutions

In [9]:
# Read the whole `vart_10min` table into a DataFrame whose columns are
# labelled 'date' followed by 'f1' through 'f13'.
vart_columns = ['date'] + ['f%d' % i for i in range(1, 14)]
rows = c.execute("SELECT * FROM vart_10min").fetchall()
varT = pd.DataFrame(data=rows, columns=vart_columns)

In [10]:
# Preview the last five rows of the loaded table.
varT.tail(n=5)

Unnamed: 0,date,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13
84,2018-02-28 08:30:00,0,0,0,0,0,0,0,0,0,0,0,0,0
85,2018-02-28 08:40:00,108,15,5,58,49,12,2,45,26,11,71,15,43
86,2018-02-28 08:50:00,167,18,18,67,55,17,4,82,45,16,106,26,51
87,2018-02-28 09:00:00,197,31,16,65,48,4,1,112,56,28,113,30,48
88,2018-02-28 09:10:00,111,17,16,53,35,18,8,57,44,10,57,26,27
