# Read metadata database

Source: `s3://multimedia-commons/tools/etc/yfcc100m_dataset.sql`. Despite the `.sql` extension, this file is opened below as a SQLite database.

In [8]:
import sqlite3
import pandas as pd

In [2]:
# Filesystem path of the metadata database file that is opened with sqlite3 below.
# NOTE(review): presumably a local copy of the S3 object named at the top of this
# notebook (same file name) — confirm.
db_path = '/mnt/efs/yfcc100m_dataset.sql'

In [3]:
# Open the metadata database read-only through a SQLite URI.
# A plain sqlite3.connect(db_path) silently creates a new, empty database when
# the file does not exist, so a wrong path would only surface later as a
# confusing error while listing the schema. With mode=ro a missing file raises
# sqlite3.OperationalError right here, and the dataset cannot be modified by
# accident.
# NOTE: db_path is inserted into the URI as-is; a path containing '?', '#' or
# '%' would need percent-encoding first.
con = sqlite3.connect('file:%s?mode=ro' % db_path, uri=True)
# Shared cursor used by the query cells below.
cur = con.cursor()

## Schema

In [4]:
# Print every table in the database followed by the column names of each table.
# Adapted from https://stackoverflow.com/a/41007154/11262633
newline_indent = '\n   '
# Return TEXT values as str (already the sqlite3 default on Python 3).
con.text_factory = str

result = cur.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
# Each row is a 1-tuple (name,). Unlike list(zip(*result))[0], the generator
# also works when the database contains no tables (no IndexError).
table_names = sorted(row[0] for row in result)
print("\ntables are:"+newline_indent+newline_indent.join(table_names))

for table_name in table_names:
    # The table name is embedded as a SQL string literal, so double any single
    # quote in it to keep the PRAGMA statement valid for unusual table names.
    quoted_name = table_name.replace("'", "''")
    result = cur.execute("PRAGMA table_info('%s')" % quoted_name).fetchall()
    # table_info rows are (cid, name, type, notnull, dflt_value, pk); index 1
    # is the column name.
    column_names = tuple(row[1] for row in result)
    print(("\ncolumn names for %s:" % table_name)
          +newline_indent
          +(newline_indent.join(column_names)))

print("\nComplete")


tables are:
   yfcc100m_dataset

column names for yfcc100m_dataset:
   photoid
   uid
   unickname
   datetaken
   dateuploaded
   capturedevice
   title
   description
   usertags
   machinetags
   longitude
   latitude
   accuracy
   pageurl
   downloadurl
   licensename
   licenseurl
   serverid
   farmid
   secret
   secretoriginal
   ext
   marker

Complete


## Sample data

In [10]:
# Load the first ten rows of the metadata table into a DataFrame and display
# them as the cell output.
sample_query = 'select * from yfcc100m_dataset limit 10'
df = pd.read_sql_query(sample_query, con)
df.head(10)

Unnamed: 0,photoid,uid,unickname,datetaken,dateuploaded,capturedevice,title,description,usertags,machinetags,...,pageurl,downloadurl,licensename,licenseurl,serverid,farmid,secret,secretoriginal,ext,marker
0,28605,34427465634@N01,Emily,2004-04-30 23:33:51.0,1083393231,,C%26H+ascii,,,,...,http://www.flickr.com/photos/34427465634@N01/2...,http://farm1.staticflickr.com/1/28605_d138ef7b...,Attribution-NonCommercial-ShareAlike License,http://creativecommons.org/licenses/by-nc-sa/2.0/,1,1,d138ef7b45,d138ef7b45,jpg,0
1,29060,34427466731@N01,striatic,2004-05-02 17:12:01.0,1083543121,,Christmas+Panorama+%7Bnotes%7D,a+link+to+a+much+higher+resolution+version%2C+...,"alberta,canada,christmas,edmonton,family,indoo...",,...,http://www.flickr.com/photos/34427466731@N01/2...,http://farm1.staticflickr.com/1/29060_e90c2a0c...,Attribution License,http://creativecommons.org/licenses/by/2.0/,1,1,e90c2a0c1c,e90c2a0c1c,jpg,0
2,29205,34427469121@N01,George,2004-05-03 10:16:04.0,1083604564,,La+Luna,From+outside+my+window.,"moon,vankie",,...,http://www.flickr.com/photos/34427469121@N01/2...,http://farm1.staticflickr.com/1/29205_64d31038...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,64d31038b1,64d31038b1,jpg,0
3,29209,34427469121@N01,George,2004-05-03 10:16:06.0,1083604566,,From+the+entry,This+is+what+you+see+-+with+no+furniture.,apartment,,...,http://www.flickr.com/photos/34427469121@N01/2...,http://farm1.staticflickr.com/1/29209_6d55fad2...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,6d55fad203,6d55fad203,jpg,0
4,29294,37912375204@N01,Schlomo+Rabinowitz,2004-05-01 16:49:37.0,1083625746,SONY+DSC-V1,detail,detail+of+painting+by+Romanowski+and+friends,"abstract,graffiti,painting,romanowski,urban",,...,http://www.flickr.com/photos/37912375204@N01/2...,http://farm1.staticflickr.com/1/29294_a0fc568d...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,a0fc568df3,a0fc568df3,jpg,0
5,29604,37912375204@N01,Schlomo+Rabinowitz,2004-04-23 15:51:18.0,1083731910,SONY+DSC-V1,meandwayne,on+his+rockstar+tourbus,"friend,me,mirror,notes,schlomo,unfound,wayne",,...,http://www.flickr.com/photos/37912375204@N01/2...,http://farm1.staticflickr.com/1/29604_878d5a7d...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,878d5a7ddd,878d5a7ddd,jpg,0
6,29812,35034347309@N01,Josh+Mishell,2004-05-05 16:38:20.0,1083800300,,butterfly3,butterfy+pavilion%2C+denver.,"butterfly,photos+i+took",,...,http://www.flickr.com/photos/35034347309@N01/2...,http://farm1.staticflickr.com/1/29812_a4b0eac0...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,a4b0eac007,a4b0eac007,jpg,0
7,29814,35034347309@N01,Josh+Mishell,2004-05-05 16:38:21.0,1083800301,,butterfly5,butterfy+pavilion%2C+denver.,"butterfly,photos+i+took",,...,http://www.flickr.com/photos/35034347309@N01/2...,http://farm1.staticflickr.com/1/29814_04e328d9...,Attribution-NonCommercial-NoDerivs License,http://creativecommons.org/licenses/by-nc-nd/2.0/,1,1,04e328d9a0,04e328d9a0,jpg,0
8,29857,34427465504@N01,Trinity,2004-05-05 20:01:24.0,1083812484,,ninja+bee,%3Ca+href%3D%22http%3A%2F%2Fwww.burningprose.c...,2004,,...,http://www.flickr.com/photos/34427465504@N01/2...,http://farm1.staticflickr.com/1/29857_67ac7fa6...,Attribution-ShareAlike License,http://creativecommons.org/licenses/by-sa/2.0/,1,1,67ac7fa6e4,67ac7fa6e4,jpg,0
9,29872,34427465504@N01,Trinity,2004-05-05 21:30:05.0,1083817805,,bruise+5%2F2%2F4,I+got+this+bruise+at+work...wonder+if+I+can+ge...,"2004,bruise,me,photo,unfound",,...,http://www.flickr.com/photos/34427465504@N01/2...,http://farm1.staticflickr.com/1/29872_c787b1b0...,Attribution-ShareAlike License,http://creativecommons.org/licenses/by-sa/2.0/,1,1,c787b1b056,c787b1b056,jpg,0


## Number of rows in table

In [None]:
# Count all rows in the metadata table and print the raw fetchall() result
# (a list holding a single one-element tuple).
count_cursor = cur.execute('select count(*) from yfcc100m_dataset')
result = count_cursor.fetchall()
print(result)