In [1]:
import sqlite3
import random
import itertools
import subprocess
import os
import shlex

In [2]:
# path on rte
db_path = "/home/rte/data/db/arxiv_db_images.sqlite3"

In [3]:
# Here we import the sqlite3 database and create a cursor

db = sqlite3.connect(db_path)
c = db.cursor()

In [4]:
# test that we can fetch the pragma for each table

c.execute('PRAGMA TABLE_INFO({})'.format("metadata"))
info = c.fetchall()

print("\nColumn Info:\nID, Name, Type, NotNull, DefaultVal, PrimaryKey")
for col in info:
    print(col)


Column Info:
ID, Name, Type, NotNull, DefaultVal, PrimaryKey
(0, 'id', 'INTEGER', 0, None, 1)
(1, 'identifier', 'TEXT', 0, None, 0)
(2, 'created', 'TEXT', 0, None, 0)
(3, 'cat', 'TEXT', 0, None, 0)
(4, 'authors', 'TEXT', 0, None, 0)
(5, 'title', 'TEXT', 0, None, 0)
(6, 'abstract', 'TEXT', 0, None, 0)
(7, 'licence', 'TEXT', 0, None, 0)


In [5]:
c.execute('PRAGMA TABLE_INFO({})'.format("images"))
info = c.fetchall()

print("\nColumn Info:\nID, Name, Type, NotNull, DefaultVal, PrimaryKey")
for col in info:
    print(col)


Column Info:
ID, Name, Type, NotNull, DefaultVal, PrimaryKey
(0, 'id', 'INTEGER', 0, None, 1)
(1, 'identifier', 'TEXT', 0, None, 0)
(2, 'filename', 'TEXT', 0, None, 0)
(3, 'filesize', 'INT', 0, None, 0)
(4, 'path', 'TEXT', 0, None, 0)
(5, 'x', 'INT', 0, None, 0)
(6, 'y', 'INT', 0, None, 0)
(7, 'imageformat', 'TEXT', 0, None, 0)


In [6]:
# Get total number of images per primary category only

targetDate = "2018-10-01"

c.execute('''
    SELECT substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1), count(images.identifier)
    FROM images
    LEFT JOIN metadata ON images.identifier = metadata.identifier
    WHERE metadata.created BETWEEN date(?) 
    AND date(?, "start of month","+1 month","-1 day")
    AND images.x != ''
    GROUP BY substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1)
    ORDER BY count(images.identifier) DESC    
    ''', (targetDate, targetDate,))
categories = c.fetchall()
for row in categories:
    print(row)

('cs.CV', 14622)
('cs.LG', 11040)
('astro-ph.GA', 6779)
('hep-ph', 5509)
('math.NA', 4213)
('astro-ph.HE', 3694)
('stat.ML', 3504)
('astro-ph.SR', 3378)
('quant-ph', 3351)
('cs.RO', 2594)
('math.OC', 2589)
('astro-ph.CO', 2437)
('hep-th', 2188)
('physics.flu-dyn', 2061)
('cs.CL', 2040)
('cond-mat.mes-hall', 2017)
('stat.ME', 1971)
('cond-mat.str-el', 1889)
('astro-ph.EP', 1736)
('cond-mat.mtrl-sci', 1604)
('eess.SP', 1592)
('gr-qc', 1577)
('cs.CR', 1572)
('physics.comp-ph', 1548)
('astro-ph.IM', 1377)
('cs.IT', 1346)
('cs.NI', 1337)
('nucl-th', 1312)
('cs.AI', 1265)
('cs.DC', 1239)
('stat.AP', 1229)
('hep-ex', 1189)
('math.GT', 1162)
('eess.IV', 1160)
('cond-mat.stat-mech', 1146)
('cs.GR', 1072)
('cs.SY', 1033)
('cond-mat.soft', 976)
('cond-mat.quant-gas', 809)
('cs.CG', 799)
('cs.IR', 784)
('cs.DS', 756)
('physics.ins-det', 756)
('physics.optics', 735)
('cs.DB', 722)
('hep-lat', 697)
('cs.SI', 676)
('physics.soc-ph', 672)
('math.AP', 654)
('math.ST', 648)
('physics.chem-ph', 625)
('ma

In [7]:
print(len(categories))

148


In [8]:
print(categories[0][0])

cs.CV


In [9]:
# remove entries with less than 144 total images

catlist = []

for cat in categories:
    if cat[1] >= 144:
        catlist.append(cat)
        print(cat)

print('-' * 20)
print("total number of categories with required entries: ")
print(len(catlist))

('cs.CV', 14622)
('cs.LG', 11040)
('astro-ph.GA', 6779)
('hep-ph', 5509)
('math.NA', 4213)
('astro-ph.HE', 3694)
('stat.ML', 3504)
('astro-ph.SR', 3378)
('quant-ph', 3351)
('cs.RO', 2594)
('math.OC', 2589)
('astro-ph.CO', 2437)
('hep-th', 2188)
('physics.flu-dyn', 2061)
('cs.CL', 2040)
('cond-mat.mes-hall', 2017)
('stat.ME', 1971)
('cond-mat.str-el', 1889)
('astro-ph.EP', 1736)
('cond-mat.mtrl-sci', 1604)
('eess.SP', 1592)
('gr-qc', 1577)
('cs.CR', 1572)
('physics.comp-ph', 1548)
('astro-ph.IM', 1377)
('cs.IT', 1346)
('cs.NI', 1337)
('nucl-th', 1312)
('cs.AI', 1265)
('cs.DC', 1239)
('stat.AP', 1229)
('hep-ex', 1189)
('math.GT', 1162)
('eess.IV', 1160)
('cond-mat.stat-mech', 1146)
('cs.GR', 1072)
('cs.SY', 1033)
('cond-mat.soft', 976)
('cond-mat.quant-gas', 809)
('cs.CG', 799)
('cs.IR', 784)
('cs.DS', 756)
('physics.ins-det', 756)
('physics.optics', 735)
('cs.DB', 722)
('hep-lat', 697)
('cs.SI', 676)
('physics.soc-ph', 672)
('math.AP', 654)
('math.ST', 648)
('physics.chem-ph', 625)
('ma

In [10]:
print(catlist[0][0])

cs.CV


In [None]:
# Get each image entry with a particular category in a given month

sql = ("SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created, images.id "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE metadata.created BETWEEN date('2018-10-01') "
    "AND date('2018-10-31') "
    "AND images.x != '' "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? ")

c.execute(sql, (catlist[0][0],))
rows = c.fetchall()

In [41]:
# Get each image entry with a particular category in a given month
# Specifically, get all images from cs.CV from 2012

sql = ("SELECT metadata.authors, metadata.title, metadata.created, metadata.identifier  "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE metadata.created BETWEEN date('2012-01-01') "
    "AND date('2012-12-31') "
    "AND images.x != '' "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? ")

c.execute(sql, ('cs.CV',))
rows = c.fetchall()

In [11]:
# Get each image entry with a particular category in a given month
# Specifically, get all images from stat.ML from 2012

sql = ("SELECT metadata.authors, metadata.title, metadata.created, metadata.identifier  "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE metadata.created BETWEEN date('2012-01-01') "
    "AND date('2012-12-31') "
    "AND images.x != '' "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? ")

c.execute(sql, ('stat.ML',))
rows = c.fetchall()

In [12]:
print(len(rows))

2889


In [13]:
for row in rows[:100]:
    print(row)

("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Susemihl, Alex; ']", 'Temporal Autoencoding Restricted Boltzmann Machine', '2012-10-31', '1210.8353')
("['Häusler, Chris; Suse

In [44]:
without_duplicates = []
for row in rows[:]:
    if row not in without_duplicates:
        without_duplicates.append(row)

In [45]:
print(len(without_duplicates))
for row in without_duplicates[:]:
    print(row)

199
("['Hashemi, Jordan; Spina, Thiago Vallin; Tepper, Mariano; Esler, Amy; Morellas, Vassilios; Papanikolopoulos, Nikolaos; Sapiro, Guillermo; ']", 'Computer vision tools for the non-invasive assessment of autism-related\n  behavioral markers', '2012-10-25', '1210.7014')
("['Bagon, Shai; Galun, Meirav; ']", 'A Multiscale Framework for Challenging Discrete Optimization', '2012-10-26', '1210.7070')
("['Bagon, Shai; ']", 'Discrete Energy Minimization, beyond Submodularity: Applications and\n  Approximations', '2012-10-27', '1210.7362')
("['Rebagliati, Nicola; Solé-Ribalta, Albert; Pelillo, Marcello; Serratosa, Francesc; ']", 'On the Relation Between the Common Labelling and the Median Graph', '2012-10-31', '1210.8262')
("['Sarhrouni, Elkebir; Hammouch, Ahmed; Aboutajdine, Driss; ']", 'Dimensionality Reduction and Classification Feature Using Mutual\n  Information Applied to Hyperspectral Images: A Wrapper Strategy Algorithm\n  Based on Minimizing the Error Probability Using the Inequalit

In [46]:
rows = without_duplicates

In [33]:
# TESTING ONLY

# Get each image entry with a particular category in a given month
# Specifically, get all images from cs.CV from 2012

sql = ("SELECT metadata.authors, metadata.title, metadata.created, metadata.identifier  "
    "FROM metadata "
    "WHERE metadata.created BETWEEN date('2012-01-01') "
    "AND date('2012-12-31') "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? ")

c.execute(sql, ('cs.CV',))
rows = c.fetchall()

In [40]:
print(len(rows))

422


In [52]:
for row in rows[:100]:
    print(row)

(("['Hashemi, Jordan; Spina, Thiago Vallin; Tepper, Mariano; Esler, Amy; Morellas, Vassilios; Papanikolopoulos, Nikolaos; Sapiro, Guillermo; ']", 'Computer vision tools for the non-invasive assessment of autism-related\n  behavioral markers', '2012-10-25', '1210.7014'),)
(("['Bagon, Shai; Galun, Meirav; ']", 'A Multiscale Framework for Challenging Discrete Optimization', '2012-10-26', '1210.7070'),)
(("['Bagon, Shai; ']", 'Discrete Energy Minimization, beyond Submodularity: Applications and\n  Approximations', '2012-10-27', '1210.7362'),)
(("['Rebagliati, Nicola; Solé-Ribalta, Albert; Pelillo, Marcello; Serratosa, Francesc; ']", 'On the Relation Between the Common Labelling and the Median Graph', '2012-10-31', '1210.8262'),)
(("['Sarhrouni, Elkebir; Hammouch, Ahmed; Aboutajdine, Driss; ']", 'Dimensionality Reduction and Classification Feature Using Mutual\n  Information Applied to Hyperspectral Images: A Wrapper Strategy Algorithm\n  Based on Minimizing the Error Probability Using the 

In [51]:
# convert to tuples
rows = [(x,) for x in rows[:]]

### Get accreditations formatted

In [53]:
# go through all of the retrieved SQL rows and format as an accreditation
# if html is True, format with <a href=####> for web usage

import re

accreditations = []

# html = True
html = False

# grab the details and check each
for row in rows:
    s = row[0][0]
    start = "['"
    end = "']"
    author = s[s.find(start)+len(start):s.rfind(end)]
    author = author[:-2]
    print(author)
    
    title = row[0][1]
    # replace line breaks and double spaces
    title = title.replace("\n", "").replace("  "," ")
    print(title)
    
    date = row[0][2].split("-")[0]
    print(date)
    
    identifier = row[0][3]
    print(identifier)
#     reg_exp = re.compile("/[^\d]\d{2}[^\d]/")
    longest_digits = max(re.findall(r'\d+', identifier), key = len)
    print(len(longest_digits))
    
    # if the identifier contains seven consecutive numbers, add a slash
    if len(longest_digits) == 7:
        print("----- regex match -----")
        reverse = identifier[::-1]
        print(reverse)
        identifier_reverse = reverse[:7] + "/" + reverse[7:]
        identifier = identifier_reverse[::-1]
    else:
        # otherwise we can leave the identifier how it is
        print("----- no match -----")
    print(identifier)
    url = "https://arxiv.org/abs/" + identifier
    print(url)
#     print(row[0][0])

    # format string and append
    if html:
        fmt_str = '{}: {}, {}, <a href="{}">{}</a>'
        accreditations.append(fmt_str.format(author, title, str(date), url, url))  
    else:
        fmt_str = '{}: {}, {}, {}'
        accreditations.append(fmt_str.format(author, title, str(date), url, url))    

    # harder way of doing it
#     accreditations.append([author + ': "' + title + '", ' + str(date) + ", " + '<a href="' + url + '">' + url + '</a>'])

Hashemi, Jordan; Spina, Thiago Vallin; Tepper, Mariano; Esler, Amy; Morellas, Vassilios; Papanikolopoulos, Nikolaos; Sapiro, Guillermo
Computer vision tools for the non-invasive assessment of autism-related behavioral markers
2012
1210.7014
4
----- no match -----
1210.7014
https://arxiv.org/abs/1210.7014
Bagon, Shai; Galun, Meirav
A Multiscale Framework for Challenging Discrete Optimization
2012
1210.7070
4
----- no match -----
1210.7070
https://arxiv.org/abs/1210.7070
Bagon, Shai
Discrete Energy Minimization, beyond Submodularity: Applications and Approximations
2012
1210.7362
4
----- no match -----
1210.7362
https://arxiv.org/abs/1210.7362
Rebagliati, Nicola; Solé-Ribalta, Albert; Pelillo, Marcello; Serratosa, Francesc
On the Relation Between the Common Labelling and the Median Graph
2012
1210.8262
4
----- no match -----
1210.8262
https://arxiv.org/abs/1210.8262
Sarhrouni, Elkebir; Hammouch, Ahmed; Aboutajdine, Driss
Dimensionality Reduction and Classification Feature Using Mutual In

In [54]:
# print out with line breaks
for row in accreditations:
    print(row)

Hashemi, Jordan; Spina, Thiago Vallin; Tepper, Mariano; Esler, Amy; Morellas, Vassilios; Papanikolopoulos, Nikolaos; Sapiro, Guillermo: Computer vision tools for the non-invasive assessment of autism-related behavioral markers, 2012, https://arxiv.org/abs/1210.7014
Bagon, Shai; Galun, Meirav: A Multiscale Framework for Challenging Discrete Optimization, 2012, https://arxiv.org/abs/1210.7070
Bagon, Shai: Discrete Energy Minimization, beyond Submodularity: Applications and Approximations, 2012, https://arxiv.org/abs/1210.7362
Rebagliati, Nicola; Solé-Ribalta, Albert; Pelillo, Marcello; Serratosa, Francesc: On the Relation Between the Common Labelling and the Median Graph, 2012, https://arxiv.org/abs/1210.8262
Sarhrouni, Elkebir; Hammouch, Ahmed; Aboutajdine, Driss: Dimensionality Reduction and Classification Feature Using Mutual Information Applied to Hyperspectral Images: A Wrapper Strategy Algorithm Based on Minimizing the Error Probability Using the Inequality of Fano, 2012, https://a

### Get random images

In [None]:
# Get 144 random images

sql = ("SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created, images.id "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE images.x != '' "
    "AND images.id IN (SELECT images.id FROM images ORDER BY RANDOM() LIMIT 144) ")

c.execute(sql, )
rows = c.fetchall()

In [None]:
# Get 144 images from stat.ML

# maybe move this???

sql = ("SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created, images.id "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE images.x != '' "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? "
    "AND images.id IN (SELECT images.id FROM images ORDER BY RANDOM() LIMIT 144) ")

c.execute(sql, ("stat.ML", ))
rows = c.fetchall()

In [14]:
# Get 144 images from stat.ML from October 2012

# maybe move this???

sql = ("SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created, images.id "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE images.x != '' "
    "AND metadata.created BETWEEN date('2012-10-01') "
    "AND date('2012-10-31') "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? "
    "AND images.id IN (SELECT images.id FROM images ORDER BY RANDOM() LIMIT 144) ")

c.execute(sql, ("stat.ML", ))
rows = c.fetchall()

In [None]:
# Get 16 random images

sql = ("SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created, images.id "
    "FROM images "
    "LEFT JOIN metadata ON images.identifier = metadata.identifier "
    "WHERE images.x != '' "
    "AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ? "
    "ORDER BY RANDOM() LIMIT 144 ")

c.execute(sql, ("stat.ML", ))
rows = c.fetchall()

In [15]:
print(len(rows))

0


In [16]:
for row in rows:
    print(row)

In [17]:
# check to see if there are multiple categories
for row in rows:
    if len(row[0]) > 5:
        print(row)

### Shuffle list and then grab the filepath and id

In [None]:
# shuffle first, then get ids and paths

random.shuffle(rows)

In [None]:
ids = []
filepaths = []

for row in rows[:144]:
    print(row[5])
    ids.append(row[5])
    path = row[1] + '/' + row[2]
    print(path)
    filepaths.append(path)

In [None]:
print(filepaths)

In [None]:
print(ids)

### Used for generating figures for paper

In [None]:
# get a montage of some images

# print the current working directory
os.chdir('/home/rte/re-imaging/sqlite-scripts/')
print(os.getcwd())

os.chdir('/home/rte/arXiv/')
print(os.getcwd())

prearg = shlex.split("-colorspace CMYK")
# arguments = shlex.split("-colorspace sRGB -background white -alpha background -trim +repage -flatten -geometry 240x240+2+2 -tile 12x /home/rte/documentation/data-samples/test_py_montage.jpg")

# for bigger montage of 12x12
arguments = shlex.split("-colorspace sRGB -background white -alpha background -geometry 240x240+2+2 -tile 12x")
# for smaller montage of 4x4
# arguments = shlex.split("-colorspace sRGB -background white -alpha background -geometry 480x480+2+2 -tile 4x")

outputname = ["/home/rte/documentation/data-samples/random_montage_12x12_stat.ML.jpg"]
# print(arguments)

filelist = []
# this takes a slice of the larger shuffled list
for filepath in itertools.islice(filepaths, 0, 144):
#     print(filepath)
    # put the filepath into the list but add the directory, remove the dot
    # and also add [0] to only use the first page of multi-page image documents
    filelist.append("src_all" + filepath.replace('./','/') + '[0]')
           
# print(filelist)

# call the montage command and parse list of files and arguments
montage_cmd = ["montage"] + prearg + filelist + arguments + outputname

result = subprocess.Popen(montage_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = result.communicate()
print(out)
print(err)
print("subprocess finished")
print("-" * 40)


In [None]:
# use this to copy to other files, paste as variable
print(ids)

In [None]:
# testing writing filenames

# os.chdir("/home/rte/re-imaging/sqlite-scripts/")
os.chdir("/home/rte/documentation/data-samples/")
print(os.getcwd())

# fname = "2018-10_" + catlist[0][0] + "_ids.txt"
fname = "random_montage_4x4_v2_ids.txt"
f = open(fname, "w+")
for row in ids:
    f.write(str(row) + "\n")
f.close()

In [None]:
for cat in catlist:
    print(cat[0])

In [None]:
print(len(filelist))

for row in filelist:
    print(row)

In [None]:
# testing writing filenames

fname = "2018-10_" + catlist[0][0] + ".txt"
f = open(fname, "w+")
for row in filelist:
    f.write(row + "\n")
f.close()

In [None]:
print(targetDate[:7])

In [None]:
targetYM = targetDate[:7]
savepath = "/home/rte/Documents/documentation/data-samples/montages/category/" + targetYM + "/"

if os.path.isdir(savepath):
    print("directory exists, saving to: " + savepath)
else:
    try:
        os.makedirs(savepath)
    except OSError:
        print("failed to create directory: " + savepath)
    else:
        print("successfully created the directory: " + savepath)

In [None]:
print(len(catlist))

for row in catlist:
    print(row[0])

The following block of code takes the list of categories, queries the SQL database for a list of images that match each category and a given year, then shuffles this list, selects a subset, and uses that to run the montage command ^_^ 

In [None]:
# added date formatting
sql = ('''
    SELECT metadata.cat, images.path, images.filename, images.identifier, metadata.created
    FROM images
    LEFT JOIN metadata ON images.identifier = metadata.identifier
    WHERE metadata.created BETWEEN date(?) AND date(?, 'start of month','+1 month','-1 day')
    AND images.x != ''
    AND substr(trim(metadata.cat),1,instr(trim(metadata.cat)||' ',' ')-1) = ?
    ''')

targetYM = targetDate[:7]
savepath = "/home/rte/Documents/documentation/data-samples/montages/category/" + targetYM + "/"

if os.path.isdir(savepath):
    print("saving to: " + savepath)
else:
    try:
        os.makedirs(path)
    except OSError:
        print("Failed to create directory: " + savepath)
    else:
        print("Successfully created the directory: " + savepath)
        

for cat in catlist:
    print("querying for category: " + str(cat[0]))
    c.execute(sql, (targetDate, targetDate, cat[0], ))
    rows = c.fetchall()
    
    print("total number of images found: " + str(len(rows)))
    
    filepaths = []

    for row in rows:
        path = row[1] + '/' + row[2]
    #     print(path)
        filepaths.append(path)

    # shuffle the whole list
    random.shuffle(filepaths)
    
    os.chdir('/home/rte/re-imaging/sqlite-scripts/')

    # os.chdir('src_all')
#     print(os.getcwd())

    # format the arguments for montage
    arguments = shlex.split("-colorspace sRGB -units PixelsPerInch -density 300 -background white -alpha off -geometry 240x240+2+2 -tile 12x")

    filelist = []
    # this takes a slice of the larger shuffled list
    for filepath in itertools.islice(filepaths, 0, 144):
        # put the filepath into the list but add the directory, remove the dot
        # and also add [0] to only use the first page of multi-page image documents
        filelist.append("src_all" + filepath.replace('./','/') + '[0]')

    # write list of images to file (for debugging purposes, mostly)
    fname = savepath + targetYM + "_" + cat[0] + "_" + str(cat[1]) + ".txt"
    f = open(fname, "w+")
    for row in filelist:
        f.write(row + "\n")
    f.close()
    
#     outputname = "test_py_montage.jpg"
    outputname = [savepath + "montage_" + targetYM + "_" + cat[0] + "_" + str(cat[1]) + ".jpg"]

    print("calling montage")
    # call the montage command and parse list of files and arguments
    montage_cmd = ["montage"] + filelist + arguments + outputname
#     print(montage_cmd)
    result = subprocess.Popen(montage_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = result.communicate()
    print(out)
    print(err)
    print("subprocess finished")
    print("-" * 40)

In [None]:
# make list of categories
# make list of dates (year/month?)
# iterate through with a new sqlite select command
# run montage