## This file contains the source code to generate paired charts.

In [1]:
import altair as alt
from altair_saver import save
import pydataset 
from vega_datasets import data
import itertools
import json
import random
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import os
import math
import toolz
import glob as glob
import copy

In [2]:
## Output folder for every generated artefact (chart PNGs, Vega-Lite JSON specs, CSV tables).
## It is created here because the later cells write into it and fail if it does not exist.
save_Folder = 'sampleFolder'
os.makedirs(save_Folder, exist_ok=True)

In [3]:
## Get dataset
## Quantitative columns of the cars table
numFields = [
    'Miles_per_Gallon',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight_in_lbs',
    'Acceleration',
]

## The cars table as a DataFrame, plus the same rows as a list of plain dicts
## (round-tripped through JSON so every value is JSON-serialisable)
dataset = data.cars()
datasetList = json.loads(dataset.to_json(orient='records'))

dataset.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [4]:
## Init Vega Specifications
## Template bar-chart spec: an ordinal band-scale x channel and a quantitative y channel.
## The data values are empty here; the field names, width and height are placeholders.
vSpec = {
    'data': {'values': []},
    'width': 800,
    'height': 300,
    'mark': 'bar',
    'encoding': {
        'x': {
            'field': 'name',
            'type': 'ordinal',
            'scale': {'type': 'band', 'paddingInner': 0},
            'axis': {},
            'sort': None,
            'title': None,
        },
        'y': {
            'title': None,
            'field': 'age',
            'type': 'quantitative',
            'axis': {'grid': False},
        },
    },
}
vSpec

{'data': {'values': []},
 'width': 800,
 'height': 300,
 'mark': 'bar',
 'encoding': {'x': {'field': 'name',
   'type': 'ordinal',
   'scale': {'type': 'band', 'paddingInner': 0},
   'axis': {},
   'sort': None,
   'title': None},
  'y': {'title': None,
   'field': 'age',
   'type': 'quantitative',
   'axis': {'grid': False}}}}

In [5]:
## Parameters
Height = 400                                                       ## chart height in pixels
BarRanges = [*range(5, 6)]                                         ## number of bars per chart
AspectRatioRanges = [w / Height for w in range(200, 800, 100)]     ## width / height
BandRanges = [pct / 100 for pct in range(30, 101, 30)]
TextRotationRanges = [0, -45, -90]                                 ## axis label angle
OrientationRanges = [0, 1]  ## 0-column (vertical); 1-bar (horizontal)
MaxCharRanges = [*range(3, 6)]                                     ## label length in characters

## N is the smallest multiple of the LCM of all range sizes that is >= MinN,
## so every value of every parameter can be repeated the same number of times
MinN = 20
LCM = np.lcm.reduce([
    len(values)
    for values in (BarRanges, AspectRatioRanges, BandRanges,
                   TextRotationRanges, OrientationRanges, MaxCharRanges)
])
N = LCM * math.ceil(MinN / LCM)
print('number of pairs needed: ', N)

number of pairs needed:  24


In [6]:
## Generate random comparison pairs
def tile2N(array, N):
    """Repeat `array` int(N / len(array)) times and return the copies in random order.

    The result is a new NumPy array shuffled in place with `np.random.shuffle`;
    when N is a multiple of len(array) it has exactly N entries and every
    value of `array` occurs equally often.
    """
    repeats = int(N / len(array))
    tiled = np.tile(array, repeats)
    np.random.shuffle(tiled)
    return tiled

## Parameters shared by both charts of a pair
bars = tile2N(BarRanges, N)
maxChar = tile2N(MaxCharRanges, N)

## Per-chart parameters: an independently shuffled draw for each side of the pair
aspectRatio1, bands1, orientation1, axisLabelRotation1 = [
    tile2N(values, N)
    for values in (AspectRatioRanges, BandRanges, OrientationRanges, TextRotationRanges)
]
aspectRatio2, bands2, orientation2, axisLabelRotation2 = [
    tile2N(values, N)
    for values in (AspectRatioRanges, BandRanges, OrientationRanges, TextRotationRanges)
]

## One row per comparison pair
paraDF = pd.DataFrame({
    'NBar': bars,
    'maxChar': maxChar,
    'aspectRatio1': aspectRatio1,
    'band1': bands1,
    'orientation1': orientation1,
    'axisLabelRotation1': axisLabelRotation1,
    'aspectRatio2': aspectRatio2,
    'band2': bands2,
    'orientation2': orientation2,
    'axisLabelRotation2': axisLabelRotation2,
})

paraDF.head()

Unnamed: 0,NBar,maxChar,aspectRatio1,band1,orientation1,axisLabelRotation1,aspectRatio2,band2,orientation2,axisLabelRotation2
0,5,5,0.5,0.3,1,-45,0.75,0.6,1,-90
1,5,3,0.75,0.6,0,-45,1.5,0.6,1,0
2,5,5,1.75,0.9,0,-90,0.5,0.6,1,-45
3,5,4,1.0,0.3,0,-90,1.0,0.3,1,-90
4,5,5,0.75,0.3,0,-45,1.0,0.9,0,-90


In [7]:
## generate vega specifications according to the parameters
def render(_vSpec, _field, aspectRatio, band, rotation, orientation, height=None):
    """Return a bar-chart spec configured for one chart of a comparison pair.

    The spec passed in is deep-copied first, so the caller's dict is left
    untouched and the returned spec can be kept or modified independently.

    Parameters:
        _vSpec: base spec dict; must contain an 'encoding' dict.
        _field: name of the quantitative data field to plot.
        aspectRatio: chart width divided by chart height.
        band: value stored under the categorical channel's 'band' key.
            NOTE(review): presumably the bar size relative to the band width;
            confirm against the Vega-Lite version in use.
        rotation: value stored as the categorical axis 'labelAngle'.
        orientation: 0 puts the categories on x (vertical bars); any other
            value puts them on y (horizontal bars).
        height: chart height in pixels; defaults to the module-level `Height`.

    Returns:
        A new spec dict with 'width', 'height' and both position encodings set.
    """
    chartHeight = Height if height is None else height

    ## Quantitative (value) channel
    numChannel = {
        'title': None,
        'field': _field,
        'type': 'quantitative',
        'axis': {'grid': False},
    }

    ## Categorical channel: always the 'Name' field on a band scale
    catChannel = {
        'field': 'Name',
        'type': 'ordinal',
        'scale': {'type': 'band', 'paddingInner': 0},
        'axis': {'labelAngle': rotation},
        'sort': None,
        'title': None,
        'band': band,
    }

    spec = copy.deepcopy(_vSpec)
    spec['width'] = chartHeight * aspectRatio
    spec['height'] = chartHeight

    if orientation == 0:
        spec['encoding']['y'] = numChannel
        spec['encoding']['x'] = catChannel
    else:
        spec['encoding']['x'] = numChannel
        spec['encoding']['y'] = catChannel

    return spec

In [8]:
## for desk-reject charts
def ifViolateRule(nbar, maxChar, aspectRatio, band, rotation, orientation):
    """Return True when a chart configuration breaks one of the desk-reject rules.

    Rule 1: a horizontal bar chart (orientation == 1) must not rotate its labels.
    Rule 2: with unrotated labels, the chart width available per bar must be
            at least the estimated label length (6 px per character, i.e. a
            10px font size).

    NOTE(review): `band` is accepted but never used, and rule 2 is computed
    from the chart width for horizontal charts as well - confirm both are
    intended.
    """
    isHorizontal = orientation == 1
    isRotated = rotation != 0
    if isHorizontal and isRotated:
        return True

    ## Width available per bar vs. estimated pixel length of the longest label
    pixelsPerBar = Height * aspectRatio / nbar
    labelPixels = 6 * maxChar
    labelsOverlap = pixelsPerBar < labelPixels and not isRotated
    return True if labelsOverlap else False

In [9]:
## Render both charts of every sampled pair, write them to disk, and sort the
## pair into one of two buckets:
##   - pairsForCrowdSourcing: neither chart violates a rule
##   - pairsWithDeskRejects:  exactly one chart violates a rule
## Pairs in which both charts violate a rule are skipped entirely.
pairsForCrowdSourcing = []   # [img1Url, img2Url]
pairsWithDeskRejects = []    # ['img1', 'img2', 'good', 'bad', 'cIdx', 'goodCount', 'badCount']
records = []                 # one parameter row per rendered chart

for idx, row in paraDF.iterrows():
    flag1, flag2 = [
        ifViolateRule(row['NBar'], row['maxChar'], row['aspectRatio' + side],
                      row['band' + side], row['axisLabelRotation' + side],
                      row['orientation' + side])
        for side in ('1', '2')
    ]

    ## discard if both violate
    if flag1 and flag2:
        continue

    NBar = int(row['NBar'])
    maxChar = int(row['maxChar'])

    ## Pick a random quantitative field and truncate the car names to maxChar characters
    _vSpec = copy.deepcopy(vSpec)
    _field = random.sample(numFields, 1)[0]
    _df = dataset[['Name', _field]].copy()
    _df['Name'] = _df['Name'].map(lambda name: name[:maxChar])
    ## Recorded label length = longest name that actually remains after truncation
    maxChar = max(_df['Name'].map(len).values)

    ## Keep the first record per (truncated) name, then draw NBar of them as chart data
    rowsAsDicts = json.loads(_df.to_json(orient='records'))
    uniqueRows = list(toolz.unique(rowsAsDicts, key=lambda rec: rec['Name']))
    _vSpec['data']['values'] = random.sample(uniqueRows, NBar)

    pair = []
    for j in ('1', '2'):
        namePrefix = '_'.join([str(idx), j])

        _vSpecHere = render(_vSpec, _field, row['aspectRatio' + j], row['band' + j],
                            row['axisLabelRotation' + j], row['orientation' + j])
        chart = alt.Chart.from_dict(_vSpecHere)

        records.append([
            namePrefix,
            NBar,
            maxChar,
            row['aspectRatio' + j],
            row['band' + j],
            row['axisLabelRotation' + j],
            row['orientation' + j],
        ])

        ## Write the rendered image and, next to it, the spec that produced it
        chart.save(os.path.join(save_Folder, namePrefix + '.png'))
        with open(os.path.join(save_Folder, namePrefix + '.json'), 'w') as f:
            json.dump(_vSpecHere, f)

        pair.append("https://anonymousLink/" + save_Folder + "/" + namePrefix + '.png')

    if not (flag1 or flag2):
        pairsForCrowdSourcing.append(pair)
    else:
        ## Exactly one chart violates a rule here: it is the "bad" one, the other the "good" one
        if flag1:
            good, bad = pair[1], pair[0]
        else:
            good, bad = pair[0], pair[1]
        pairsWithDeskRejects.append([pair[0], pair[1], good, bad, str(idx), 3, 0])

In [10]:
## Table of desk-rejected pairs, written to deskRejectResult.csv in the save folder
deskRejectDf = pd.DataFrame(
    data=pairsWithDeskRejects,
    columns=['img1', 'img2', 'good', 'bad', 'cIdx', 'goodCount', 'badCount'],
)
deskRejectDf.to_csv(os.path.join(save_Folder, 'deskRejectResult.csv'), index=False)

In [11]:
## Preview the first rows of the desk-reject table
deskRejectDf.head()

Unnamed: 0,img1,img2,good,bad,cIdx,goodCount,badCount
0,https://anonymousLink/sampleFolder/2_1.png,https://anonymousLink/sampleFolder/2_2.png,https://anonymousLink/sampleFolder/2_1.png,https://anonymousLink/sampleFolder/2_2.png,2,3,0
1,https://anonymousLink/sampleFolder/3_1.png,https://anonymousLink/sampleFolder/3_2.png,https://anonymousLink/sampleFolder/3_1.png,https://anonymousLink/sampleFolder/3_2.png,3,3,0
2,https://anonymousLink/sampleFolder/6_1.png,https://anonymousLink/sampleFolder/6_2.png,https://anonymousLink/sampleFolder/6_1.png,https://anonymousLink/sampleFolder/6_2.png,6,3,0
3,https://anonymousLink/sampleFolder/9_1.png,https://anonymousLink/sampleFolder/9_2.png,https://anonymousLink/sampleFolder/9_1.png,https://anonymousLink/sampleFolder/9_2.png,9,3,0
4,https://anonymousLink/sampleFolder/10_1.png,https://anonymousLink/sampleFolder/10_2.png,https://anonymousLink/sampleFolder/10_1.png,https://anonymousLink/sampleFolder/10_2.png,10,3,0


In [12]:
## Per-chart parameter log (one row per rendered chart), written to paraRecord.csv
## in the save folder
recordDf = pd.DataFrame(
    records,
    columns=['fileName', 'NBar', 'maxChar', 'aspectRatio',
             'band', 'axisLabelRotation', 'orientation'],
)
recordDf.to_csv(os.path.join(save_Folder, 'paraRecord.csv'), index=False)

In [13]:
## Preview the first rows of the per-chart parameter log
recordDf.head()

Unnamed: 0,fileName,NBar,maxChar,aspectRatio,band,axisLabelRotation,orientation
0,1_1,5,3,0.75,0.6,-45.0,0.0
1,1_2,5,3,1.5,0.6,0.0,1.0
2,2_1,5,5,1.75,0.9,-90.0,0.0
3,2_2,5,5,0.5,0.6,-45.0,1.0
4,3_1,5,4,1.0,0.3,-90.0,0.0


In [14]:
## Each HIT contains 9 comparison pairs (plus a duplicate one which is implemented at the MTurk interface)
def zipPairs4MTurk(pairs, perBatch=9):
    """Flatten comparison pairs into one table row per HIT.

    Parameters:
        pairs: list of [img1, img2] pairs.
        perBatch: number of pairs per HIT (row); defaults to 9.

    Returns:
        A DataFrame with 2 * perBatch columns named img<i>_1 / img<i>_2 for
        i = 1..perBatch. Rows holding fewer than perBatch pairs are padded
        with None, so the result is well-formed for any number of pairs,
        including fewer than perBatch and none at all.
    """
    rowWidth = 2 * perBatch
    columnNames = []
    for i in range(1, perBatch + 1):
        columnNames.append('img' + str(i) + '_1')
        columnNames.append('img' + str(i) + '_2')

    zipPairs = []
    for start in range(0, len(pairs), perBatch):
        flat = list(itertools.chain.from_iterable(pairs[start:start + perBatch]))
        ## Pad explicitly: pandas rejects the data when even the widest row
        ## has fewer entries than there are column names.
        flat.extend([None] * (rowWidth - len(flat)))
        zipPairs.append(flat)

    return pd.DataFrame(zipPairs, columns=columnNames)
## Batch the crowdsourcing pairs into HIT rows and write them to mTurk.csv in the save folder
zipPairsDF = zipPairs4MTurk(pairs=pairsForCrowdSourcing)
zipPairsDF.to_csv(
    os.path.join(save_Folder, 'mTurk.csv'),
    index=False,
)

In [15]:
## Preview the first rows of the MTurk batch table
zipPairsDF.head()

Unnamed: 0,img1_1,img1_2,img2_1,img2_2,img3_1,img3_2,img4_1,img4_2,img5_1,img5_2,img6_1,img6_2,img7_1,img7_2,img8_1,img8_2,img9_1,img9_2
0,https://anonymousLink/sampleFolder/1_1.png,https://anonymousLink/sampleFolder/1_2.png,https://anonymousLink/sampleFolder/4_1.png,https://anonymousLink/sampleFolder/4_2.png,https://anonymousLink/sampleFolder/5_1.png,https://anonymousLink/sampleFolder/5_2.png,https://anonymousLink/sampleFolder/7_1.png,https://anonymousLink/sampleFolder/7_2.png,https://anonymousLink/sampleFolder/13_1.png,https://anonymousLink/sampleFolder/13_2.png,https://anonymousLink/sampleFolder/15_1.png,https://anonymousLink/sampleFolder/15_2.png,https://anonymousLink/sampleFolder/17_1.png,https://anonymousLink/sampleFolder/17_2.png,https://anonymousLink/sampleFolder/18_1.png,https://anonymousLink/sampleFolder/18_2.png,https://anonymousLink/sampleFolder/19_1.png,https://anonymousLink/sampleFolder/19_2.png
1,https://anonymousLink/sampleFolder/23_1.png,https://anonymousLink/sampleFolder/23_2.png,,,,,,,,,,,,,,,,
