In [895]:
import jinja2
import os
import cv2
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup, Tag

In [896]:
def describe_images(folder):
    """Measure every readable image directly inside ``folder``.

    Parameters
    ----------
    folder : str
        Directory to scan. It is joined to each entry name with ``'/'`` so the
        resulting ``file`` values keep the same form as before
        (``folder + '/' + name``).

    Returns
    -------
    pandas.DataFrame
        One row per readable image with columns ``file`` (path), ``w`` and
        ``h`` (pixel width/height) and ``aspect_ratio`` (``h / w``). The
        columns are always present, even when no image was found.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and everything derived from it) reproducible between runs.
    for name in sorted(os.listdir(folder)):
        path = folder + '/' + name
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable entry (sub-directory, hidden
            # file, non-image) by returning None instead of raising.
            continue
        height, width = img.shape[:2]
        records.append({
            'file': path,
            'w': width,
            'h': height,
            'aspect_ratio': height / width,
        })

    # Explicit columns keep the schema stable for an empty folder.
    return pd.DataFrame(records, columns=['file', 'w', 'h', 'aspect_ratio'])

In [897]:
# Read the reference gallery page; the context manager closes the handle
# instead of leaving it to the garbage collector.
with open("reference.html") as reference_file:
    html = reference_file.read()

# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed (and bs4 does not warn).
soup = BeautifulSoup(html, "html.parser")
filtered = soup.find_all('article')

# One record per <article>: its CSS classes plus the linked full image and
# the thumbnail source.
tags=[]
for f in filtered:
    tags.append({
        # An <article> without a class attribute yields None from .get().
        'class':' '.join(f.get("class") or []),
        'full': f.find("a").get("href"),
        'thumb':f.find("img").get("src")
    })

In [898]:
# Turn the list of per-article records into a table (one row per article).
tags = pd.DataFrame(data=tags)

In [899]:
# Dimensions of the full-size reference images.
fulls = describe_images(folder='images/fulls')

In [900]:
# Dimensions of the reference thumbnails. Columns get a 't' prefix so they do
# not collide with the full-size columns when both are merged onto the tags.
# An explicit mapping (rather than positional assignment) keeps this correct
# regardless of the column order produced by describe_images.
thumbs=describe_images('images/thumbs').rename(columns={
    'file':'tfile',
    'w':'tw',
    'h':'th',
    'aspect_ratio':'taspect_ratio',
})

In [901]:
# Attach the measured full-size and thumbnail dimensions to each reference
# entry; left joins keep entries whose files were not found.
tags = (
    tags
    .merge(fulls, left_on='full', right_on='file', how='left')
    .merge(thumbs, left_on='thumb', right_on='tfile', how='left')
)

In [902]:
# CSS class of each reference entry, in the order found in reference.html.
class_order = tags['class'].tolist()

In [903]:
#tmp thumbs descriptors
tmpt=describe_images('images/tmp_density/thumbs')
tmpt['tmp_thumb_filename']=tmpt.file.str.split('/',-1,expand=True)[3]
tmpt['caption']=tmpt.tmp_thumb_filename.str.replace('_',' ').str.replace('|',', N° Tweets: ').str.rsplit('.',1,expand=True)[0]
tmpt=tmpt.rename(columns={'file':'tmptfile','w':'tmpttw','h':'tmpth','aspect_ratio':'tmptaspect_ratio'})

In [904]:
#tmp fulls descriptors
tmpf=describe_images('images/tmp_density/fulls')
tmpf['tmp_full_filename']=tmpf.file.str.split('/',-1,expand=True)[3]
tmpf=tmpf.rename(columns={'file':'tmpffile','w':'tmpftw','h':'tmpfh','aspect_ratio':'tmpfaspect_ratio'})

In [905]:
#merged tmp descriptor
tmp=pd.merge(tmpt,tmpf,left_on='tmp_thumb_filename',right_on='tmp_full_filename',how='left')

In [906]:
def match_images(thumbs,tags):
    """Assign each thumbnail the unused tag whose aspect ratio is closest.

    Thumbnails are processed in row order. Each one takes the remaining tag
    row with the smallest absolute difference between ``taspect_ratio`` and
    the thumbnail's ``tmptaspect_ratio``; that tag is then removed from the
    pool. When the pool runs out it is refilled with all tags, so tags are
    reused once there are more thumbnails than tags.

    Parameters
    ----------
    thumbs : pandas.DataFrame
        Must contain a ``tmptaspect_ratio`` column. It is not modified.
    tags : pandas.DataFrame
        Must contain a ``taspect_ratio`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``thumbs`` plus ``tag_idx`` (index label of the matched tag row), the
        columns of the matched tag row, and ``index`` (row position in the
        result).
    """
    pool = tags.copy()
    tags_idxs = []
    for aspect_ratio in thumbs['tmptaspect_ratio']:
        if len(pool) < 1:
            # Every tag has been handed out once; start a new round.
            pool = tags.copy()
        distance = (pool['taspect_ratio'] - aspect_ratio).abs().to_numpy()
        # NumPy's argsort orders NaN last and 'stable' breaks ties by
        # position, so a tag without a measured ratio is only chosen when
        # nothing else is left. Series.argsort would mark NaN entries with
        # -1, which is not a usable row position.
        position = np.argsort(distance, kind='stable')[0]
        label = pool.index[position]
        tags_idxs.append(label)
        pool = pool.drop(label, axis=0)

    lookup = tags.copy()
    lookup['index'] = lookup.index
    # assign() builds a new frame, so the caller's `thumbs` keeps its columns.
    matched = thumbs.assign(tag_idx=tags_idxs).merge(
        lookup, left_on='tag_idx', right_on='index', how='left'
    )
    matched['index'] = matched.index

    return matched

In [907]:
# Give every generated image the layout of the closest-shaped reference entry.
tags = match_images(thumbs=tmp, tags=tags)

In [908]:
def tags_order(tags,order,intro_width=274):
    """Order the gallery entries and compute the per-entry ``switch`` marker.

    Parameters
    ----------
    tags : pandas.DataFrame
        Must contain ``class`` and ``tmpttw`` columns, and its index must
        contain the label ``0``. It is not modified.
    order : list
        Sequence of ``class`` values to follow when picking rows.
    intro_width : int, optional
        Extra width added to the first entry before rows are measured (the
        original comment calls it the intro frame). Defaults to 274, the
        previously hard-coded value.

    Returns
    -------
    pandas.DataFrame
        A copy of ``tags`` sorted by the new ``order`` column, re-indexed by
        it (also stored in ``index``), with ``switch`` and ``wcontrol`` both
        holding the second difference of the row number obtained from the
        cumulative widths.
    """
    pool = tags.copy()

    # NOTE(review): these windows are reproduced unchanged. The first window
    # (start 6) overlaps order[0:7] by one element and the second (start 15)
    # skips order[14] - confirm whether that is intended.
    wanted = order[0:7]
    for i in range(1, int(np.floor(pool.shape[0] / 8))):
        start = i * 7 - 1 if i == 1 else i * 8 - 1
        wanted.extend(order[start:start + 8])

    # For each wanted class take the first still-unused row of that class;
    # rows that were never requested are appended afterwards.
    sequence = []
    for class_name in wanted:
        candidates = pool.index[pool['class'] == class_name]
        if len(candidates) > 0:
            sequence.append(candidates[0])
            pool = pool.drop(candidates[0], axis=0)
    sequence.extend(pool.index.to_list())

    # Work on a copy so the caller's frame does not gain an 'order' column.
    result = tags.copy()
    result['order'] = sequence
    result = result.sort_values(by='order')
    result['index'] = result['order']
    result.index = result['index']

    # The first entry is widened by the intro width; the row width is the sum
    # of the entries labelled 0..7 (label slicing is inclusive).
    widths = result['tmpttw'].copy()
    widths.loc[0] = widths.loc[0] + intro_width
    row_width = widths.loc[0:7].sum()
    control = np.floor(widths.cumsum() / row_width).diff().diff().fillna(0)

    # The earlier order-based 'switch' values were always overwritten by this
    # width-based result (and raised for 9-11 entries), so only this is kept.
    result['switch'] = control
    result['wcontrol'] = control

    return result

In [909]:
# Arrange the matched entries following the class sequence of the reference page.
tags = tags_order(tags=tags, order=class_order)

In [910]:
# Keep only the fields passed on to the template, expose the generated files
# under the 'thumb'/'full' keys, and convert to a list of plain dicts.
tags = (
    tags[['switch','index','class','caption','tmpffile','tmptfile','order']]
    .sort_values(by='order')
    .rename(columns={'tmptfile':'thumb','tmpffile':'full'})
    .to_dict(orient='records')
)

In [911]:
# Display the list of records that is passed to the template below.
tags

[{'switch': 0.0,
  'index': 0,
  'class': 'item thumb span-3',
  'caption': 'new york-new jersey-philadelphia, N° Tweets: 2MM',
  'full': 'images/tmp_density/fulls/new_york-new_jersey-philadelphia|2MM.png',
  'thumb': 'images/tmp_density/thumbs/new_york-new_jersey-philadelphia|2MM.png',
  'order': 0},
 {'switch': 0.0,
  'index': 1,
  'class': 'item thumb span-2',
  'caption': 'tokio-yokohama, N° Tweets: 2MM',
  'full': 'images/tmp_density/fulls/tokio-yokohama|2MM.png',
  'thumb': 'images/tmp_density/thumbs/tokio-yokohama|2MM.png',
  'order': 1},
 {'switch': 0.0,
  'index': 2,
  'class': 'item thumb span-3',
  'caption': 'minneapolis-st paul',
  'full': 'images/tmp_density/fulls/minneapolis-st_paul.png',
  'thumb': 'images/tmp_density/thumbs/minneapolis-st_paul.png',
  'order': 2},
 {'switch': 0.0,
  'index': 3,
  'class': 'item thumb span-2',
  'caption': 'lower countries, N° Tweets: 570K',
  'full': 'images/tmp_density/fulls/lower_countries|570K.png',
  'thumb': 'images/tmp_density/th

In [912]:
from jinja2 import Template

# Read the page template from disk, then compile it with jinja2.
with open('template.html') as file_:
    template_source = file_.read()
template = Template(template_source)


In [913]:
# Fill the template; the records are available to it under the name 'tags'.
rendered = template.render(tags=tags)

In [914]:
# Write the rendered page to density.html, replacing any previous version.
with open('density.html', mode='w') as f:
    f.write(rendered)