In [3]:
import numpy as np
import random
from tqdm import tqdm_notebook

In [4]:
# Input record: a single photo parsed from the input file
class photo():
    """One input photo: its id, orientation flag, declared tag count and tags."""

    def __init__(self, photoID, orient, tagsNum, tags):
        # Plain attribute storage; the arguments are kept as given (no copy, no validation).
        self.photoID, self.orient = photoID, orient
        self.tagsNum, self.tags = tagsNum, tags

# Output record: one slide of the slideshow
class slide():
    """A slide: its type label, the photos shown on it and the union of their tags."""

    def __init__(self, slideType, photos):
        self.slideType = slideType
        self.photos = photos
        # Merge the tags of every photo on the slide into a single set.
        self.tags = set().union(*(member.tags for member in self.photos))

def calcTransScores(slide1, slide2):
    """Score the transition between two slides from their tag sets.

    The score is the smallest of three counts: tags present on both slides,
    tags only on slide1, and tags only on slide2.
    """
    tagsA, tagsB = slide1.tags, slide2.tags
    shared = len(tagsA.intersection(tagsB))
    # Tags exclusive to one side = that side's size minus the shared part.
    onlyA = len(tagsA) - shared
    onlyB = len(tagsB) - shared
    return min(shared, onlyA, onlyB)

def calcSlideshowScores(listSlides):
    """Return the total score of a slideshow.

    The total is the sum of calcTransScores over each pair of consecutive
    slides in listSlides. An empty or single-slide sequence scores 0.
    """
    totalScore = 0
    prevSlide = None
    for currSlide in listSlides:
        # Identity check (not ==): the first slide has no predecessor to score against.
        if prevSlide is not None:
            totalScore += calcTransScores(prevSlide, currSlide)
        prevSlide = currSlide

    return totalScore

In [5]:
def num_of_photos(filename):
    """Return the photo count declared on the first line of the input file.

    Only the first line is read; its first whitespace-separated field is
    converted to int.

    Raises:
        IndexError: if the first line is empty.
        ValueError: if the first field is not an integer.
    """
    # Context manager so the file handle is always closed.
    with open(filename) as f:
        header = f.readline()

    return int(header.split()[0])

def input_parser(filepath):
    """Parse the photo input file into photo objects and a flat tag list.

    The first line of the file is skipped. Every following non-blank line is read
    as ``<orientation> <tag count> <tag> <tag> ...`` and becomes one ``photo``
    whose ``photoID`` is the line's zero-based position after the first line.

    Returns:
        (photos, alltags): the list of ``photo`` objects, and a list containing
        the tags of every photo, each photo's tags added exactly once (tags
        shared by several photos therefore appear several times).

    Raises:
        IndexError: if a line has fewer fields than its declared tag count needs.
        ValueError: if the tag-count field is not an integer.
    """
    photos = []
    alltags = []
    # Context manager so the file handle is always closed.
    with open(filepath) as f:
        for line_no, line in enumerate(f):
            if line_no == 0:
                continue  # first line is not a photo record
            photo_info = line.split()
            if not photo_info:
                continue  # tolerate blank lines (e.g. a trailing newline)
            orientation = photo_info[0]
            tag_num = int(photo_info[1])
            tags = [photo_info[2 + j] for j in range(tag_num)]
            # Extend once per photo; extending inside the per-tag loop would
            # re-add the partial tag list on every iteration and inflate counts.
            alltags.extend(tags)
            photos.append(photo(photoID=line_no - 1, orient=orientation, tagsNum=tag_num, tags=tags))

    return photos, alltags

In [6]:
def output_slideshow(filename, slide_list):
    """
    Write the slideshow to `filename` in the required output format.

    The first line holds the number of slides. Each following line describes one
    slide: two photo ids separated by a space when slideType is 'V', otherwise
    the id of the slide's first photo.
    """
    with open(filename, 'w') as out_file:
        out_file.write(f"{len(slide_list)}\n")

        for entry in slide_list:
            if entry.slideType != 'V':
                row = f"{entry.photos[0].photoID}\n"
            else:
                # A 'V' slide must carry exactly two photos; unpacking enforces that.
                first, second = entry.photos
                row = f"{first.photoID} {second.photoID}\n"
            out_file.write(row)

In [7]:
def filterPhotos(photos, orient):
    """Return a new list with the photos whose `orient` equals the given value.

    The input order is preserved and the input sequence is not modified.
    """
    return [candidate for candidate in photos if candidate.orient == orient]

In [8]:
def generateSlides(Deck):
    """Return one single-photo slide per photo in Deck, in random order.

    Deck itself is not modified. Each photo is wrapped with createSlide and a
    progress bar is advanced once per slide.
    """
    order = list(range(len(Deck)))
    # A single shuffle yields a uniformly random order in linear time; picking a
    # random index and removing it from a list each round is quadratic.
    random.shuffle(order)

    pbar = tqdm_notebook(total=len(Deck))
    out = []
    try:
        for idx in order:
            out.append(createSlide([Deck[idx]]))
            pbar.update(1)
    finally:
        # Close the bar even if slide creation fails part-way.
        pbar.close()
    return out

In [9]:
def generateSlides2(Deck, topFraction=0.25):
    """Greedily chain single-photo slides so consecutive slides score above zero.

    The first photo of Deck opens the slideshow. After that, findNextBest is
    asked for the best follow-up to the current slide among the first TopN
    remaining photos, where TopN = int(topFraction * len(Deck)) is fixed from
    the initial deck size. If no candidate scores above 0, the first remaining
    photo is used instead. Every photo ends up on exactly one slide.

    Args:
        Deck: sequence of photos; it is copied, so the caller's list is left intact.
        topFraction: fraction of the initial deck size used as the look-ahead window.

    Returns:
        List of slides in presentation order (empty for an empty Deck).
    """
    # Work on a copy: the algorithm consumes its work list by deleting from it.
    remaining = list(Deck)
    Num = len(remaining)
    if Num == 0:
        return []

    TopN = int(topFraction * Num)
    pbar = tqdm_notebook(total=Num)
    out = []

    try:
        S1 = createSlide([remaining.pop(0)])
        out.append(S1)
        pbar.update(1)

        while remaining:
            S2, N, Score = findNextBest(S1, remaining[:TopN])

            if Score != 0:
                # N indexes into the window, which is a prefix of `remaining`.
                del remaining[N]
                S1 = S2
            else:
                # Nothing in the window connects; fall back to the next photo in order.
                S1 = createSlide([remaining.pop(0)])
            out.append(S1)
            pbar.update(1)
    finally:
        pbar.close()

    return out

In [10]:
def findNextBest(Current, PhotoList):
    """Find the photo in PhotoList whose single-photo slide best follows Current.

    Returns (slide, index, score): the best candidate slide, its index in
    PhotoList and its transition score. When no candidate scores above 0 the
    result is (Current, 0, 0).
    """
    bestSlide, bestIndex, bestScore = Current, 0, 0

    for idx, candidate in enumerate(PhotoList):
        trialSlide = createSlide([candidate])
        trialScore = calcTransScores(Current, trialSlide)
        # Strict comparison: on equal scores the earliest candidate is kept.
        if trialScore > bestScore:
            bestSlide, bestIndex, bestScore = trialSlide, idx, trialScore

    return bestSlide, bestIndex, bestScore

In [11]:
def createSlide(photos):
    """Wrap photos in a slide: type 'H' for exactly one photo, 'V' otherwise."""
    slideType = 'H' if len(photos) == 1 else 'V'
    return slide(slideType, photos)

In [12]:
def get_jaccard_sim(L1, L2):
    """Return the Jaccard similarity of two collections, treated as sets.

    The value is len(intersection) / len(union), a float in [0, 1]. Two empty
    collections are treated as identical and score 1.0 instead of dividing by zero.
    """
    a = set(L1)
    b = set(L2)
    c = a.intersection(b)
    union_size = len(a) + len(b) - len(c)
    if union_size == 0:
        # Both inputs are empty: by convention identical sets have similarity 1.
        return 1.0
    return float(len(c)) / union_size
    

In [13]:
# Data set to read and the file the generated slideshow is written to.
inputfile = '../data/b_lovely_landscapes.txt'
outputfile = '../data/b_lovely_landscapes_out.txt'

# N: photo count taken from the file's first line.
# myPhotos / allTags: parsed photo objects and the flat list of their tags.
N = num_of_photos(inputfile)
myPhotos, allTags = input_parser(inputfile)

In [15]:
# Split the parsed photos by their orientation flag ("V" / "H").
V = filterPhotos(myPhotos, "V")
H = filterPhotos(myPhotos, "H")

In [16]:
# Sort both lists in place by tag count, largest first.
H.sort(key=lambda x: x.tagsNum, reverse=True)
V.sort(key=lambda x: x.tagsNum, reverse=True)

In [None]:
# Build the slideshow from the horizontal photos and write it out.
# A copy of H is passed because generateSlides2 deletes entries from the list it
# is given; later cells still index into H.
myFinal = generateSlides2(list(H))

output_slideshow(outputfile,myFinal)

HBox(children=(IntProgress(value=0, max=80000), HTML(value='')))

In [79]:
# Total transition score of the generated slideshow.
print(calcSlideshowScores(myFinal))

721


In [None]:
# Scratch check: show the photo object at index 1051 of H.
# NOTE(review): generateSlides2 deletes entries from the list passed to it, so this
# index may no longer exist if H itself was handed to that function.
H[1051]

In [None]:
import collections

# Tally how often each tag occurs, then list (tag, count) pairs from most to least frequent.
allTagsCount = collections.Counter(allTags)
allTagsCount.most_common()

In [None]:
# Display the full tag Counter (unsorted view).
allTagsCount

In [None]:
# Spot check: compare the tags of two neighbouring photos in H.
L1 = H[100].tags
L2 = H[101].tags

print(sorted(L1))
print(sorted(L2))

# Jaccard similarity of the two tag lists (shown as the cell's result).
get_jaccard_sim(L1,L2)

In [1]:
# Rebuild H from myPhotos (unsorted). Needs the cells that define filterPhotos and
# myPhotos to have run first; the recorded NameError comes from running this cell
# on a kernel where filterPhotos was not yet defined.
H = filterPhotos(myPhotos, "H")

NameError: name 'filterPhotos' is not defined

In [35]:
def analyseTags(PhotoList):
    """Compute pairwise Jaccard tag similarity for every ordered pair of photos.

    Every photo is compared with every photo (itself included), i.e.
    len(PhotoList) ** 2 comparisons, so each unordered pair is visited twice.

    Returns:
        List of [photoID_1, photoID_2, similarity] rows, keeping only pairs whose
        similarity is strictly between 0 and 1.
    """
    pbar = tqdm_notebook(total = len(PhotoList)**2)
    out = []

    for T1 in PhotoList:
        for T2 in PhotoList:
            similarity = get_jaccard_sim(T1.tags, T2.tags)
            # Drop disjoint pairs (0) and pairs with identical tag sets (1).
            if 0 < similarity < 1:
                out.append([T1.photoID, T2.photoID, similarity])
            pbar.update(1)

    pbar.close()
    return out
                

In [38]:
# Pairwise tag similarity over all horizontal photos; performs len(H)**2 comparisons.
myPair = analyseTags(H)

HBox(children=(IntProgress(value=0, max=6400000000), HTML(value='')))

In [40]:
import pandas as pd

# Tabulate the [photoID, photoID, similarity] rows returned by analyseTags.
myPairDF = pd.DataFrame(myPair, columns=['P1','P2','Similarity'])

In [42]:
# Persist the similarity table so the pairwise pass does not have to be repeated.
myPairDF.to_csv("../data/Similarity2.csv")

In [None]:
# Hand-built fixtures: one "H" photo and two "V" photos with overlapping tags.
# photo() takes (photoID, orient, tagsNum, tags), so the tag count is passed explicitly.
myPhoto1 = photo('0','H',2,['A','B'])
myPhoto2 = photo('1','V',2,['B','C'])
myPhoto3 = photo('2','V',2,['C','D'])

# One slide for the single "H" photo and one for the "V" pair, so the list below
# does not depend on names defined in a later cell.
mySlide1 = createSlide([myPhoto1])
mySlide2 = createSlide([myPhoto2, myPhoto3])

mySlideList = [mySlide1,mySlide2]

In [None]:
# Scratch check: orientation flag of the first parsed photo.
myPhotos[0].orient

In [None]:
# NOTE(review): getAllTags is not defined anywhere in the visible notebook — confirm
# it still exists, or use the tag list returned by input_parser instead.
T = getAllTags(myPhotos)

In [None]:
# Bare name: evaluates the numpy module and discards it (no effect in this cell).
np

# A single-photo slide from H ...
mySlide1 = createSlide([H[3]])

# ... and a two-photo slide from V.
mySlide2 = createSlide([V[2],V[5]])


# Transition score between the two slides (shown as the cell's result).
calcTransScores(mySlide1,mySlide2)

In [None]:
# NOTE(review): generateFirst2 is not defined in the visible notebook — confirm it
# still exists before running this cell.
First2, myRemaining, myFirst, myLast = generateFirst2(H)


In [None]:
# Scratch check: type of the first value returned by generateFirst2.
type(First2)

In [None]:
# NOTE(review): generateNext is not defined in the visible notebook — confirm it
# still exists before running this cell.
myNext, myRemaining, myLast = generateNext(H, myRemaining, myLast)

In [None]:
# Baseline: one slide per horizontal photo in random order (overwrites myFinal).
myFinal = generateSlides(H)

In [None]:
# Display the current slide list.
myFinal

In [None]:
# filter(None, ...) keeps only truthy entries, i.e. drops None placeholders from the list.
myFinal = list(filter(None, myFinal)) # fastest

In [None]:
# Write the current slide list to a scratch output file.
output_slideshow('../data/outputTest.txt',myFinal)