# Replica project

### Quantitative analysis of how interesting morphograph clusters are

In [81]:
# loading the metadata
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import pickle
import sys
from tqdm import tqdm
from deep_translator import GoogleTranslator
from sklearn.feature_extraction.text import TfidfVectorizer


sys.path.insert(0, "../model/")
from utils import *

sys.path.insert(0, "../web_annotation/")
from utils_clusters import *
from metrics_clusters import *


data_dir = '/scratch/students/schaerf/'

data_dir = '../data/'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [116]:
positives = update_morph(data_dir, '-2022') 
positives.head()

Unnamed: 0,uid_connection,index,uid,AuthorOriginal,Description,Author,AuthorBirth,path,BeginDate,ImageURL,...,img1,img2,type,annotated,cluster_file_y,cluster,set,new_cluster,new set,old_cluster
0,002275ab17774b55a2d88d7a773b2d93,4881.0,4e6e928899224c9b8e5d15e37e211fe3,"YÁÑEZ DE LA ALMEDINA, Fernando",Madonna and Child with Infant St John,"YÁÑEZ DE LA ALMEDINA, Fernando",(active 1505-36),../data/WGA/images/y/yanez/madonna.jpg,1500.0,http://www.wga.hu/html/y/yanez/madonna.html,...,a2b4fec1607143d4ad4214b48bab69d6,4e6e928899224c9b8e5d15e37e211fe3,POSITIVE,2016-05-30 13:17:33+00:00,Original,118,train,118,train,118.0
1,002ca9c06a704c779e69d93cb4fadaa3,5465.0,45588b4e33444ed8b34c523bac808bc4,TIZIANO (copia da -),S.Mariá Maddalena. cm. 123 x 91,TIZIANO Vecellio (copia da),1483.0,../data/158B/158B_556.jpg,1560.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,45588b4e33444ed8b34c523bac808bc4,da54c106180449d1a52993e9a5bc17fd,POSITIVE,2018-07-24 14:58:44.559049+00:00,Original,50,train,50,train,50.0
2,0038ab462149452b98a4d7359a06f9f4,15245.0,ae828b07e4284d80b652206ffb77278a,TIZIANO (copia da -),"Morte di S. Pietro Martire. mm. 123,8 x 84,2",TIZIANO Vecellio (copia da),1483.0,../data/158C/158C_120.jpg,1641.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,ae828b07e4284d80b652206ffb77278a,ea64207684694d379579998f88b7c29a,POSITIVE,2018-07-16 09:47:57.987515+00:00,Original,51,test,51,test,51.0
3,00474897de044f7fb1ef16888dc198f3,11203.0,76b2c4aeab404bc79768117e3ddc62b1,"BELLINI, Giovanni (cerchia di -)",Pietà.,BELLINI Giovanni (cerchia di),1431.0,../data/47A/47A_858.jpg,1449.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,76b2c4aeab404bc79768117e3ddc62b1,b8dd2c0c2c7146e68e5bbb0aaf6cf803,POSITIVE,2018-03-14 10:46:45.603341+00:00,Original,498,val,498,val,498.0
4,008b01e279594f0eae9419cf52f6262b,15308.0,b03f0d316b824923a52e79688ba7e1b7,"ALBANI, Francesco",Diana and Actaeon,"ALBANI, Francesco","(b. 1578, Bologna, d. 1660, Bologna)",../data/WGA/images/a/albani/1/dianacte.jpg,1612.0,http://www.wga.hu/html/a/albani/1/dianacte.html,...,b03f0d316b824923a52e79688ba7e1b7,2264411f35ed4fa18a4962413c13f529,POSITIVE,2016-09-13 15:43:39+00:00,Original,242,train,242,train,242.0


In [5]:
def catch(func, handle=lambda e: e, *args, **kwargs):
    '''Prevents list comprehensions from going into an error when an exception occurs'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        return e


In [None]:
positives.loc[:, 'Description (EN)'] = [catch(lambda : GoogleTranslator(source='auto', target='en').translate(word)) 
                                      for word in tqdm(list(positives.loc[:, 'Description'].astype(str)))]


In [117]:
positives.loc[:, 'Description (EN)'] = positives.loc[:, 'Description']

In [118]:
uid2endesc = {uid:desc for uid,desc in zip(positives['uid'], positives['Description (EN)'])}
with open(data_dir + 'uid2desc.pkl', 'wb') as outfile:
    pickle.dump(uid2endesc, outfile)

In [119]:
positives['Description (EN - ref)'] = positives['Description (EN)'].astype(str).str.split('.').apply(lambda x: x[0]).apply(lambda x: x.replace('0123456789', ''))
positives['Description (EN - ref)']

0       Madonna and Child with Infant St John
1                                           S
2                                  Morte di S
3                                       Pietà
4                           Diana and Actaeon
                        ...                  
5011                        Sacra ramiglia cm
5012                           Venere e Adone
5013                         Supper at Emmaus
5014       La Madonna col Bambino e due Sante
5015                                La Carità
Name: Description (EN - ref), Length: 5016, dtype: object

In [120]:
def cluster_text(text, range_try=(100,102), hyperparam=False):
    vectorizer = TfidfVectorizer(stop_words={'english'})
    X = vectorizer.fit_transform(text)


    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans
    
    if hyperparam:
        Sum_of_squared_distances = []
        K = range(range_try[0],range_try[1])
        for k in K:
            km = KMeans(n_clusters=k, max_iter=200, n_init=10)
            km = km.fit(X)
            Sum_of_squared_distances.append(km.inertia_)
        plt.plot(K, Sum_of_squared_distances, 'bx-')
        plt.xlabel('k')
        plt.ylabel('Sum_of_squared_distances')
        plt.title('Elbow Method For Optimal k')
        plt.show()

    print('How many clusters do you want to use?')
    true_k = int(input())
    
    model = KMeans(n_clusters=true_k, init='k-means++', max_iter=200, n_init=10)
    model.fit(X)

    labels=model.labels_
    clusters=pd.DataFrame(list(zip(text,labels)),columns=['title','cluster'])
    #print(clusters.sort_values(by=['cluster']))

    for i in range(true_k):
        print(clusters[clusters['cluster'] == i])
        
    return clusters

In [121]:
clusters = cluster_text(positives['Description (EN - ref)'].values)

How many clusters do you want to use?
                      title  cluster
7         La Sacra Famiglia        0
44        La Sacra Famiglia        0
52           Sacra Famiglia        0
70        La Sacra Famiglia        0
80     Sacra Famiglia con s        0
...                     ...      ...
4642         Sacra famiglia        0
4655   Sacra Famiglia con S        0
4710      La Sacra Famiglia        0
4762   Sacra Famiglia con S        0
4852  Sacra Famiglia, con S        0

[108 rows x 2 columns]
                         title  cluster
59             Venus and Cupid        1
443            Venus and Cupid        1
496            Venus and Cupid        1
498   Venus Blindfolding Cupid        1
669            Venus and Cupid        1
...                        ...      ...
4524           Venus and Cupid        1
4574           Venus and Cupid        1
4700           Venus and Cupid        1
4740           Venus and Cupid        1
4915           Venus and Cupid        1

[64 rows x 2 

In [122]:
clusters['cluster_iconography'] = clusters['cluster']

In [123]:
positives = positives.merge(clusters[['cluster_iconography']], left_index=True, right_index=True)

In [124]:
positives.head()

Unnamed: 0,uid_connection,index,uid,AuthorOriginal,Description,Author,AuthorBirth,path,BeginDate,ImageURL,...,annotated,cluster_file_y,cluster,set,new_cluster,new set,old_cluster,Description (EN),Description (EN - ref),cluster_iconography
0,002275ab17774b55a2d88d7a773b2d93,4881.0,4e6e928899224c9b8e5d15e37e211fe3,"YÁÑEZ DE LA ALMEDINA, Fernando",Madonna and Child with Infant St John,"YÁÑEZ DE LA ALMEDINA, Fernando",(active 1505-36),../data/WGA/images/y/yanez/madonna.jpg,1500.0,http://www.wga.hu/html/y/yanez/madonna.html,...,2016-05-30 13:17:33+00:00,Original,118,train,118,train,118.0,Madonna and Child with Infant St John,Madonna and Child with Infant St John,16
1,002ca9c06a704c779e69d93cb4fadaa3,5465.0,45588b4e33444ed8b34c523bac808bc4,TIZIANO (copia da -),S.Mariá Maddalena. cm. 123 x 91,TIZIANO Vecellio (copia da),1483.0,../data/158B/158B_556.jpg,1560.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,2018-07-24 14:58:44.559049+00:00,Original,50,train,50,train,50.0,S.Mariá Maddalena. cm. 123 x 91,S,3
2,0038ab462149452b98a4d7359a06f9f4,15245.0,ae828b07e4284d80b652206ffb77278a,TIZIANO (copia da -),"Morte di S. Pietro Martire. mm. 123,8 x 84,2",TIZIANO Vecellio (copia da),1483.0,../data/158C/158C_120.jpg,1641.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,2018-07-16 09:47:57.987515+00:00,Original,51,test,51,test,51.0,"Morte di S. Pietro Martire. mm. 123,8 x 84,2",Morte di S,3
3,00474897de044f7fb1ef16888dc198f3,11203.0,76b2c4aeab404bc79768117e3ddc62b1,"BELLINI, Giovanni (cerchia di -)",Pietà.,BELLINI Giovanni (cerchia di),1431.0,../data/47A/47A_858.jpg,1449.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,2018-03-14 10:46:45.603341+00:00,Original,498,val,498,val,498.0,Pietà.,Pietà,13
4,008b01e279594f0eae9419cf52f6262b,15308.0,b03f0d316b824923a52e79688ba7e1b7,"ALBANI, Francesco",Diana and Actaeon,"ALBANI, Francesco","(b. 1578, Bologna, d. 1660, Bologna)",../data/WGA/images/a/albani/1/dianacte.jpg,1612.0,http://www.wga.hu/html/a/albani/1/dianacte.html,...,2016-09-13 15:43:39+00:00,Original,242,train,242,train,242.0,Diana and Actaeon,Diana and Actaeon,82


In [125]:
scores_iconography = {cluster: np.around(content['cluster_iconography'].nunique() / content.shape[0] + (content.shape[0] * 0.01),2) for cluster, content in positives.groupby('cluster') if content.shape[0] > 1}
list(scores_iconography.items())[:20]

[(0, 0.36),
 (1, 0.46),
 (2, 0.23),
 (3, 0.2),
 (4, 0.23),
 (5, 0.36),
 (7, 0.23),
 (9, 0.29),
 (10, 0.55),
 (13, 0.32),
 (14, 0.52),
 (15, 0.7),
 (17, 0.27),
 (19, 0.7),
 (20, 0.7),
 (22, 0.2),
 (24, 0.33),
 (25, 0.3),
 (26, 0.36),
 (27, 0.36)]

In [152]:
positives[positives['cluster'] == list(scores_iconography.keys())[np.argmin(list(scores_iconography.values()))]]['Description'].value_counts()

Winter Landscape with Skaters and Bird Trap    4
The Bird Trap                                  3
Winter Landscape with a Bird-trap              2
Nevicata.                                      1
Name: Description, dtype: int64

In [153]:
positives[positives['cluster'] == list(scores_iconography.keys())[np.argmax(list(scores_iconography.values()))]]['Description'].value_counts()

Venus and Cupid                           48
Reclining Water Nymph                     41
Sleeping Venus                            31
The Venus of Urbino                       23
Reclining Nymph                           20
Reclining River Nymph at the Fountain     19
Venus and Cupid with an Organist          19
Diana and Her Nymphs                      18
Death of Cleopatra                        18
Venus with Doves                          18
Sleeping Venus with Cupid                 17
Venus and an Organist and a Little Dog    17
Venus with Organist and Cupid             17
Eva Prima Pandora                         16
The Nymph of the Fountain                 16
Venus                                     16
Venere. em. 125 x 175.                     4
(20)                                       4
Nymph of the Spring                        3
Susanna e i vecchioni. Tav. 30             2
Venere,un satiro e Amore. x 148).          1
Name: Description, dtype: int64

## Authors

In [128]:
positives.Author.value_counts()

TIZIANO Vecellio                    342
CRANACH, Lucas the Elder            147
RAFFAELLO Sanzio                    131
LEONARDO da Vinci                   128
CANALETTO                           111
                                   ... 
CARRIERA Rosalba                      1
ALBANI France sco (inc e inv )        1
PALMA Jacopo (il Vecchio) (attr)      1
VASARI, Giorgio                       1
SCOTIN G (sculp )                     1
Name: Author, Length: 852, dtype: int64

In [129]:
positives['AuthorClean'] = positives['Author'].str.split('(').apply(lambda x: x[0])
positives['AuthorClean'].value_counts()

TIZIANO Vecellio                  342
CRANACH, Lucas the Elder          147
TIZIANO Vecellio                  144
RAFFAELLO Sanzio                  131
LEONARDO da Vinci                 128
                                 ... 
SCHEDONI, Bartolomeo                1
RIPANDA Jacopo                      1
MARCONI Rocco                       1
FRANCESCO RIZZO da SANTACROCE       1
SCOTIN G                            1
Name: AuthorClean, Length: 719, dtype: int64

In [130]:
scores_authors = {cluster: np.around(content['AuthorClean'].nunique() / content.shape[0] + (content.shape[0] * 0.005),2) for cluster, content in positives.groupby('cluster') if content.shape[0] > 1}
list(scores_authors.items())[:20]

[(0, 0.35),
 (1, 0.42),
 (2, 0.2),
 (3, 0.35),
 (4, 0.2),
 (5, 0.35),
 (7, 0.2),
 (9, 0.23),
 (10, 0.38),
 (13, 0.25),
 (14, 0.29),
 (15, 0.35),
 (17, 0.2),
 (19, 0.68),
 (20, 0.68),
 (22, 0.25),
 (24, 0.29),
 (25, 0.25),
 (26, 0.68),
 (27, 0.35)]

In [151]:
positives[positives['cluster'] == list(scores_authors.keys())[np.argmax(list(scores_authors.values()))]]['AuthorClean'].value_counts()

CRANACH, Lucas the Elder           96
TIZIANO Vecellio                   90
BORDONE, Paris                     31
HEEMSKERCK, Maerten van            20
SCOREL, Jan van                    18
SPADARINO, lo                      18
GARNIER, Etienne-Barthélemy        18
COUSIN, Jean the Elder             16
GIROLAMO DA TREVISO the Younger    16
PALMA VECCHIO                      16
GIORGIONE                          15
GIORGIONE                           4
LE TEBRE Valentin                   4
CRANACH, Lucas the Younger          3
LEFEBVRE Valentin                   2
BORDONE Paris                       1
Name: AuthorClean, dtype: int64

In [150]:
positives[positives['cluster'] == list(scores_authors.keys())[np.argmin(list(scores_authors.values()))]]['AuthorClean'].value_counts()

ALBANI, Francesco    15
Name: AuthorClean, dtype: int64

### Author attributions

In [133]:
positives['AuthorAttr'] = positives['AuthorOriginal'].str.split('(').apply(lambda x: x[1] if len(x)>1 else 'Original').str.split(')').apply(lambda x: x[0]).apply(lambda x: x.replace('-)', '')).apply(lambda x: x.strip(') '))
positives['AuthorAttr'].value_counts()

Original                4093
copia da -               120
attr.                     69
inc.                      61
scuola di -               39
                        ... 
inc. e inv.                1
Van                        1
copia ilamminga da -       1
copia -                    1
sculp.                     1
Name: AuthorAttr, Length: 117, dtype: int64

In [134]:
clusters = cluster_text(positives['AuthorAttr'].values)

How many clusters do you want to use?
         title  cluster
0     Original        0
4     Original        0
5     Original        0
6     Original        0
8     Original        0
...        ...      ...
5008  Original        0
5009  Original        0
5012  Original        0
5013  Original        0
5015  Original        0

[4093 rows x 2 columns]
           title  cluster
1     copia da -        1
2     copia da -        1
24    copia da -        1
46    copia da -        1
68     Copia da-        1
...          ...      ...
4882  copia da -        1
4968  copia da -        1
4981  copia da -        1
4983  copia da -        1
4988   copia da-        1

[248 rows x 2 columns]
            title  cluster
7      scuola di-        2
249   scuola di -        2
264   scuola di -        2
343    scuola di-        2
422   scuola di -        2
...           ...      ...
4710   scuola di-        2
4794   scuola di-        2
4895  scuola di -        2
4959  scuola di -        2
4980   scuola di

In [135]:
clusters['cluster_attribution'] = clusters['cluster']

In [136]:
positives = positives.merge(clusters[['cluster_attribution']], left_index=True, right_index=True)
positives.head()

Unnamed: 0,uid_connection,index,uid,AuthorOriginal,Description,Author,AuthorBirth,path,BeginDate,ImageURL,...,set,new_cluster,new set,old_cluster,Description (EN),Description (EN - ref),cluster_iconography,AuthorClean,AuthorAttr,cluster_attribution
0,002275ab17774b55a2d88d7a773b2d93,4881.0,4e6e928899224c9b8e5d15e37e211fe3,"YÁÑEZ DE LA ALMEDINA, Fernando",Madonna and Child with Infant St John,"YÁÑEZ DE LA ALMEDINA, Fernando",(active 1505-36),../data/WGA/images/y/yanez/madonna.jpg,1500.0,http://www.wga.hu/html/y/yanez/madonna.html,...,train,118,train,118.0,Madonna and Child with Infant St John,Madonna and Child with Infant St John,16,"YÁÑEZ DE LA ALMEDINA, Fernando",Original,0
1,002ca9c06a704c779e69d93cb4fadaa3,5465.0,45588b4e33444ed8b34c523bac808bc4,TIZIANO (copia da -),S.Mariá Maddalena. cm. 123 x 91,TIZIANO Vecellio (copia da),1483.0,../data/158B/158B_556.jpg,1560.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,train,50,train,50.0,S.Mariá Maddalena. cm. 123 x 91,S,3,TIZIANO Vecellio,copia da -,1
2,0038ab462149452b98a4d7359a06f9f4,15245.0,ae828b07e4284d80b652206ffb77278a,TIZIANO (copia da -),"Morte di S. Pietro Martire. mm. 123,8 x 84,2",TIZIANO Vecellio (copia da),1483.0,../data/158C/158C_120.jpg,1641.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,test,51,test,51.0,"Morte di S. Pietro Martire. mm. 123,8 x 84,2",Morte di S,3,TIZIANO Vecellio,copia da -,1
3,00474897de044f7fb1ef16888dc198f3,11203.0,76b2c4aeab404bc79768117e3ddc62b1,"BELLINI, Giovanni (cerchia di -)",Pietà.,BELLINI Giovanni (cerchia di),1431.0,../data/47A/47A_858.jpg,1449.0,https://dhlabsrv4.epfl.ch/iiif_replica/cini%2F...,...,val,498,val,498.0,Pietà.,Pietà,13,BELLINI Giovanni,cerchia di -,6
4,008b01e279594f0eae9419cf52f6262b,15308.0,b03f0d316b824923a52e79688ba7e1b7,"ALBANI, Francesco",Diana and Actaeon,"ALBANI, Francesco","(b. 1578, Bologna, d. 1660, Bologna)",../data/WGA/images/a/albani/1/dianacte.jpg,1612.0,http://www.wga.hu/html/a/albani/1/dianacte.html,...,train,242,train,242.0,Diana and Actaeon,Diana and Actaeon,82,"ALBANI, Francesco",Original,0


In [144]:
scores_attributions = {cluster: np.around(content['cluster_attribution'].nunique() / content.shape[0] + (content.shape[0] * 0.005),2) for cluster, content in positives.groupby('cluster') if content.shape[0] > 1}
list(scores_attributions.items())[:20]

[(0, 0.35),
 (1, 0.29),
 (2, 0.2),
 (3, 0.25),
 (4, 0.2),
 (5, 0.35),
 (7, 0.2),
 (9, 0.23),
 (10, 0.3),
 (13, 0.18),
 (14, 0.25),
 (15, 0.35),
 (17, 0.16),
 (19, 0.35),
 (20, 0.35),
 (22, 0.15),
 (24, 0.2),
 (25, 0.2),
 (26, 0.35),
 (27, 0.35)]

In [148]:
positives[positives['cluster'] == list(scores_attributions.keys())[np.argmax(list(scores_attributions.values()))]]['AuthorAttr'].value_counts()

Original             358
copia dal -            4
inc.                   4
da Paolo Veronese      2
Name: AuthorAttr, dtype: int64

In [149]:
positives[positives['cluster'] == list(scores_attributions.keys())[np.argmin(list(scores_attributions.values()))]]['AuthorAttr'].value_counts()

Original    15
Name: AuthorAttr, dtype: int64

### Time difference

In [156]:
extras = pd.read_csv(data_dir + 'morphograph/Cini_AllVariationsMerged_20210421.csv', sep=';')
extras.columns

Index(['Author', 'AuthorULAN', 'AuthorULANLabel', 'AuthorNationality',
       'BiographyLabel', 'AuthorBirth', 'AuthorDeath', 'AuthorBirthLong',
       'AuthorBirthLat', 'AuthorDeathLong', 'AuthorDeathLat',
       'AuthorBirthCity', 'AuthorDeathCity', 'CountModifiers',
       'CountModifiers_Contact', 'CountModifiers_Neighbours', 'CountWorks',
       'CountWorks_Modified', 'CountWorks_Contact', 'CountWorks_Neighbours',
       'PercWorks_Modified', 'PercWorks_Contact', 'PercWorks_Neighbours'],
      dtype='object')

In [161]:
positives_extra = positives.merge(extras[['Author', 'AuthorULAN', 'AuthorULANLabel', 'AuthorNationality',  'BiographyLabel', 
                                          'AuthorDeath', 'AuthorBirthLong', 'AuthorBirthLat', 'AuthorDeathLong', 'AuthorDeathLat',
                                           'CountModifiers',]], left_on='Author', right_on='Author', how='left')
positives_extra.columns                          

Index(['uid_connection', 'index', 'uid', 'AuthorOriginal', 'Description',
       'Author', 'AuthorBirth', 'path', 'BeginDate', 'ImageURL', 'City',
       'Country', 'AuthorBirthCity', 'AuthorDeathCity', 'Drawer', 'Type',
       'cluster_file_x', 'img1', 'img2', 'type', 'annotated', 'cluster_file_y',
       'cluster', 'set', 'new_cluster', 'new set', 'old_cluster',
       'Description (EN)', 'Description (EN - ref)', 'cluster_iconography',
       'AuthorClean', 'AuthorAttr', 'cluster_attribution', 'AuthorULAN',
       'AuthorULANLabel', 'AuthorNationality', 'BiographyLabel', 'AuthorDeath',
       'AuthorBirthLong', 'AuthorBirthLat', 'AuthorDeathLong',
       'AuthorDeathLat', 'CountModifiers'],
      dtype='object')

In [162]:
positives_extra['BeginDate'].value_counts()

1501.0    464
1523.0    204
1546.0    160
1493.0     98
1518.0     94
         ... 
1753.0      1
1819.0      1
1538.0      1
1447.0      1
1445.0      1
Name: BeginDate, Length: 306, dtype: int64

In [176]:
scores_times = {cluster: np.around(content[content['BeginDate'].notnull()]['BeginDate'].max() - content[content['BeginDate'].notnull()]['BeginDate'].min() + (content.shape[0] * 0.05), 2) for cluster, content in positives_extra.groupby('cluster') if content[content['BeginDate'].notnull()].shape[0] > 1}
list(scores_times.items())[:20]

[(0, 1.15),
 (1, 0.4),
 (2, 20.3),
 (3, 31.5),
 (4, 22.3),
 (5, 12.15),
 (7, 22.3),
 (9, 12.6),
 (10, 61.3),
 (13, 55.4),
 (14, 52.25),
 (15, 9.15),
 (17, 51.1),
 (19, 157.15),
 (20, 44.15),
 (22, 30.5),
 (24, 172.25),
 (25, 100.0),
 (26, 13.15),
 (27, 1.15)]

In [177]:
positives_extra[positives_extra['cluster'] == list(scores_times.keys())[np.argmax(list(scores_times.values()))]]['BeginDate'].value_counts()

1503.0    4
1497.0    1
1820.0    1
Name: BeginDate, dtype: int64

In [178]:
positives_extra[positives_extra['cluster'] == list(scores_times.keys())[np.argmin(list(scores_times.values()))]]['BeginDate'].value_counts()

1616.0    2
Name: BeginDate, dtype: int64

### Places

In [188]:
scores_place = {cluster: np.around(content[content['AuthorDeathLat'].notnull()]['AuthorDeathLat'].max() - content[content['AuthorDeathLat'].notnull()]['AuthorDeathLat'].min() + content[content['AuthorDeathLong'].notnull()]['AuthorDeathLong'].max() - content[content['AuthorDeathLong'].notnull()]['AuthorDeathLong'].min() + (content.shape[0] * 0.05), 2) for cluster, content in positives_extra.groupby('cluster') if content[content['AuthorDeathLat'].notnull()].shape[0] > 1}
list(scores_place.items())[:20]

[(1, 0.4),
 (10, 3.22),
 (17, 1.1),
 (20, 0.15),
 (24, 1.25),
 (25, 1.0),
 (32, 2.4),
 (33, 2.8),
 (35, 0.15),
 (48, 4.01),
 (50, 4.9),
 (51, 7.0),
 (52, 0.65),
 (54, 3.5),
 (63, 0.9),
 (68, 0.35),
 (69, 0.15),
 (71, 0.35),
 (72, 18.4),
 (73, 0.25)]

In [189]:
positives_extra[positives_extra['cluster'] == list(scores_place.keys())[np.argmax(list(scores_place.values()))]][['City', 'Country', 'AuthorDeath']]#['BeginDate'].value_counts()

Unnamed: 0,City,Country,AuthorDeath
45,ROMA CITTA DEL VATICANO,,1520.0
78,VENE ZIA,,
129,ROMA,,1520.0
252,A REZ zo,,
259,VENEZIA,,
288,ROMA,,
298,ROMA,,1834.0
400,VENE ZIA,,
655,L UCERNA,,
778,ROMA,,1520.0


In [190]:
positives_extra[positives_extra['cluster'] == list(scores_place.keys())[np.argmin(list(scores_place.values()))]][['City', 'Country', 'AuthorDeath']]#['BeginDate'].value_counts()

Unnamed: 0,City,Country,AuthorDeath
430,PISA,,1646.0
2314,PISA,,1646.0


In [None]:
scores_iconography, scores_authors, scores_attributions, scores_times, scores_places