# Database overview

In [1]:
from timelink.api.database import TimelinkDatabase

# Relative path of the directory passed to TimelinkDatabase as `db_path`.
db_dir = '../database/sqlite/'

# Open the 'timelink-web' database; `db` is used by all the cells below.
db = TimelinkDatabase(
    db_type='sqlite',
    db_path=db_dir,
    db_name='timelink-web',
)

## Database status


### Row count

In [2]:
# Row count of every table in the database; the output below shows the
# result as a list of (table, rows) pairs.
row_count = db.table_row_count()
row_count

[('acts', 40),
 ('alembic_version', 1),
 ('aregisters', 0),
 ('attributes', 15452),
 ('blinks', 4),
 ('class_attributes', 63),
 ('classes', 13),
 ('entities', 18609),
 ('geoentities', 215),
 ('goods', 0),
 ('kleiofiles', 5),
 ('links', 4),
 ('objects', 0),
 ('persons', 1100),
 ('relations', 1782),
 ('rentities', 2),
 ('rgeoentities', 0),
 ('robjects', 0),
 ('rpersons', 0),
 ('sources', 5),
 ('syslog', 0),
 ('syspar', 0)]

The same row counts as a pandas DataFrame:

In [3]:
import pandas as pd

# Tabulate the (table, rows) pairs from the previous cell.
row_count_df = pd.DataFrame(
    data=row_count,
    columns=['table', 'rows'],
)
row_count_df

Unnamed: 0,table,rows
0,acts,40
1,alembic_version,1
2,aregisters,0
3,attributes,15452
4,blinks,4
5,class_attributes,63
6,classes,13
7,entities,18609
8,geoentities,215
9,goods,0


### Entity types

In [4]:
from sqlalchemy import select, func
from timelink.api.models import Entity

# Count the entities of each class. `Entity.pom_class` is rendered as the
# `entities.class` column in the SQL printed below.
stmt = (
    select(Entity.pom_class, func.count().label('count'))
    .group_by(Entity.pom_class)
)
print(stmt)
print()

# Build the DataFrame inside the `with` block, while the result still
# belongs to an open session.
with db.session() as session:
    result = session.execute(stmt)
    pom_class_df = pd.DataFrame(result, columns=['pom_class', 'count'])

pom_class_df

SELECT entities.class, count(*) AS count 
FROM entities GROUP BY entities.class



Unnamed: 0,pom_class,count
0,act,40
1,attribute,15452
2,class,13
3,geoentity,215
4,person,1100
5,relation,1782
6,rentity,2
7,source,5


### Imported files

In [5]:
# Records of the files imported into the database; the next cell converts
# each record to a dict to build a DataFrame.
imported = db.get_imported_files()
imported



The imported files as a pandas DataFrame:

In [6]:

# One row per imported file: every record is first converted to a dict.
imported_df = pd.DataFrame(list(map(dict, imported)))
imported_df

Unnamed: 0,path,name,structure,translator,translation_date,nerrors,nwarnings,error_rpt,warning_rpt,imported,imported_string
0,sources/auc-alunos.cli,auc-alunos.cli,/usr/local/timelink/clio/src/gacto2.str,gactoxml2.str,2025-02-06 14:24:06,0,0,No errors,No warnings,2025-02-06 14:25:55.104942,2025-02-06 14:25:55 UTC
1,sources/dehergne-a.cli,dehergne-a.cli,/usr/local/timelink/clio/src/gacto2.str,gactoxml2.str,2025-02-09 03:08:49,3,0,ERROR: dehergne-a.cli processing same_as relat...,No warnings,2025-02-09 04:18:24.070168,2025-02-09 04:18:24 UTC
2,sources/dehergne-locations-1644.cli,dehergne-locations-1644.cli,/usr/local/timelink/clio/src/gacto2.str,gactoxml2.str,2025-02-09 03:08:48,0,0,No errors,No warnings,2025-02-09 04:18:29.862914,2025-02-09 04:18:29 UTC
3,sources/real-entities/real-entities.cli,real-entities.cli,/usr/local/timelink/clio/src/gacto2.str,gactoxml2.str,2025-02-09 03:08:50,0,0,No errors,No warnings,2025-02-09 04:18:34.129388,2025-02-09 04:18:34 UTC
4,sources/b1685.cli,b1685.cli,/usr/local/timelink/clio/src/gacto2.str,gactoxml2.str,2025-02-09 05:59:53,0,0,No errors,No warnings,2025-02-09 06:00:03.809879,2025-02-09 06:00:03 UTC


## Attributes

### Types of attributes

In [30]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select


# Let pandas display up to 500 rows (global option, affects later cells too).
pd.set_option('display.max_rows', 500)

attr_table = db.get_table('attributes')
db.describe('attributes', show=True)  # print the table's columns for reference
print()

# For each attribute type: number of rows and number of distinct values.
# Grouping by the column object (rather than the string 'the_type') matches
# the style of the relation queries further down; the SQL is the same.
stmt = (
    select(
        attr_table.c.the_type,
        func.count().label('count'),
        func.count(func.distinct(attr_table.c.the_value)).label('distinct_value'),
    )
    .group_by(attr_table.c.the_type)
)
print(stmt)
print()

with db.session() as session:
    nml = session.execute(stmt)
    attribute_df = pd.DataFrame(nml)

attribute_df

attributes
id                   attributes           VARCHAR    {ForeignKey('entities.id')}
entity               attributes           VARCHAR    {ForeignKey('entities.id')}
the_type             attributes           VARCHAR    
the_value            attributes           VARCHAR    
the_date             attributes           VARCHAR    
obs                  attributes           VARCHAR    

SELECT attributes.the_type, count(*) AS count, count(distinct(attributes.the_value)) AS distinct_value 
FROM attributes GROUP BY attributes.the_type



Unnamed: 0,the_type,count,distinct_value
0,activa,163,2
1,alcunha,2,1
2,alternative-name,4,4
3,alternative-name@wikidata,3,3
4,baptizado,2,2
5,baptizado@wikidata,2,2
6,cargo,23,21
7,chegada,17,6
8,chegada@wikidata,13,5
9,colegio,2,1


### Attribute values

In [8]:
from timelink.pandas import attribute_values

# Values recorded for the 'jesuita-entrada' attribute. Per the SQL echoed
# below (sql_echo=True): for each value, the number of distinct entities and
# the earliest/latest date, ordered by descending count.
av = attribute_values('jesuita-entrada', sql_echo=True, db=db)
av

SELECT eattributes.the_value AS value, count(DISTINCT eattributes.entity) AS count, min(eattributes.the_date) AS date_min, max(eattributes.the_date) AS date_max 
FROM eattributes 
WHERE eattributes.the_type = :the_type_1 GROUP BY eattributes.the_value ORDER BY count DESC


Unnamed: 0_level_0,count,date_min,date_max
value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
?,35,15550000,17581126
Coimbra,6,15420000,16770701
Roma,4,15710816,16790512
Goa,3,15480502,16660000
Lisboa,3,16680000,17460223
Avignon,2,17350731,17370927
Paris,2,16700926,16771029
Saragoça,2,16740613,16771115
Shiuchow,2,15891100,15891100
Évora,2,15710304,17230000


### Entities with attributes

#### All entities with a given attribute type

In [16]:
from timelink.pandas import entities_with_attribute

# All entities that have a 'jesuita-entrada' attribute; sql_echo=True prints
# the generated query.
with_type = entities_with_attribute(
    the_type='jesuita-entrada',
    sql_echo=True,
    db=db,
)
# Blank out missing values and show only the first 20 rows.
with_type.fillna('').head(20)

Query for jesuita-entrada:
 SELECT entities.id, eattributes.the_type AS "jesuita-entrada.type", eattributes.the_value AS "jesuita-entrada", eattributes.the_date AS "jesuita-entrada.date", eattributes.the_line AS "jesuita-entrada.line", eattributes.the_level AS "jesuita-entrada.level", eattributes.aobs AS "jesuita-entrada.obs", eattributes.a_extra_info AS "jesuita-entrada.extra_info" 
FROM entities JOIN eattributes ON eattributes.entity = entities.id 
WHERE eattributes.the_type LIKE :the_type_1 ORDER BY eattributes.the_date


Unnamed: 0_level_0,jesuita-entrada.type,jesuita-entrada,jesuita-entrada.date,jesuita-entrada.line,jesuita-entrada.level,jesuita-entrada.obs,jesuita-entrada.extra_info,jesuita-entrada.comment,jesuita-entrada.date.comment,jesuita-entrada.comment.original
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
deh-pedro-de-alcacova,jesuita-entrada,Coimbra,15420000,271,3,saiu a primeira vez,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412,,
deh-pedro-de-alcacova,jesuita-entrada,Goa,15480502,271,3,,{'value': {'comment': '@wikidata:Q1171'}},@wikidata:Q1171,,
deh-goncalo-alvares,jesuita-entrada,Coimbra,15490101,637,3,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412,,
deh-domingos-alvares-ref1,jesuita-entrada,?,15550000,606,4,,{},,,
deh-luis-de-almeida,jesuita-entrada,Japão,15560000,520,3,,"{'date': {'comment': 'como ""indiferente"" ou au...",@wikidata:Q17,"como ""indiferente"" ou auxiliar laico",
deh-hernando-de-alcaraz,jesuita-entrada,Alcalá de Henares,15580413,294,3,,{'value': {'comment': '@wikidata:Q46940'}},@wikidata:Q46940,,
deh-simao-antunes-ref2,jesuita-entrada,?,15670000,978,4,,{},,,
deh-diogo-antunes,jesuita-entrada,Évora,15710304,948,3,,{'value': {'comment': '@wikidata:Q179948'}},@wikidata:Q179948,,
deh-matteo-ricci,jesuita-entrada,Roma,15710816,12,3,,"{'value': {'comment': '@wikidata:Q220', 'origi...",@wikidata:Q220,,(Noviciado de Sant'Andrea al Quirinale)
deh-michele-ruggiere-ref2,jesuita-entrada,?,15720000,407,4,,{},,,


#### All entities with a combination of type and value

In [10]:
# Same lookup as above, restricted to a single attribute value. The echoed
# SQL below shows that both the type and the value are matched with LIKE.
with_type_value = entities_with_attribute(
    the_type='jesuita-entrada',
    the_value='Coimbra',
    sql_echo=True,
    db=db)
with_type_value

Query for jesuita-entrada:
 SELECT entities.id, eattributes.the_type AS "jesuita-entrada.type", eattributes.the_value AS "jesuita-entrada", eattributes.the_date AS "jesuita-entrada.date", eattributes.the_line AS "jesuita-entrada.line", eattributes.the_level AS "jesuita-entrada.level", eattributes.aobs AS "jesuita-entrada.obs", eattributes.a_extra_info AS "jesuita-entrada.extra_info" 
FROM entities JOIN eattributes ON eattributes.entity = entities.id 
WHERE eattributes.the_type LIKE :the_type_1 AND eattributes.the_value LIKE :the_value_1 ORDER BY eattributes.the_date


Unnamed: 0_level_0,jesuita-entrada.type,jesuita-entrada,jesuita-entrada.date,jesuita-entrada.line,jesuita-entrada.level,jesuita-entrada.obs,jesuita-entrada.extra_info,jesuita-entrada.comment
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
deh-pedro-de-alcacova,jesuita-entrada,Coimbra,15420000,271,3,saiu a primeira vez,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412
deh-goncalo-alvares,jesuita-entrada,Coimbra,15490101,637,3,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412
deh-antonio-de-abreu-ref1,jesuita-entrada,Coimbra,15760000,20,4,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412
deh-antonio-de-andrade,jesuita-entrada,Coimbra,15961216,881,3,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412
deh-afonso-aires,jesuita-entrada,Coimbra,16490000,171,3,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412
deh-miguel-do-amaral,jesuita-entrada,Coimbra,16770701,702,3,,{'value': {'comment': '@wikidata:Q45412'}},@wikidata:Q45412


## Functions

In [23]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

# Let pandas display up to 100 rows (global option).
pd.set_option('display.max_rows', 100)

rel = db.get_table('relations')

db.describe(rel, show=True)  # print the table's columns for reference
print()

# Functions are stored as relations of type 'function-in-act'; count how
# many times each function (the relation value) occurs.
stmt = (
    select(rel.c.the_value, func.count().label('count'))
    .where(rel.c.the_type == 'function-in-act')
    .group_by(rel.c.the_value)
)
print(stmt)
print()

with db.session() as session:
    functions = session.execute(stmt)
    functions_df = pd.DataFrame(functions)

functions_df

relations
id                   relations            VARCHAR    {ForeignKey('entities.id')}
origin               relations            VARCHAR    {ForeignKey('entities.id')}
destination          relations            VARCHAR    {ForeignKey('entities.id')}
the_type             relations            VARCHAR    
the_value            relations            VARCHAR    
the_date             relations            VARCHAR    
obs                  relations            VARCHAR    

SELECT relations.the_value, count(*) AS count 
FROM relations 
WHERE relations.the_type = :the_type_1 GROUP BY relations.the_value



Unnamed: 0,the_value,count
0,geo1,18
1,geo2,62
2,geo3,135
3,irmao,2
4,mad,29
5,mae,44
6,mrmad,6
7,n,611
8,pad,36
9,pai,295


### List of entities with a function

In [12]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

# `nfunctions` is a ready-made view linking relations and entities.
nfunctions = db.nfunctions_view
db.describe(nfunctions, show=True)

# Function to look up (value of the view's `func` column).
a_function = 'mad'

# Entities appearing with that function, with the act they appear in,
# sorted by name.
stmt = (
    select(
        nfunctions.c.id,
        nfunctions.c.name,
        nfunctions.c.id_act,
        nfunctions.c.act_type,
        nfunctions.c.act_date,
    )
    .where(nfunctions.c.func == a_function)
    .order_by(nfunctions.c.name)
)
print(stmt)

with db.session() as session:
    with_function = session.execute(stmt)
    with_function_df = pd.DataFrame(with_function)

with_function_df

nfunctions
id                   nfunctions           VARCHAR    
name                 nfunctions           VARCHAR    
groupname            nfunctions           VARCHAR    
pom_class            nfunctions           VARCHAR    
the_line             nfunctions           INTEGER    
the_level            nfunctions           INTEGER    
the_order            nfunctions           INTEGER    
updated              nfunctions           DATETIME   
indexed              nfunctions           DATETIME   
extra_info           nfunctions           JSON       
func                 nfunctions           VARCHAR    
id_act               nfunctions           VARCHAR    {ForeignKey('entities.id')}
act_type             nfunctions           VARCHAR(32) 
act_date             nfunctions           VARCHAR    
act_obs              nfunctions           VARCHAR    
SELECT nfunctions.id, nfunctions.name, nfunctions.id_act, nfunctions.act_type, nfunctions.act_date 
FROM nfunctions 
WHERE nfunctions.func = :func_1 ORDER BY nfunctions.name

Unnamed: 0,id,name,id_act,act_type,act_date
0,b1685.8-per4,? coelho,b1685.8,bap,16850830
1,b1685.3-per5,ana velho,b1685.3,bap,16850802
2,b1685.13-per5,antonia,b1685.13,bap,16850923
3,b1685.7-per5,brites goncalves,b1685.7,bap,16850828
4,b1685.18-per5,catarina joao,b1685.18,bap,16851022
5,b1685.17-per5,domingas joao,b1685.17,bap,16851021
6,b1685.27b-per4,helena,b1685.27b,bap,16851125
7,b1685.29-per5,helena,b1685.29,bap,16851213
8,b1685.21-per5,ines ?,b1685.21,bap,16851104
9,b1685.23-per5,isabel de seia,b1685.23,bap,16841208


## Relations

### Relation types

In [28]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

db.describe('relations', show=True)  # print the table's columns for reference
relation = db.get_table('relations')

# For each relation type: number of rows and number of distinct values.
stmt = (
    select(
        relation.c.the_type,
        func.count().label("count"),
        func.count(func.distinct(relation.c.the_value)).label("distinct_value"),
    )
    .group_by(relation.c.the_type)
)

print(stmt)

with db.session() as session:
    rel_types = session.execute(stmt)
    rel_types_df = pd.DataFrame(rel_types)

rel_types_df


relations
id                   relations            VARCHAR    {ForeignKey('entities.id')}
origin               relations            VARCHAR    {ForeignKey('entities.id')}
destination          relations            VARCHAR    {ForeignKey('entities.id')}
the_type             relations            VARCHAR    
the_value            relations            VARCHAR    
the_date             relations            VARCHAR    
obs                  relations            VARCHAR    
SELECT relations.the_type, count(*) AS count, count(distinct(relations.the_value)) AS distinct_value 
FROM relations GROUP BY relations.the_type


Unnamed: 0,the_type,count,distinct_value
0,eclesiastica,4,2
1,eclesiástica,1,1
2,function-in-act,1315,15
3,geografica,4,1
4,identification,5,1
5,institucional,6,3
6,parentesco,411,9
7,profissional,4,4
8,sociabilidade,32,7


### Entities with a specific type of relation

In [32]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

# `nrelations` view: one row per relation, with origin and destination
# ids and names (see the column listing printed by describe()).
nrels = db.nrelations_view
db.describe(nrels, show=True)

# Relation type to look up.
a_type = 'parentesco'

stmt = (
    select(
        nrels.c.origin_id,
        nrels.c.origin_name,
        nrels.c.relation_type,
        nrels.c.relation_value,
        nrels.c.destination_id,
        nrels.c.destination_name,
        nrels.c.relation_date,
    )
    .where(nrels.c.relation_type == a_type)
)
print(stmt)

with db.session() as session:
    rels_of_type = session.execute(stmt)
    rels_of_type_df = pd.DataFrame(rels_of_type)

# Show a random sample of at most 20 rows. Capping `n` at the frame length
# avoids the ValueError that sample(20) raises when fewer than 20 rows
# match; the fixed random_state makes the displayed sample reproducible.
rels_of_type_df.sample(n=min(20, len(rels_of_type_df)), random_state=42)

nrelations
relation_id          nrelations           VARCHAR    {ForeignKey('entities.id')}
origin_id            nrelations           VARCHAR    
origin_name          nrelations           VARCHAR    
destination_id       nrelations           VARCHAR    
destination_name     nrelations           VARCHAR    
relation_type        nrelations           VARCHAR    
relation_value       nrelations           VARCHAR    
relation_date        nrelations           VARCHAR    
SELECT nrelations.origin_id, nrelations.origin_name, nrelations.relation_type, nrelations.relation_value, nrelations.destination_id, nrelations.destination_name, nrelations.relation_date 
FROM nrelations 
WHERE nrelations.relation_type = :relation_type_1


Unnamed: 0,origin_id,origin_name,relation_type,relation_value,destination_id,destination_name,relation_date
300,b1685.7-per1-per3,isabel joao,parentesco,mae,b1685.7-per1,joao,16850828
131,140579-pai,Manuel Gomes de Abreu,parentesco,pai,140579,António Gomes de Abreu,20200211
220,140727-pai,João de Abreu,parentesco,pai,140727,"Francisco de Abreu, padre",20200211
40,140422-pai,Marcos Lourenço,parentesco,pai,140422,Jerónimo de Abranches,20200211
58,140450-pai,José Godinho de Miranda,parentesco,pai,140450,Luís António de Abranches,20200211
275,deh-matteo-ricci-pai,Giovanni Battista Ricci,parentesco,marido,deh-matteo-ricci-mae,Giovanna Angiolelli,20240920
117,140558-pai,João de Abreu,parentesco,pai,140558,António Alexandre de Abreu,20200211
374,b1685.27b.1-per1,manuel francisco,parentesco,pai,b1685.27b.1,maria,16851125
67,140471-pai,Teodorico José de Abranches,parentesco,pai,140471,Vicente Luís de Abranches,20200211
360,b1685.23-per6,cosme velho,parentesco,marido,b1685.23-per5,isabel de seia,16841208


### Number of relations by type and value

In [36]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

db.describe('relations', show=True)  # print the table's columns for reference
relation = db.get_table('relations')

# Number of relations for every (type, value) combination.
stmt = (
    select(
        relation.c.the_type,
        relation.c.the_value,
        func.count().label("count"),
    )
    .group_by(relation.c.the_type, relation.c.the_value)
)

print(stmt)

with db.session() as session:
    rel_types_values = session.execute(stmt)
    rel_types_values_df = pd.DataFrame(rel_types_values)

rel_types_values_df


relations
id                   relations            VARCHAR    {ForeignKey('entities.id')}
origin               relations            VARCHAR    {ForeignKey('entities.id')}
destination          relations            VARCHAR    {ForeignKey('entities.id')}
the_type             relations            VARCHAR    
the_value            relations            VARCHAR    
the_date             relations            VARCHAR    
obs                  relations            VARCHAR    
SELECT relations.the_type, relations.the_value, count(*) AS count 
FROM relations GROUP BY relations.the_type, relations.the_value


Unnamed: 0,the_type,the_value,count
0,eclesiastica,Ordena,3
1,eclesiastica,Propõe como bispo de Foochow,1
2,eclesiástica,Propõe como vigário apostólico de Pequim,1
3,function-in-act,geo1,18
4,function-in-act,geo2,62
5,function-in-act,geo3,135
6,function-in-act,irmao,2
7,function-in-act,mad,29
8,function-in-act,mae,44
9,function-in-act,mrmad,6


### Entities with a relation with a specific type and value

In [38]:
import pandas as pd

from sqlalchemy import func
from sqlalchemy import select

# `nrelations` view: one row per relation, with origin and destination
# ids and names (see the column listing printed by describe()).
nrels = db.nrelations_view
db.describe(nrels, show=True)

# Relation type and value to look up.
a_type = 'sociabilidade'
a_value = 'Companheiro'

# Both conditions passed to where() are combined with AND (see printed SQL).
stmt = (
    select(
        nrels.c.origin_id,
        nrels.c.origin_name,
        nrels.c.relation_type,
        nrels.c.relation_value,
        nrels.c.destination_id,
        nrels.c.destination_name,
        nrels.c.relation_date,
    )
    .where(
        nrels.c.relation_type == a_type,
        nrels.c.relation_value == a_value,
    )
)
print(stmt)

with db.session() as session:
    rels_of_type = session.execute(stmt)
    rels_of_type_df = pd.DataFrame(rels_of_type)

# Show a random sample of at most 20 rows. A narrow type/value filter can
# easily match fewer than 20 rows, in which case sample(20) raises
# ValueError; the fixed random_state makes the displayed sample reproducible.
rels_of_type_df.sample(n=min(20, len(rels_of_type_df)), random_state=42)

nrelations
relation_id          nrelations           VARCHAR    {ForeignKey('entities.id')}
origin_id            nrelations           VARCHAR    
origin_name          nrelations           VARCHAR    
destination_id       nrelations           VARCHAR    
destination_name     nrelations           VARCHAR    
relation_type        nrelations           VARCHAR    
relation_value       nrelations           VARCHAR    
relation_date        nrelations           VARCHAR    
SELECT nrelations.origin_id, nrelations.origin_name, nrelations.relation_type, nrelations.relation_value, nrelations.destination_id, nrelations.destination_name, nrelations.relation_date 
FROM nrelations 
WHERE nrelations.relation_type = :relation_type_1 AND nrelations.relation_value = :relation_value_1


Unnamed: 0,origin_id,origin_name,relation_type,relation_value,destination_id,destination_name,relation_date
8,deh-jean-joseph-marie-amiot,Jean-Joseph-Marie Amiot,sociabilidade,Companheiro,deh-jean-joseph-marie-amiot-ref1,Jean-Régis-Lieou,17491229
12,deh-lazzaro-cattaneo,Lazzaro Cattaneo,sociabilidade,Companheiro,deh-lazzaro-cattaneo-ref1,Matteo Ricci,15940000
16,deh-sebastien-fernandes-tchong,Sébastien Fernandes Tchong,sociabilidade,Companheiro,deh-sebastien-fernandes-tchong-ref2,Matteo Ricci,15910101
19,deh-bento-de-gois,Bento de Góis,sociabilidade,Companheiro,deh-bento-de-gois-ref3,Jean Fernandes Tchong,16060000
2,deh-pedro-de-alcacova-ref1,Francisco Xavier,sociabilidade,Companheiro,deh-pedro-de-alcacova,Pedro de Alcáçova,0
9,deh-jean-joseph-marie-amiot,Jean-Joseph-Marie Amiot,sociabilidade,Companheiro,deh-jean-joseph-marie-amiot-ref2,Philippe-Stanislas K'ang,17491229
5,deh-louis-archambaud-ref1,Claudio Filippo Grimaldi,sociabilidade,Companheiro,deh-louis-archambaud,Louis Archambaud,16930000
13,deh-giovanni-cola-niccolo,Giovanni Cola Niccolò,sociabilidade,Companheiro,deh-giovanni-cola-niccolo-ref1,Matteo Ricci,15820807
14,deh-sabatino-de-ursis,Sabatino De Ursis,sociabilidade,Companheiro,deh-sabatino-de-ursis-ref1,Matteo Ricci,16170318
15,deh-sebastien-fernandes-tchong,Sébastien Fernandes Tchong,sociabilidade,Companheiro,deh-sebastien-fernandes-tchong-ref1,Francisco Martins Houang,15891100
