
Eliminating Duplications (just the raw session history)

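Below is the raw interactive session history used to find and merge duplicate BaseStudy records, matching on DOI and PMID and using Biopython's Entrez module to translate between the two via PubMed.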
from sqlalchemy import func
# find DOIs that appear on more than one BaseStudy record
query = db.session.query(BaseStudy.doi, func.count(BaseStudy.doi)).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
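# normalize empty-string DOIs and PMIDs to NULL so blank identifiers are not counted as duplicates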
empty_string_dois = BaseStudy.query.filter_by(doi="").all()
empty_string_dois
for bs in empty_string_dois:
    bs.doi = None
db.session.add_all(empty_string_dois)
db.session.commit()
empty_string_pmids = BaseStudy.query.filter_by(pmid="").all()
for bs in empty_string_pmids:
    bs.pmid = None
db.session.add_all(empty_string_pmids)
db.session.commit()
query = db.session.query(BaseStudy.doi, func.count(BaseStudy.doi)).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
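# the next few query variants (selecting ids, group_concat, string_agg) apparently error out,
# hence the session rollbacks; the plain group_by(doi) form is the one used going forward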
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois
dup_dois = query.all()
dup_dois
query = db.session.query(BaseStudy.doi, BaseStudy.id).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
db.session.rollback()
dup_dois = query.all()
dup_dois
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi, BaseStudy.id).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi, func.group_concat(BaseStudy.id)).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi, func.string_agg(BaseStudy.id, ',')).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
db.session.rollback()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi, func.string_agg(BaseStudy.id, ',')).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
db.session.rollback()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
dup_dois[0][0]
BaseStudy.query.filter_by(doi=dup_dois[0][0])
BaseStudy.query.filter_by(doi=dup_dois[0][0]).all()
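# install Biopython and use NCBI's E-utilities to translate between DOIs and PMIDs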
%pip install biopython
from Bio import Entrez

def doi_to_pmid(doi):
    """Return the PMID(s) that PubMed's ESearch finds for a DOI."""
    Entrez.email = "jamesdkent21@gmail.com"  # NCBI requires a contact email
    handle = Entrez.esearch(db="pubmed", term=doi)
    record = Entrez.read(handle)
    handle.close()
    pmids = record['IdList']
    return pmids
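# Note: ESearch can return zero or several IDs for a term, so indexing the result with [0]
# (as done further below) can raise IndexError on an empty list. A minimal defensive sketch,
# using a hypothetical helper name that is not part of the original session:
def doi_to_pmid_single(doi):
    pmids = doi_to_pmid(doi)
    return pmids[0] if pmids else None  # first match, or None when nothing was found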
doi_to_pmid(dup_dois[0][0])
dup_bs = BaseStudy.query.filter_by(doi=dup_dois[0][0]).all()
dup_bs
dup_bs[0].pmid
dup_bs[1].pmid
def get_paper_doi(pmid):
    Entrez.email = "your@email.com"  # Put your email here
    handle = Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
    record = Entrez.read(handle)
    handle.close()
    if record['PubmedArticle']:
        article = record['PubmedArticle'][0]['PubmedData']['ArticleIdList']
        for item in article:
            if item.attributes['IdType'] == 'doi':
                return item
    return "DOI not found for the given PMID."
get_paper_doi(dup_bs[0].pmid)
res = get_paper_doi(dup_bs[0].pmid)
str(res)
def get_paper_doi(pmid):
    Entrez.email = "jamesdkent@gmail.com"  # Put your email here
    handle = Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
    record = Entrez.read(handle)
    handle.close()
    if record['PubmedArticle']:
        article = record['PubmedArticle'][0]['PubmedData']['ArticleIdList']
        for item in article:
            if item.attributes['IdType'] == 'doi':
                return str(item)
    return "DOI not found for the given PMID."
to_commit = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            continue
        if bs.pmid:
            doi = get_paper_doi(bs.pmid)
            doi = doi if "found" not in doi else bs.doi
            if doi != bs.doi:
                bs.doi = doi
                to_commit.append(bs)
                print(f"CHANGING {bs.name}")
                for v in bs.versions:
                    v.doi = doi
                    to_commit.append(v)
to_commit
db.session.add_all(to_commit)
db.session.commit()
dup_bs = BaseStudy.query.filter_by(doi=dup_dois[0][0]).all()
dup_bs
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
bss = BaseStudy.query.filter_by(doi="10.1016/j.cortex.2013.01.015").all()
bss
bss[0].doi
bss[0].pmid
bss[1].pmid
bss[1].name
bss[0].name
doi_to_pmid(bss[0].doi)
get_paper_doi('23994216')
to_commit = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            continue
        if bs.pmid:
            doi = get_paper_doi(bs.pmid) 
            doi = doi if "found" not in doi else bs.doi
            if doi != bs.doi:
                bs.doi = doi
                to_commit.append(bs)
                print(f"CHANGING {bs.name}")
                for v in bs.versions:
                    v.doi = doi
                    to_commit.append(v)
db.session.add_all(to_commit)
db.session.commit()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
to_commit = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            continue
        if bs.pmid:
            doi = get_paper_doi(bs.pmid) 
            doi = doi if "found" not in doi else bs.doi
            if doi != bs.doi:
                bs.doi = doi
                to_commit.append(bs)
                print(f"CHANGING {bs.name}")
                for v in bs.versions:
                    v.doi = doi
                    to_commit.append(v)
to_commit
bss = BaseStudy.query.filter_by(doi="10.1016/j.neurobiolaging.2023.08.008").all()
bss
bss[0].pmid
bss[1].pmid
doi_to_pmid(bss[1].pmid)
doi_to_pmid(bss[1].doi)
doi_to_pmid(bss[0].pmid)
doi_to_pmid(bss[0].doi)
doi_to_pmid(bss[1].pmid)
to_commit = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])[0]
    non_matching_bs = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            continue
        if bs.pmid:
            doi = get_paper_doi(bs.pmid) 
            doi = doi if "found" not in doi else bs.doi
            if doi != bs.doi:
                bs.doi = doi
                to_commit.append(bs)
                print(f"CHANGING {bs.name}")
                for v in bs.versions:
                    v.doi = doi
                    to_commit.append(v)
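# now repeat the process for duplicated PMIDs, starting by normalizing empty-string pmids
# (this time on the versions as well)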
query = db.session.query(BaseStudy.pmid).group_by(BaseStudy.pmid).having(func.count(BaseStudy.pmid) > 1)
dup_pmids = query.all()
dup_pmids
empty_string_pmids = BaseStudy.query.filter_by(pmid='').all()
len(empty_string_pmids)
to_commit = []
for bs in empty_string_pmids:
    bs.pmid = None
    to_commit.append(bs)
    for v in bs.versions:
        v.pmid = None if v.pmid == '' else v.pmid
        to_commit.append(v)
db.session.add_all(to_commit)
db.session.commit()
empty_string_pmids = BaseStudy.query.filter_by(pmid='').all()
empty_string_pmids
query = db.session.query(BaseStudy.pmid).group_by(BaseStudy.pmid).having(func.count(BaseStudy.pmid) > 1)
dup_pmids = query.all()
dup_pmids
BaseStudy.query.filter_by(pmid="10669519").all()
bss = BaseStudy.query.filter_by(pmid="10669519").all()
bss[0]
bss[0].doi
bss[1].doi
bss[0].name
bss[1].name
bss[1].user
bss[0].user
bss[1].user
bss[1].doi
bss[1].pmid
bss[1].versions
bss[0].versions
bss[0].versions[0].user
bss[0].versions[0].source
bss[0].versions[1].source
bss[0].versions[2].source
bss[0].abstracts
bss[0].description
bss[1].description
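# for each duplicated PMID: keep the record whose DOI matches, and re-derive the PMID from
# the DOI for the others, updating their versions too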
to_commit = []
for dup in dup_pmids:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    doi = get_paper_doi(dup[0])
    non_matching_bs = []
    for bs in dup_bs:
        if bs.doi == doi:
            continue
        if bs.doi:
            pmid = doi_to_pmid(bs.doi)[0] 
            if pmid != bs.pmid:
                bs.pmid = pmid
                to_commit.append(bs)
                print(f"CHANGING {bs.name}")
                for v in bs.versions:
                    v.pmid = pmid
                    to_commit.append(v)
to_commit
db.session.add_all(to_commit)
db.session.commit()
query = db.session.query(BaseStudy.pmid).group_by(BaseStudy.pmid).having(func.count(BaseStudy.pmid) > 1)
dup_pmids = query.all()
dup_pmids
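# the remaining duplicates share a PMID but disagree on DOI; collect the ones whose DOI does
# not match and compare their titles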
to_commit = []
for dup in dup_pmids:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    doi = get_paper_doi(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.doi == doi:
            name = bs.name
        else:
            non_matching_dups.append(bs)
non_matching_dups
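# helpers: compare study titles ignoring punctuation and whitespace (case-insensitivity is added later)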
import re

def preprocess_string(string):
    # Remove non-alphanumeric characters and whitespace
    return re.sub(r'[^a-zA-Z0-9]', '', string)

def compare_strings(string1, string2):
    processed_string1 = preprocess_string(string1)
    processed_string2 = preprocess_string(string2)
    return processed_string1 == processed_string2
to_commit = []
for dup in dup_pmids:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    doi = get_paper_doi(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.doi == doi:
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name, name)
        if match:
            print(f"DUP: {name}")
to_commit = []
to_delete = []
for dup in dup_pmids:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    doi = get_paper_doi(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.doi == doi:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name, name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
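# Note on the attribute-copying line above (and in the later merge loops): the three-argument
# form getattr(main_bs, attr, getattr(nm_bs, attr, None)) never falls back to nm_bs, because
# these attributes exist on main_bs (their values may just be None), so the setattr in effect
# re-assigns main_bs's own value and only the versions are really merged. A minimal sketch of
# the presumably intended "fill in missing fields from the duplicate" behaviour, assuming the
# metadata column is named metadata_ as in the later loops:
for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
    # keep main_bs's value when it is set, otherwise copy the value from the duplicate
    setattr(main_bs, attr, getattr(main_bs, attr) or getattr(nm_bs, attr))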
to_commit
to_delete
to_delete[0].versions
to_delete[1].versions
to_delete[-1].versions
for bs in to_delete:
    db.session.delete(bs)
db.session.commit()
db.session.add_all(to_commit)
db.session.commit()
query = db.session.query(BaseStudy.pmid).group_by(BaseStudy.pmid).having(func.count(BaseStudy.pmid) > 1)
dup_pmids = query.all()
dup_pmids
BaseStudy.query.filter_by(pmid='16371250').all()
bss = BaseStudy.query.filter_by(pmid='16371250').all()
bss[0]
bss[0].name
bss[1].name
compare_strings(bss[0].name, bss[1].name)
preprocess_string(bss[0].name)
preprocess_string(bss[1].name)
preprocess_string(bss[0].name) == preprocess_string(bss[1].name)
preprocess_string(bss[0].name).lower() == preprocess_string(bss[1].name).lower()
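# the two names differ only in case, so compare_strings is redefined below to lowercase both
# sides before comparing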
import re

def preprocess_string(string):
    # Remove non-alphanumeric characters and whitespace
    return re.sub(r'[^a-zA-Z0-9]', '', string)

def compare_strings(string1, string2):
    processed_string1 = preprocess_string(string1).lower()
    processed_string2 = preprocess_string(string2).lower()
    return processed_string1 == processed_string2
to_commit = []
to_delete = []
for dup in dup_pmids:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    doi = get_paper_doi(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.doi == doi:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name, name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
to_delete
to_delete[0]
to_delete[0].name
to_delete[4].name
to_delete[5].name
to_delete[6].name
to_delete[12].name
to_delete[12].doi
to_delete[12].pmid
to_delete[13].pmid
to_delete[13].name
to_delete[13].doi
to_delete[13].versions
to_commit
to_commit[12]
to_commit[12].name
to_commit[12].versions
to_delete[13].user
to_delete[12].user
for bs in to_delete:
    db.session.delete(bs)
db.session.add_all(to_commit)
db.session.commit()
query = db.session.query(BaseStudy.pmid).group_by(BaseStudy.pmid).having(func.count(BaseStudy.pmid) > 1)
dup_pmids = query.all()
dup_pmids
dup_dois
BaseStudy.query.filter_by(doi="10.1016/j.neurobiolaging.2023.08.008").all()
bss = BaseStudy.query.filter_by(doi="10.1016/j.neurobiolaging.2023.08.008").all()
bss[0].pmid
bss[1].pmid
bss[1].name
bss[0].name
bss[0].user
bss[0].versions
bss[1].versions
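# note: several of the following passes iterate over dup_dois but filter by pmid=dup[0]
# (a DOI), so they match nothing; the final pass further down filters by doi and is the one
# that actually merges the remaining duplicates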
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name, name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name, name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    pmid = doi_to_pmid(dup[0])
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name or '', name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
bss[0].name
bss[1].name
compare_strings(bss[0].name, bss[1].name)
dup_dois
bss[0].name
bss[0].pmid
bss[0].doi
get_paper_doi(bss[0]).doi
get_paper_doi(bss[0])
get_paper_doi(bss[0].doi)
get_paper_doi(bss[0].pmid)
get_paper_doi(bss[0].pmid)
get_paper_doi(bss[0].doi)
bss[0].doi
doi_to_pmid(bss[0].doi)
get_paper_doi(bss[0].pmid)
get_paper_doi(bss[1].pmid)
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    pmid = doi_to_pmid(dup[0])[0]
    non_matching_bs = []
    name = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name or '', name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
to_commit
doi_to_pmid(bss[0].doi)
doi_to_pmid(bss[0].doi)[0]
bss[0].pmid
bss[1].pmid
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(pmid=dup[0]).all()
    pmid = doi_to_pmid(dup[0])[0]
    non_matching_bs = []
    name = None
    main_bs = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    if not main_bs:
        print('no main bs found')
        continue
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name or '', name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
dup_dois
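# final pass over the duplicated DOIs, this time correctly filtering by doi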
to_commit = []
to_delete = []
for dup in dup_dois:
    dup_bs = BaseStudy.query.filter_by(doi=dup[0]).all()
    pmid = doi_to_pmid(dup[0])[0]
    non_matching_bs = []
    name = None
    main_bs = None
    non_matching_dups = []
    for bs in dup_bs:
        if bs.pmid == pmid:
            main_bs = bs
            name = bs.name
        else:
            non_matching_dups.append(bs)
    if not main_bs:
        print('no main bs found')
        continue
    for nm_bs in non_matching_dups:
        match = compare_strings(nm_bs.name or '', name)
        if match:
            print(f"DUP: {name}")
            main_bs.versions.extend(nm_bs.versions)
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                setattr(main_bs, attr, getattr(main_bs, attr, getattr(nm_bs, attr, None)))
            to_delete.append(nm_bs)
            to_commit.append(main_bs)
to_delete
to_delete[0].name
to_delete[0].versions
to_commit
to_commit[0].name
to_commit[0].doi
to_commit[0].pmid
for bs in to_delete:
    db.session.delete(bs)
db.session.add_all(to_commit)
db.session.commit()
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
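# one duplicate DOI remains after the automatic pass; it is inspected and merged by hand:
# move the versions onto one record and delete the other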
BaseStudy.query.filter_by(doi="10.1016/j.neuroimage.2014.06.047").all()
bss = BaseStudy.query.filter_by(doi="10.1016/j.neuroimage.2014.06.047").all()
bss[0]
bss[0].pmid
bss[0].name
bss[1].name
bss[1].pmid
bss[1].user
bss[0].user
bss[1].versions
bss[1].versions[0]
bss[1].versions[0].source
bss[1].versions[1].source
bss[0].versions[1].source
bss[0].versions[0].source
bss[0].versions[0]
bss[0].versions[0].name
bss[0].pmid
bss[1].versions
bss[1].versions.extend(bss[0].versions)
bss[1]
bss[1].versions
bss[1].publication
bss[0].publication
bss[0].year
bss[1].year
bss[0].versions
bss[0].description
bss[1].description
db.session.delete(bss[0])
db.session.add(bss[1])
db.session.commit()
bss[1].versions
query = db.session.query(BaseStudy.doi).group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1)
dup_dois = query.all()
dup_dois
history  # the (IPython) history command that produced the session log this page was copied from