# Load data

In [4]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('../Data/Datos_CLOFBO-Plata.tsv', sep='\t', encoding='ISO-8859-1')
# Display the first few rows
print(df.head())

df['genus'] = df['scientificName'].str.split(' ').str[0]

   number            type    modified  \
0       1  PhysicalObject  2024-11-02   
1       2  PhysicalObject  2024-11-02   
2       3  PhysicalObject  2024-11-02   
3       4  PhysicalObject  2024-11-02   
4       5  PhysicalObject  2024-11-02   

                               bibliographicCitation  \
0  Boulenger, G. A. (1897) Viaggio del Dott. Alfr...   
1  Boulenger, G. A. (1897) Viaggio del Dott. Alfr...   
2  Boulenger, G. A. (1897) Viaggio del Dott. Alfr...   
3  Boulenger, G. A. (1897) Viaggio del Dott. Alfr...   
4  Boulenger, G. A. (1897) Viaggio del Dott. Alfr...   

  bibliographicCitation_abbrev     basisOfRecord catalogNumber recordNumber  \
0             Boulenger (1897)  MaterialCitation           NaN          NaN   
1             Boulenger (1897)  MaterialCitation           NaN          NaN   
2             Boulenger (1897)  MaterialCitation           NaN          NaN   
3             Boulenger (1897)  MaterialCitation           NaN          NaN   
4             Bouleng

In [5]:
class_levels_list = [level for level in df['class'].unique() if pd.notna(level)]
print(class_levels_list)

# Loop through each class level
for class_level in class_levels_list:
    
    # Filter the dataframe for the current class level
    class_df = df[df['class'] == class_level]
    
    # Do something with class_df here
    # For example, print how many rows belong to this class
    upper_class = class_level.upper()
    print(upper_class)
    spaced_class = ' '.join(upper_class)
    print(spaced_class)



['Actinopterygii', 'Elasmobranchii', 'Dipnoi']
ACTINOPTERYGII
A C T I N O P T E R Y G I I
ELASMOBRANCHII
E L A S M O B R A N C H I I
DIPNOI
D I P N O I


In [6]:
from docx import Document
from docx.shared import Pt  # for font size in points
from docx.oxml.ns import qn  # for setting font name properly
from docx.shared import Inches, Mm
from docx.enum.text import WD_LINE_SPACING
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Create a new Document
doc = Document()

# Set the document layout
section = doc.sections[0]

# Set to A4 size (210mm x 297mm)
section.page_height = Mm(297)
section.page_width = Mm(210)

section.top_margin = Inches(0.98)
section.bottom_margin = Inches(0.79)
section.left_margin = Inches(0.98)
section.right_margin = Inches(0.69)

# Title

# Title line
title = doc.add_paragraph()

# First run: small text
run1 = title.add_run("Check List of the Freshwater Fishes from the La Plata basin in Bolivia (CLOFFBO-Plata).")
run1.font.name = 'Arial'
run1.font.size = Pt(12)
run1.font.bold = True

empty_line = doc.add_paragraph()
run = empty_line.add_run("")
run.font.name = 'Arial'
run.font.size = Pt(10)

# Authors

# Author line
title = doc.add_paragraph()

# First run: small text
run = title.add_run("Pascal Hablützel, Stefan Koerber, + ???")
run.font.name = 'Arial'
run.font.size = Pt(10)
run.font.bold = True

empty_line = doc.add_paragraph()
run = empty_line.add_run("")
run.font.name = 'Arial'
run.font.size = Pt(10)

# Overview table

data = []
data.append(['Order', 'Terrazas-Urquidi (1970)', 'Current list'])

order_list = [level for level in df['order'].unique() if pd.notna(level)]
for order in order_list:
    order_df = df[df['order'] == order]
    order_T_df = order_df[order_df['bibliographicCitation'] == 'Terrazas Urquidi, W. (1970) Lista de peces Bolivianos. Academia Nacional de Ciencias de Bolivia. La Paz. Publicación 24. 65 pp.']
    num_levels_T = order_T_df['verbatimIdentification'].dropna().nunique()
    num_levels_C = order_df['specificEpithet'].dropna().nunique()
    data.append([order, num_levels_T, num_levels_C])

T_df = df[df['bibliographicCitation'] == 'Terrazas Urquidi, W. (1970) Lista de peces Bolivianos. Academia Nacional de Ciencias de Bolivia. La Paz. Publicación 24. 65 pp.']
data.append(['Total', T_df['verbatimIdentification'].dropna().nunique(), df['specificEpithet'].dropna().nunique()])
print(data)

table = doc.add_table(rows=0, cols=3)
table.style = 'Table Grid'

for row_index, row_data in enumerate(data):
    row = table.add_row().cells
    for col_index, cell_text in enumerate(row_data):
        paragraph = row[col_index].paragraphs[0]
        run = paragraph.add_run(str(cell_text))

        # Set font
        run.font.name = 'Arial'
        run.font.size = Pt(10)
        run._element.rPr.rFonts.set(qn('w:eastAsia'), 'Arial')

        # Bold header
        if row_index == 0:
            run.bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT  # ← Force left-alignment for both header columns
        else:
            # Data rows: align based on column
            if col_index == 0:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
            else:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Add an empty line at the end of the table
empty_line = doc.add_paragraph()
run = empty_line.add_run("")
run.font.name = 'Arial'
run.font.size = Pt(10)


# Class line
        
class_levels_list = sorted([level for level in df['class'].unique() if pd.notna(level)])
print(class_levels_list)

# Loop through each class level
for class_level in class_levels_list:
    
    # Filter the dataframe for the current class level
    class_df = df[df['class'] == class_level]
    
    # Do something with class_df here
    # For example, print how many rows belong to this class
    upper_class = class_level.upper()
    print(upper_class)
    spaced_class = ' '.join(upper_class)
    print(spaced_class)
    
    # Class line
    class_ = doc.add_paragraph()

    # First run: small text
    run1 = class_.add_run("class	")
    run1.font.name = 'Arial'
    run1.font.size = Pt(10)

    # Second run: larger text
    run2 = class_.add_run(spaced_class)
    run2.font.name = 'Arial'
    run2.font.size = Pt(12)
    
    # Loop through each order level
    order_levels_list = sorted([level for level in class_df['order'].unique() if pd.notna(level)])
    print(order_levels_list)
    
    for order_level in order_levels_list:
    
        # Filter the dataframe for the current class level
        order_df = class_df[class_df['order'] == order_level]
    
        # Order line
        order = doc.add_paragraph()

        # First run: small text
        run1 = order.add_run("order		")
        run1.font.name = 'Arial'
        run1.font.size = Pt(10)

        # Second run: larger text
        run2 = order.add_run(order_level)
        run2.font.name = 'Arial'
        run2.font.size = Pt(12)
        run2.font.bold = True
        
        # Loop through each family level
        family_levels_list = sorted([level for level in order_df['family'].unique() if pd.notna(level)])
        print(family_levels_list)
    
        for family_level in family_levels_list:
    
            # Filter the dataframe for the current class level
            family_df = order_df[order_df['family'] == family_level]        
        
            # Family line
            order = doc.add_paragraph()

            # First run: small text
            run1 = order.add_run("family			")
            run1.font.name = 'Arial'
            run1.font.size = Pt(10)

            # Second run: larger text
            run2 = order.add_run(family_level)
            run2.font.name = 'Arial'
            run2.font.size = Pt(12)
            run2.font.bold = True
            
            # Loop through each family level
            genus_levels_list = sorted([level for level in family_df['genus'].unique() if pd.notna(level)])
            print(genus_levels_list)
    
            for genus_level in genus_levels_list:
    
                # Filter the dataframe for the current class level
                genus_df = family_df[family_df['genus'] == genus_level] 
            
                # Genus line
                genus = doc.add_paragraph()

                # First run: small text
                run1 = genus.add_run(genus_level)
                run1.font.name = 'Arial'
                run1.font.size = Pt(10)
                run1.font.bold = True
                run1.font.italic = True
                
                # Loop through each genus level
                species_levels_list = sorted([level for level in genus_df['specificEpithet'].unique() if pd.notna(level)])
                print(species_levels_list)
    
                for species_level in species_levels_list:
    
                    # Filter the dataframe for the current class level
                    species_df = genus_df[genus_df['specificEpithet'] == species_level] 
                    author = species_df['scientificNameAuthorship'].iloc[0]
                    original_name = species_df['originalNameUsage'].iloc[0]
                    
                    # Species line
                    species = doc.add_paragraph()

                    run = species.add_run("	" + genus_level)
                    run.font.name = 'Arial'
                    run.font.size = Pt(10)
                    run.font.bold = True
                    run.font.italic = True
                    
                    run = species.add_run(" " + species_level)
                    run.font.name = 'Arial'
                    run.font.size = Pt(10)
                    run.font.bold = True
                    run.font.italic = True
                    
                    run = species.add_run(" " + str(author))
                    run.font.name = 'Arial'
                    run.font.size = Pt(10)
                    
                    run.add_break()
                    run = species.add_run("			original		" + str(original_name))
                    run.font.name = 'Arial'
                    run.font.size = Pt(10)
                    
                    record_level_list = [level for level in species_df['bibliographicCitation_abbrev'].unique() if pd.notna(level)]
                    print(record_level_list)
                    
                    for i, record_level in enumerate(record_level_list):
                        if i == 0:
                            # First iteration
                            run.add_break()
                            record_df = species_df[species_df['bibliographicCitation_abbrev'] == record_level]
                            index = record_level.find(')')
                            if index != -1:
                                trimmed_record = record_level[:index + 1]
                            else:
                                trimmed_record = record_level
                            verbatim_id = record_df['verbatimIdentification'].iloc[0]
                            verbatim_id = "" if pd.isna(verbatim_id) or str(verbatim_id).strip() == "" else str(verbatim_id)
                            run = species.add_run("			record		" + verbatim_id + ": " + trimmed_record + ": ")
                            run.font.name = 'Arial'
                            run.font.size = Pt(8)
                            
                            sub_record_level_list = [level for level in record_df['type'].unique() if pd.notna(level)]
                            print(sub_record_level_list)
                        
                            for sub_record_level in sub_record_level_list:
                                sub_record_df = record_df[record_df['type'] == sub_record_level]
                                mask = (
                                    sub_record_df['catalogNumber'].notna() & (sub_record_df['catalogNumber'].astype(str).str.strip() != "")
                                ) | (
                                    sub_record_df['recordNumber'].notna() & (sub_record_df['recordNumber'].astype(str).str.strip() != "")
                                )
                                sub_record_df.loc[mask, 'type'] = 'specimen'
                                sub_record_type = sub_record_df['type']
                                #sub_record_type = list(sub_record_df['type'].dropna().unique())
                                #sub_record_type_str = ", ".join(sub_record_type.dropna().astype(str))
                                sub_record_type_str = ", ".join(sorted(set(sub_record_type.dropna().astype(str))))
                                sub_record_type_str = sub_record_type_str.replace("Text", "listing").replace("PhysicalObject", "observation").replace("StillImage", "illustration")
                                print(sub_record_type_str)
                                run = species.add_run(sub_record_type_str + " ")
                                run.font.name = 'Arial'
                                run.font.size = Pt(8)
                        else:
                            # All other iterations
                            run.add_break()
                            record_df = species_df[species_df['bibliographicCitation_abbrev'] == record_level]
                            index = record_level.find(')')
                            if index != -1:
                                trimmed_record = record_level[:index + 1]
                            else:
                                trimmed_record = record_level
                            verbatim_id = record_df['verbatimIdentification'].iloc[0]
                            verbatim_id = "" if pd.isna(verbatim_id) or str(verbatim_id).strip() == "" else str(verbatim_id)
                            run = species.add_run("					" + verbatim_id + ": " + trimmed_record + ": ")
                            run.font.name = 'Arial'
                            run.font.size = Pt(8)
                            
                            sub_record_level_list = [level for level in record_df['type'].unique() if pd.notna(level)]
                            print(sub_record_level_list)
                        
                            for sub_record_level in sub_record_level_list:
                                sub_record_df = record_df[record_df['type'] == sub_record_level]
                                mask = (
                                    sub_record_df['catalogNumber'].notna() & (sub_record_df['catalogNumber'].astype(str).str.strip() != "")
                                ) | (
                                    sub_record_df['recordNumber'].notna() & (sub_record_df['recordNumber'].astype(str).str.strip() != "")
                                )
                                sub_record_df.loc[mask, 'type'] = 'specimen'
                                sub_record_type = sub_record_df['type']
                                #sub_record_type = list(sub_record_df['type'].dropna().unique())
                                #sub_record_type_str = ", ".join(sub_record_type.dropna().astype(str))
                                sub_record_type_str = ", ".join(sorted(set(sub_record_type.dropna().astype(str))))
                                sub_record_type_str = sub_record_type_str.replace("Text", "listing").replace("PhysicalObject", "observation").replace("StillImage", "illustration")
                                print(sub_record_type_str)
                                run = species.add_run(sub_record_type_str + " ")
                                run.font.name = 'Arial'
                                run.font.size = Pt(8)
                        
                empty_line = doc.add_paragraph()
                run = empty_line.add_run("")
                run.font.name = 'Arial'
                run.font.size = Pt(10)

# References
references = doc.add_paragraph()

# First run: small text
run = references.add_run("References")
run.font.name = 'Arial'
run.font.size = Pt(12)
run.font.bold = True                

# Add individual references
ref_list = [level for level in df['bibliographicCitation'].unique() if pd.notna(level)]

# Loop through each class level
for ref in ref_list:

    # First run: small text
    reference = doc.add_paragraph()
    reference.paragraph_format.left_indent = Inches(0.5)
    reference.paragraph_format.first_line_indent = Inches(-0.5)
    # Set single line spacing
    reference.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE
    # Remove spacing before/after paragraph
    reference.paragraph_format.space_before = Pt(0)
    reference.paragraph_format.space_after = Pt(0)
    run = reference.add_run(ref)
    run.font.name = 'Arial'
    run.font.size = Pt(9)

# Save the document
doc.save('../Data/CLOFBO-Plata.docx')

[['Order', 'Terrazas-Urquidi (1970)', 'Current list'], ['Siluriformes', 33, 101], ['Characiformes', 44, 134], ['Atheriniformes', 0, 10], ['Synbranchiformes', 1, 1], ['Blenniiformes', 11, 26], ['Gymnotiformes', 5, 13], ['Myliobatiformes', 1, 1], ['Clupeiformes', 0, 1], ['Acanthuriformes', 0, 2], ['Ceratodontiformes', 0, 1], ['Carangiformes', 0, 1], ['Salmoniformes', 0, 1], ['Cypriniformes', 0, 1], ['Perciformes', 0, 0], ['Total', 95, 272]]
['Actinopterygii', 'Dipnoi', 'Elasmobranchii']
ACTINOPTERYGII
A C T I N O P T E R Y G I I
['Acanthuriformes', 'Atheriniformes', 'Blenniiformes', 'Carangiformes', 'Characiformes', 'Clupeiformes', 'Cypriniformes', 'Gymnotiformes', 'Perciformes', 'Salmoniformes', 'Siluriformes', 'Synbranchiformes']
['Sciaenidae']
['Pachyurus', 'Plagioscion']
['bonariensis']
['Osinaga & Farell (2010)']
['Text']
listing
['ternetzi']
['Osinaga & Farell (2010)']
['Text']
listing
['Anablepidae', 'Atherinopsidae', 'Belonidae', 'Poeciliidae', 'Rivulidae']
['Jenynsia']
['alterni

['Eigenmann (1917b)', 'Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Reia et al. (2019)', 'Maldonado (2002b)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['PhysicalObject']
specimen
['PhysicalObject', 'StillImage']
observation
illustration
['Text']
listing
['Text']
listing
['Text']
listing
['Text']
listing
['orbicularis']
['GBIF (2025)']
['PhysicalObject']
specimen
['kennedyi']
['Eigenmann (1927)', 'Chernoff et al. (2001)', 'Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Sarmiento et al. (2019)', 'Hablützel et al. (in prep.)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['PhysicalObject']
specimen
['PhysicalObject']
observation
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['PhysicalObject']
specimen
['Text']
listing
['ternetzi']
['Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Hablützel et al. (in prep.)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['PhysicalObject']
observation
['Text']
listing
['PhysicalO

['Text']
listing
['Text']
listing
['Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
[]
[]
['argenteus']
['Osinaga & Farell (2010)']
['Text']
listing
['gulo']
['Terrazas Urquidi (1970)', 'Giovannetti, Toledo-Piza & Menezes (2017)']
['Text']
listing
['Text']
listing
['pequira']
['Boulenger (1897)', 'Eigenmann (1915a)', 'Sarmiento & Barrera (1997)', 'Chernoff et al. (2001)', 'Osinaga & Farell (2010)', 'Baigún & Minotti (2019)', 'Hablützel et al. (in prep.)', 'Maldonado (2002a)', 'Maldonado (2002b)', 'Halcrow & Serman Asociados (2006)']
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['PhysicalObject']
specimen
['Text']
listing
['Text']
listing
['Text']
listing
[]
['microcephala', 'paraguayensis', 'pequira']
['Eigenmann & Ogle (1907)', 'Eigenmann (1915a)', 'Fowler (1940)', 'Terrazas Ur

['Text']
listing
['GBIF (2025)']
['PhysicalObject']
specimen
['Terrazas Urquidi (1970)', 'Londoño-Burbano et al. (2011)', 'Baigún & Minotti (2019)', 'Maldonado (2002b)', 'Halcrow & Serman Asociados (2006)']
['Text']
listing
['Text']
listing
['Text']
listing
['Text']
listing
['Text']
listing
['Prochilodus']
['lineatus', 'nigricans', 'rubrotaeniatus']
['Boulenger (1897)', 'Sarmiento & Barrera (1997)', 'Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Baigún & Minotti (2019)', 'Sarmiento et al. (2019)', 'Hablützel et al. (in prep.)', 'GBIF (2025)', 'Maldonado (2002a)', 'Maldonado (2002b)', 'Baigún et al. (2019)', 'Careaga et al. (2019)', 'Van den Heuvel et al. (2019)', 'Palomeque De la Cruz (2012)', 'Halcrow & Serman Asociados (2006)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['PhysicalObject']
observation
['Text']
listing
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['Text']
listing
['PhysicalObject']
specimen
['PhysicalObject']
specimen
['Text']
l

['Text']
listing
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['PhysicalObject']
specimen
['Text']
listing
['Text']
listing
['Text']
listing
['Text']
listing
['Hablützel et al. (in prep.)']
['PhysicalObject']
specimen
['Terrazas Urquidi (1970)', 'Osinaga & Farell (2010)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['Text']
listing
['Text']
listing
['Text']
listing
[]
['Brochis', 'Callichthys', 'Corydoras', 'Gastrodermus', 'Hoplisoma', 'Hoplosternum', 'Lepthoplosternum', 'Megalechis', 'Osteogaster']
['britskii', 'pantanalensis']
['Osinaga & Farell (2010)', 'Hablützel et al. (in prep.)']
['Text']
listing
['PhysicalObject']
specimen
['Hablützel et al. (in prep.)']
['PhysicalObject']
specimen
['callichthys']
['Osinaga & Farell (2010)', 'Sarmiento et al. (2019)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['Text']
listing
['Text']
listing
['Text']
listing
['aurofrenatus']
['Farell & Cancino (2007)', 'Osinaga & Farell (2010)', 'Gobierno Autónomo 

listing
['PhysicalObject']
observation
['Text']
listing
['Text']
listing
['Text']
listing
['PhysicalObject']
specimen
['Text']
listing
['Text']
listing
['Text']
listing
['Text']
listing
['Maldonado (2002b)']
['Text']
listing
[]
['macrops']
['Baigún & Minotti (2019)', 'Halcrow & Serman Asociados (2006)']
['Text']
listing
['Text']
listing
['aurata', 'catamarcensis', 'lanceolata', 'parva', 'phoxocephala', 'steinbachi']
['Hablützel et al. (in prep.)']
['PhysicalObject']
specimen
['Boulenger (1897)', 'Regan (1904)', 'Eigenmann (1910)']
['PhysicalObject']
observation
['PhysicalObject']
observation
['Text']
listing
['Lizarro & Moreno-Aulo (2019)']
['Text']
listing
['Osinaga & Farell (2010)', 'Hablützel et al. (in prep.)', 'Gobierno Autónomo Departamental de Santa Cruz (2011)']
['Text']
listing
['PhysicalObject']
specimen
['Text']
listing
['Terrazas Urquidi (1970)', 'Sarmiento & Barrera (1997)', 'Maldonado (2002a)']
['Text']
listing
['Text']
listing
['Text']
listing
['Terrazas Urquidi (1970)',

In [87]:
master_list = []
master_list.append(['Order', 'Terrazas-Urquidi (1970)', 'Current list'])

order_list = [level for level in df['order'].unique() if pd.notna(level)]
for order in order_list:
    order_df = df[df['order'] == order]
    order_T_df = order_df[order_df['bibliographicCitation'] == 'Terrazas Urquidi, W. (1970) Lista de peces Bolivianos. Academia Nacional de Ciencias de Bolivia. La Paz. Publicación 24. 65 pp.']
    num_levels_T = order_T_df['verbatimIdentification'].dropna().nunique()
    num_levels_C = order_df['specificEpithet'].dropna().nunique()
    master_list.append([order, num_levels_T, num_levels_C])

T_df = df[df['bibliographicCitation'] == 'Terrazas Urquidi, W. (1970) Lista de peces Bolivianos. Academia Nacional de Ciencias de Bolivia. La Paz. Publicación 24. 65 pp.']
master_list.append(['Total', T_df['verbatimIdentification'].dropna().nunique(), df['specificEpithet'].dropna().nunique()])
print(master_list)

[['Order', 'Terrazas-Urquidi (1970)', 'Current list'], ['Cichliformes', 11, 19], ['Characiformes', 44, 98], ['Siluriformes', 33, 75], ['Cyprinodontiformes', 0, 7], ['Acanthuriformes', 0, 2], ['Gymnotiformes', 5, 9], ['Clupeiformes', 0, 1], ['Pleuronectiformes', 0, 1], ['Synbranchiformes', 1, 1], ['Beloniformes', 0, 0], ['Myliobatiformes', 1, 1], ['Ceratodontiformes', 0, 1], ['Total', 95, 200]]


In [68]:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_LINE_SPACING

doc = Document()

# Create a paragraph
paragraph = doc.add_paragraph()

# ✅ Hanging indent: first line at margin, rest indented
paragraph.paragraph_format.first_line_indent = Inches(0)  # no indent for the first line
paragraph.paragraph_format.left_indent = Inches(0.5)      # indent for all lines

# ✅ Set single line spacing
paragraph.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE
paragraph.paragraph_format.space_before = Pt(0)
paragraph.paragraph_format.space_after = Pt(0)

# Add text (long enough to wrap and show the indent effect)
text = (
    "This is a paragraph with a hanging indent. "
    "Only the second and following lines are indented, "
    "while the first line starts at the margin."
)
run = paragraph.add_run(text)
run.font.name = 'Arial'
run.font.size = Pt(10)

doc.save("hanging_indent_corrected.docx")

In [None]:
from docx import Document
from docx.shared import Pt  # for font size in points
from docx.oxml.ns import qn  # for setting font name properly
from docx.shared import Inches, Mm

# Create a new Document
doc = Document()

# Set the document layout
section = doc.sections[0]

# Set to A4 size (210mm x 297mm)
section.page_height = Mm(297)
section.page_width = Mm(210)

section.top_margin = Inches(0.98)
section.bottom_margin = Inches(0.79)
section.left_margin = Inches(0.98)
section.right_margin = Inches(0.69)

# Title

# Title line
title = doc.add_paragraph()

# First run: small text
run1 = title.add_run("Check List of the Freshwater Fishes from the La Plata basin in Bolivia (CLOFFBO-Plata).")
run1.font.name = 'Arial'
run1.font.size = Pt(12)
run1.font.bold = True

# Class line
class_ = doc.add_paragraph()

# First run: small text
run1 = class_.add_run("class	")
run1.font.name = 'Arial'
run1.font.size = Pt(10)

# Second run: larger text
run2 = class_.add_run("E L A S M O B R A N C H I I")
run2.font.name = 'Arial'
run2.font.size = Pt(12)

# Order line
order = doc.add_paragraph()

# First run: small text
run1 = order.add_run("order		")
run1.font.name = 'Arial'
run1.font.size = Pt(10)

# Second run: larger text
run2 = order.add_run("RAJIFORMES")
run2.font.name = 'Arial'
run2.font.size = Pt(12)
run2.font.bold = True

# Family line
order = doc.add_paragraph()

# First run: small text
run1 = order.add_run("family			")
run1.font.name = 'Arial'
run1.font.size = Pt(10)

# Second run: larger text
run2 = order.add_run("Potamotrygonidae")
run2.font.name = 'Arial'
run2.font.size = Pt(12)
run2.font.bold = True

# Genus line
genus = doc.add_paragraph()

# First run: small text
run1 = genus.add_run("Potamotrygon")
run1.font.name = 'Arial'
run1.font.size = Pt(10)
run1.font.bold = True
run1.font.italic = True

# Species line
species = doc.add_paragraph()

run = species.add_run("	P. falkneri")
run.font.name = 'Arial'
run.font.size = Pt(10)
run.font.bold = True
run.font.italic = True

run = species.add_run("   Castex & Maciel, 1963")
run.font.name = 'Arial'
run.font.size = Pt(10)
run.add_break()

run = species.add_run("			synonyms")
run.font.name = 'Arial'
run.font.size = Pt(10)

run = species.add_run("	P. castexi")
run.font.name = 'Arial'
run.font.size = Pt(10)
run.font.italic = True

run = species.add_run(" Castello & Yagolkowski, 1969")
run.font.name = 'Arial'
run.font.size = Pt(10)
run.add_break()

run = species.add_run("			record")
run.font.name = 'Arial'
run.font.size = Pt(8)

run = species.add_run("		lista de Karina sub")
run.font.name = 'Arial'
run.font.size = Pt(8)

run = species.add_run(" P. castexi")
run.font.name = 'Arial'
run.font.size = Pt(8)
run.font.italic = True

# Save the document
doc.save('../Data/CLOFBO-Plata.docx')