In [2]:
%%time
%load_ext autoreload
%autoreload 2

# Standard Python modules
import time 
import numpy as np
import os
import sys
import ipdb
sys.path.append(os.path.abspath(".."))
import json
import re
import glob
import asyncio
import aiohttp
import openai
import logging
from typing import Dict, Tuple, List, Callable
from dotenv import load_dotenv
from pathlib import Path

# Project imports
from src.utils import timeit, encode_image, plt, pylab, find_bad_pagenos, delete_bad_pagenos, dump_fragmented_output
from src.processing import compute_log_spectrum_1d, extract_image_bbox, save_images
from src.api_requests import construct_payload_for_gpt, process_single_page
from src.document_generation import save_document, setup_logger

# Display and plotting, set notebook display width
from IPython.display import display, HTML, clear_output

display(HTML("<style>.container { width:90% !important; }</style>"))

import PyPDF2
from src.document_generation import setup_logger, chapter_splitter
from src.utils import log_execution_time, count_num_tokens
import logging

# Print Python environment info
print('sys.executable:', sys.executable)
print('sys.version:', sys.version, '\n')

# Load environment variables from .env file
env_path = Path('../.env')  # Adjust path if needed
load_dotenv(dotenv_path=env_path)

# Setup for PDF processing
foldername = "Der Weltkrieg v7"

# OpenAI API setup
openai.api_key = os.getenv("OPENAI_API_KEY")

# # split the pdf into separate pages
# chapter_splitter("../input_data/Der Weltkrieg v5 East Front.pdf")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


sys.executable: /Users/ozkansafak/code/fraktur/.venv/bin/python3
sys.version: 3.10.9 (main, Mar  1 2023, 12:20:14) [Clang 14.0.6 ] 

CPU times: user 1.75 ms, sys: 1.15 ms, total: 2.9 ms
Wall time: 2.08 ms


In [3]:
# Initialize variables
plotter = False
image_path = f"../input_data/{foldername}/*pdf"
fnames = sorted(glob.glob(image_path))
all_pagenos = [re.search(r'page_(.*?)\.pdf', fname, re.DOTALL).group(1) for fname in fnames]

# Storage for processed texts
raw_german_texts: Dict[str, str] = {}
german_texts: Dict[str, str] = {}
english_texts: Dict[str, str] = {}

# Configure logging
logger = setup_logger('time_logger')


---
## PART 1
## Fraktur Translator (GPT-4o)

In [76]:
@log_execution_time
async def main(fnames, model_name="", semaphore_count=10, extract=True):
    semaphore = asyncio.Semaphore(semaphore_count)  # Adjust number based on API limits
    async def _process_page(fname: str) -> Tuple[str, Dict]:
        global raw_german_texts, german_texts, english_texts  # Explicitly declare globals
        async with semaphore:
            pageno = re.search(r'page_(.*?)\.pdf', fname, re.DOTALL).group(1)
            result = await process_single_page(fname, model_name, plotter, pageno, extract) 
            return pageno, result
    
    # A list of coroutine objects. include only the unprocessed pages.
    tasks = []
    print('Composing tasks to be carried out asynchronously...')
    for fname in fnames:
        pageno = re.search(r'page_(.*?)\.pdf', fname, re.DOTALL).group(1)
        if pageno not in set(raw_german_texts.keys()):
            print('pageno:', pageno)
            tasks.append(_process_page(fname))
    logger.info(f"main: len(tasks): {len(tasks)} -- Processing tasks as they complete")
    
    # Process tasks as they complete
    for i, completed_task in enumerate(asyncio.as_completed(tasks)):
        try:
            pageno, (content, raw_german_text, german_text, english_text) = await completed_task
            raw_german_texts[pageno] = raw_german_text
            german_texts[pageno] = german_text
            english_texts[pageno] = english_text
            
            if 'section was not found' in english_text:
                logger.error(f"pageno:{pageno}. 'section was not found' in english_text")
                logger.error(f"len(content):{count_num_tokens(content)}")
            elif '[' in german_text:
                logger.error('---'*22+'\n' + f"pageno:{pageno}. '[' present in german_text")
                j = german_text.index('[')
                print('pageno:' + pageno + '\n' + german_text[j:]+'\n' + '---'*22)
            elif '[' in english_text:
                logger.error('---'*22 + '\n' + f"pageno:{pageno}. '[' present in english_text")
                j = english_text.index('[')
                print('pageno:' + pageno + '\n' + english_text[j:] + '\n'+ '---'*22)
            else:
                logger.info(f"{i} of {len(tasks)-1} -- Successfully processed page:{pageno}")
        except Exception as e:
            logger.error(f"{i} of {len(tasks)-1} -- Error processing a task: {e}")
    return 

# Run the async code
await main(fnames, model_name="gpt-4o-2024-08-06", semaphore_count=10)


main - L19 - INFO - main: len(tasks): 1 -- Processing tasks as they complete


Composing tasks to be carried out asynchronously...
pageno: 216

**Instructions**

You are to perform three steps on the provided image of a document.

**General Guidelines**:
- **Accuracy**: Capture all text elements exactly as they appear. Do **NOT** summarize or skip content, including index or table of contents pages.
- **Paragraph Integrity**: Transcribe paragraphs as cohesive blocks. Do not add carriage returns (`\n`) at the end of each line unless explicitly present.
- **Separator**: After completing each step, print the following separator line: "-----"

**Step 1: OCR Transcription in to Raw German Text**

Task: Transcribe the entire text from the image into German, including all Fraktur characters.

**Instructions**:
1. **Accuracy**: 
   - Faithfully transcribe all text elements **WITHOUT ANY SUMMARIZATION** even if the page consists of dense repetitive looking text. 
   - Don't repetitively extract the same letter or the same '.' character unlike in the original document.

2.

extract_text_section - L243 - ERROR - Pageno: 216, "raw_german" section was not found
extract_text_section - L243 - ERROR - Pageno: 216, "german" section was not found
extract_text_section - L243 - ERROR - Pageno: 216, "english" section was not found
main - L30 - ERROR - pageno:216. 'section was not found' in english_text
main - L31 - ERROR - len(content):10000
wrapper - L83 - INFO - Finished main in 204.66 seconds.


In [78]:
from src.utils import find_bad_pagenos, delete_bad_pagenos

good_pagenos = {'213', '225', '221'} # V5
bad_pagenos = find_bad_pagenos(all_pagenos, raw_german_texts, german_texts, english_texts, good_pagenos)
# delete_bad_pagenos(bad_pagenos, raw_german_texts, german_texts, english_texts)

> pageno: 216. In raw_german_texts: Pageno: 216, "<raw_german>" section was not found

bad_pagenos: ['216']


---
## PART 2
##  Handle missing keys (Claude Sonnet)

In [79]:
# Check for unprocessed pages and print them to stdout.
from src.utils import get_corresponding_bad_fnames, delete_bad_pagenos

# ------------------------------------------------------------------------------------
# 1. Rerun the missing pages on Claude without performing FFT based extraction. 
# ------------------------------------------------------------------------------------

# Get missing keys based on empty raw_german_texts
bad_pagenos = find_bad_pagenos(all_pagenos, raw_german_texts, german_texts, english_texts, good_pagenos)
delete_bad_pagenos(bad_pagenos, raw_german_texts, german_texts, english_texts)

await main(fnames, model_name="claude-3-5-sonnet-20241022", semaphore_count=1, extract=False)

# ------------------------------------------------------------------------------------
# 2. If there still are missing pages, run them performing FFT based extraction. 
#    This time compute missing_keys based on 'english_texts'.
# ------------------------------------------------------------------------------------

# Now get missing keys based on empty english_texts
bad_pagenos = find_bad_pagenos(all_pagenos, raw_german_texts, german_texts, english_texts, good_pagenos)
delete_bad_pagenos(bad_pagenos, raw_german_texts, german_texts, english_texts)

await main(fnames, model_name="claude-3-5-sonnet-20241022", semaphore_count=1, extract=True)
await main(fnames, model_name="claude-3-5-sonnet-20241022", semaphore_count=1, extract=False)

bad_pagenos = find_bad_pagenos(all_pagenos, raw_german_texts, german_texts, english_texts, good_pagenos)


main - L19 - INFO - main: len(tasks): 1 -- Processing tasks as they complete


> pageno: 216. In raw_german_texts: Pageno: 216, "<raw_german>" section was not found

bad_pagenos: ['216']
------------------------------------------------------------------
> Deleting pageno:216
Composing tasks to be carried out asynchronously...
pageno: 216


main - L41 - INFO - 0 of 0 -- Successfully processed page:216
wrapper - L83 - INFO - Finished main in 21.73 seconds.
main - L19 - INFO - main: len(tasks): 1 -- Processing tasks as they complete


> pageno: 216 in raw_german_texts:[Continued technical military details and unit compositions...]
 ...

bad_pagenos: ['216']
------------------------------------------------------------------
> Deleting pageno:216
Composing tasks to be carried out asynchronously...
pageno: 216


main - L33 - ERROR - ------------------------------------------------------------------
pageno:216. '[' present in german_text
wrapper - L83 - INFO - Finished main in 24.48 seconds.
main - L19 - INFO - main: len(tasks): 0 -- Processing tasks as they complete
wrapper - L83 - INFO - Finished main in 0.01 seconds.


pageno:216
[Continued detailed military unit listings and specifications...]
</body>

------------------------------------------------------------------
Composing tasks to be carried out asynchronously...
> pageno: 216 in raw_german_texts:[Continued detailed military unit listings and specifications...]
 ...

bad_pagenos: ['216']


In [91]:
# for pageno in bad_pagenos:
#     english_texts[pageno] = '<Blank Page>'
#     german_texts[pageno] = '<Blank Page>'
#     print(f"{pageno}: <Blank Page>")
    
# save json outputs
if not os.path.exists(f'../output_data/{foldername}'):
    os.makedirs(f'../output_data/{foldername}')
    
with open(f'../output_data/{foldername}/english_texts.json', 'w') as f:
    json.dump(english_texts, f)
with open(f'../output_data/{foldername}/german_texts.json', 'w') as f:
    json.dump(german_texts, f)
with open(f'../output_data/{foldername}/raw_german_texts.json', 'w') as f:
    json.dump(raw_german_texts, f)

# Save json files and .docx files.
doc1, fname1 = save_document(german_texts, foldername, language=f'{foldername} - German')
doc2, fname2 = save_document(english_texts, foldername, language=f'{foldername} - English')


``` 
1.  Upload Input folder of pdfs to blob storage.
2.  Read file from s3.
3.  FFT in y -> (x_hi, x_lo), write half_cropped_image to s3
4.  FFT in x -> (y_hi, y_lo), write cropped_image to s3
5.  Read cropped image from s3 -> encode_image -> translate and transcribe -> JSON output

```

### Available models and pricing:
```
"gpt-4o-2024-08-06":
    "price_txt": "$2.50 / 1M input tokens"
    "price_img": "$0.001913 / 1500px^2"
    
"gpt-4o-mini-2024-07-18":
    "price_txt": "$0.150 / 1M input tokens"
    "price_img": "$0.003825 / 1500px^2"
    
```

---
## PART 3
## Load the German text and translate broken sentences.

In [4]:
# load `raw_german_texts`, `german_texts`, `english_texts` from disk.

with open(f'../output_data/{foldername}/raw_german_texts.json', 'r') as f:
    raw_german_texts = json.load(f)
with open(f'../output_data/{foldername}/german_texts.json', 'r') as f:
    german_texts = json.load(f)
with open(f'../output_data/{foldername}/english_texts.json', 'r') as f:
    english_texts = json.load(f)

# if os.path.exists(f'../output_data/{foldername}/english_texts_defragmented.json'):
#     with open(f'../output_data/{foldername}/english_texts_defragmented.json', 'r') as f:
#         english_texts_defragmented = json.load(f)


In [4]:
# initialize output dicts
english_texts_defragmented = {}
contents = {}
payloads = {}
fragments_2 = {None:''}

In [5]:
from src.constants import FRAGMENTED_SENTENCES_PROMPT

def construct_payload_fragmented_sentences(
                        german_page_1: str,
                        german_page_2: str,
                        english_page_1_old: str,
                        german_page_1_top_fragment_to_be_ignored: str):

    model_name = "gpt-4o-2024-08-06"
    payload = {
        "model": model_name,
        "messages": [
          {
            "role": "system", 
            "content": "You are a World War II historian, who's bilingual in German and English "
              "You speak both languages with masterful efficiency and you're a professional translator from GERMAN to ENGLISH who "
              "stays loyal to both the style and the character of the original German text in your book translations."
          },
          {
            "role": "user",
            "content": [
              {
                  "type": "text",
                  "text": FRAGMENTED_SENTENCES_PROMPT.format(
                      german_page_1=german_page_1,
                      german_page_2=german_page_2,
                      english_page_1_old=english_page_1_old,
                      german_page_1_top_fragment_to_be_ignored=german_page_1_top_fragment_to_be_ignored
                  )
              },
            ]
          },
        ],
        "max_tokens": 6000,
        "temperature": 0.1
    }

    return payload 

In [21]:
import ipdb
import requests

def log_execution_time_synchronous(func: Callable):
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        logger.info(f"Finished {func.__name__} in {time.time() - start:.2f} seconds.")
        return result
    return wrapper

def make_gpt_request_for_broken_sentences(payload: dict, pageno: str) -> dict:
    global english_texts_defragmented 

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai.api_key}"
    }

    model_name = "gpt-4o-2024-08-06"
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        json=payload,
        headers=headers
    )
    result = response.json()
    try:
        content = result['choices'][0]['message']['content']
        english_texts_defragmented[pageno] = re.search(r'<english_page_1_new>(.*?)</english_page_1_new>', 
                                                       content, re.DOTALL).group(1)
        return content
    except Exception as e:
        print(f"An Error occurred. Error details: {e}")
        # ipdb.set_trace()
        return None

@log_execution_time_synchronous
def main_broken_sentences():
    global payloads, contents, fragments_2
    for i in range(len(all_pagenos) - 1):
        pageno = all_pagenos[i]
        prev_pageno = all_pagenos[i-1] if i > 0 else None
        next_pageno = all_pagenos[i+1]
        if pageno in english_texts_defragmented.keys() and len(english_texts_defragmented[pageno]) > 10:
            logger.info(f'pageno: {pageno} already in english_texts_defragmented... skipping')
            continue
        
        for trial in [1,2,3]:
            try:
                logger.info(f"Processing pageno: {pageno} {' ...trying again '+ str(trial) if trial > 1 else ''}")
                payload = construct_payload_fragmented_sentences(
                    german_texts[pageno], 
                    german_texts[next_pageno], 
                    english_texts[pageno], 
                    fragments_2[prev_pageno] 
                )
                payloads[pageno] = payload['messages'][1]['content'][0]['text']
                content = make_gpt_request_for_broken_sentences(payload, pageno)
                contents[pageno] = content
                fragments_2[pageno] = re.search(r'<fragment_2>(.*?)</fragment_2>', content, re.DOTALL).group(1)
                if fragments_2[pageno].count('\n') > 10:
                    print('not accepting fragment_2. fragments_2[pageno].count("\\n")', fragments_2[pageno].count('\n'))
                    fragments_2[pageno] = '' 
                logger.info(f"next round's fragment_2: {fragments_2[pageno]}") 
                break
            except Exception as e:
                logger.error(f"Error processing. {e}. trial:{trial}, pageno: {pageno}")
                if trial == 3:
                    english_texts_defragmented[pageno] = ''
        
        if pageno[-1] == '0' or next_pageno == all_pagenos[-1]:
            dump_fragmented_output(foldername, english_texts_defragmented, fragments_2, contents)
            
    english_texts_defragmented[next_pageno] = ''

main_broken_sentences()


main_broken_sentences - L45 - INFO - pageno: 001 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 002 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 003 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 004 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 005 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 006 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 007 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 008 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 009 already in english_texts_defragmented... skipping
main_broken_sentences - L45 - INFO - pageno: 010 already in english_texts_defragmented... skipping
main_broke

dumped fragemented outputs to foldername: Der Weltkrieg v5


main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 222 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 223 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 224 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 225 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 226 
main_broken_sentences - L64 - INFO - next round's fragment_2: Das russische Heer
main_broken_sentences - L50 - INFO - Processing pageno: 227 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 228 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pag

dumped fragemented outputs to foldername: Der Weltkrieg v5


main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 232 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 233 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 234 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 235 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 236 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 237 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 238 
main_broken_sentences - L64 - INFO - next round's fragment_2: Truppen- beim Gegner an der Westfront vom
main_broken_sentences - L50 

dumped fragemented outputs to foldername: Der Weltkrieg v5


main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 242 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 243 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 244 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 245 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 246 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 247 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 248 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 249 
main_bro

dumped fragemented outputs to foldername: Der Weltkrieg v5


main_broken_sentences - L64 - INFO - next round's fragment_2: Kaempffer, Genlt., Gen. d. Pioniere beim A. O. K. 5 157.
main_broken_sentences - L50 - INFO - Processing pageno: 252 
main_broken_sentences - L64 - INFO - next round's fragment_2: v. Meyer, Genlt., Kdr. d. 37. bdw. Brig. 227. 238. 322.
main_broken_sentences - L50 - INFO - Processing pageno: 253 
main_broken_sentences - L64 - INFO - next round's fragment_2: Schabel, Genmaj. 226.
main_broken_sentences - L50 - INFO - Processing pageno: 254 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 255 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 256 
main_broken_sentences - L64 - INFO - next round's fragment_2: Kav.-Gruppe Marwitz 313. 324f. 329. 331. 336.
main_broken_sentences - L50 - INFO - Processing pageno: 257 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sent

dumped fragemented outputs to foldername: Der Weltkrieg v5


main_broken_sentences - L64 - INFO - next round's fragment_2: Feld-Art. R. 78 277.
main_broken_sentences - L50 - INFO - Processing pageno: 262 
main_broken_sentences - L64 - INFO - next round's fragment_2: I. Korps 465. 475ff. 482f.
main_broken_sentences - L50 - INFO - Processing pageno: 263 
main_broken_sentences - L64 - INFO - next round's fragment_2: provisorisches Korps Délèville (später XXXI. Korps) 140. 360.
main_broken_sentences - L50 - INFO - Processing pageno: 264 
main_broken_sentences - L67 - ERROR - Error processing. expected string or bytes-like object. trial:1, pageno: 264
main_broken_sentences - L50 - INFO - Processing pageno: 264  ...trying again 2


An Error occurred. Error details: 'NoneType' object has no attribute 'group'


main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 265 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 266 
main_broken_sentences - L64 - INFO - next round's fragment_2: 
main_broken_sentences - L50 - INFO - Processing pageno: 267 
main_broken_sentences - L64 - INFO - next round's fragment_2: Transbaïkal-Kos. Brig. 495f.
wrapper - L8 - INFO - Finished main_broken_sentences in 945.55 seconds.


In [23]:

dump_fragmented_output(foldername, english_texts_defragmented, fragments_2, contents)
doc1, fname1 = save_document(german_texts, foldername, language=f'{foldername} - German_defragmented')
doc2, fname2 = save_document(english_texts_defragmented, foldername, language=f'{foldername} - English_defragmented')


dumped fragemented outputs to foldername: Der Weltkrieg v5


<!-- --- -->
## Experiments

In [230]:
# pattern = r'<(pageno|header|body|footer)>(.*?)</\1>'
# for match in re.finditer(pattern, english_texts_defragmented['006'], re.DOTALL):
#     print(match)
#     break

from src.document_generation import extract_sections_in_order
sections = extract_sections_in_order(english_texts_defragmented['006'])

In [217]:
for pageno in all_pagenos:
    findall = re.findall(r'<pageno>(.*?)</pageno>', english_texts_defragmented[pageno], re.DOTALL)
    fraktur_pageno = findall[0] if findall else '-'
    print('\n---')
    print(f"\npageno:{pageno}  --  fraktur_pageno: {fraktur_pageno}")
    print(english_texts_defragmented[pageno].strip())
    


---

pageno:001  --  fraktur_pageno: -
<header>The World War</header>
<body>
1914 to 1918
Edited in the
Reichsarchiv
*
The Military Operations on Land
Eighth Volume
Published by E. S. Mittler & Son
Berlin in the Year 1932
</body>

---

pageno:002  --  fraktur_pageno: -
<header>The Operations of the Year 1915</header>
<body>
Events in the West in Spring and Summer, in the East from Spring to the End of the Year
With thirty-nine Maps and Sketches
</body>
<footer>
Published by E. S. Mittler & Sohn
Berlin in the Year 1932
</footer>

---

pageno:003  --  fraktur_pageno: -
<header>Introduction to the Eighth Volume.</header>
<body>The present Volume VIII describes the operations in the West in the spring and summer of 1915, in the East from spring until the end of the year.
The previous practice of concluding the depiction of events on the various theaters of war at the same time had to be deviated from, as the operations in the East formed internally coherent actions until the end of the ye