# Face of the war

This notebook contains the python script for extracting the data and storing it in a JSON file to create the visualisation.

In [1]:
# Imports

import pandas as pd
import json
import re
import requests
from tqdm import tqdm

## Reading the data

Only the required columns of the data are read from the CSV file to a pandas dataframe.

In [2]:
req_cols = ['Title of image', 'Temporal',
            'Military Details', 'Thumbnail image', "High resolution image "]

ww1_sol = pd.read_csv("./../data/ww1_soldiers.csv", usecols=req_cols)

The rows that contain nan values are removed

In [3]:
ww1_sol_name_photo = ww1_sol[["Title of image", "Thumbnail image", "Temporal", "Military Details", "High resolution image "]].dropna()

The date is converted into the 20th century format i.e. 19 is added to the two digit year and stored as a string

In [4]:
ww1_sol_name_photo["Temporal"] = pd.to_datetime(ww1_sol_name_photo["Temporal"].str[:-2] + '19' + ww1_sol_name_photo["Temporal"].str[-2:])
ww1_sol_name_photo["Temporal"] = ww1_sol_name_photo["Temporal"].astype(str)

Name of the soldier is extracted from the title of the image

In [5]:
ww1_sol_name_photo["Name"] = ww1_sol_name_photo["Title of image"].apply(lambda x :x.split(",")[0])

Any special characters in the name are removed.

In [6]:
spec_chars = ["!",'"',"#","%","&","'","(",")",
              "*","+",",","-","/",":",";","<",
              "=",">","?","@","[","\\","]","^","_",
              "`","{","|","}","~","–"]
spec_chars_dict = {spl_char: '' for spl_char in spec_chars}
ww1_sol_name_photo['Name'] = ww1_sol_name_photo['Name'].apply(lambda x: re.sub('({})'.format('|'.join(map(re.escape, spec_chars_dict.keys()))), lambda m: spec_chars_dict[m.group()], x))

In [7]:
ww1_sol_name_photo

Unnamed: 0,Title of image,Thumbnail image,Temporal,Military Details,High resolution image,Name
11,"- Burton, one of the soldiers photographed in ...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,9th Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,Burton
12,"- Chambers, E Co., one of the soldiers photogr...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-17,9th Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,Chambers
16,"- Demmack, C Co., one of the soldiers photogra...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-10,9th Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,Demmack
24,"- Gorgill, E Co., one of the soldiers photogra...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-17,9th Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,Gorgill
25,"- Graffunder, one of the soldiers photographed...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,2nd Light Horse Regiment,http://resources.slq.qld.gov.au/images/slq/pub...,Graffunder
...,...,...,...,...,...,...
28259,"W.V. Diamond, B Co., one of the soldiers photo...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-10,9th Infantry Battalion - embarked Oct 19th 191...,http://resources.slq.qld.gov.au/images/slq/pub...,W.V. Diamond
28260,"W.V. Ferry 7-25, one of the soldiers photograp...",http://resources.slq.qld.gov.au/images/slq/pub...,1916-05-13,"7th, 10th, 11th & 12th Reinforcements 25th Bat...",http://resources.slq.qld.gov.au/images/slq/pub...,W.V. Ferry 725
28263,"W.V. Hoskins 10-15, one of the soldiers photog...",http://resources.slq.qld.gov.au/images/slq/pub...,1916-05-27,Infantry Reinforcements,http://resources.slq.qld.gov.au/images/slq/pub...,W.V. Hoskins 1015
28287,"W.W. McNalby, D Co, one of the soldiers photog...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-10,9th Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,W.W. McNalby


To save a limited number of entries, the soliders with unique military units are selected.

In [8]:
milidet_val_count = ww1_sol_name_photo["Military Details"].value_counts()
unique_milidet = milidet_val_count[milidet_val_count == 1].index.tolist()

selected_soldiers = ww1_sol_name_photo[ww1_sol_name_photo["Military Details"].isin(unique_milidet)]
selected_soldiers

Unnamed: 0,Title of image,Thumbnail image,Temporal,Military Details,High resolution image,Name
482,"A. Degonmois, one of the soldiers photographed...",http://resources.slq.qld.gov.au/images/slq/pub...,1915-01-30,"15th Infantry Battalion, 3rd Reinforcements",http://resources.slq.qld.gov.au/images/slq/pub...,A. Degonmois
1246,"A. Zyngay, one of the soldiers photographed in...",http://resources.slq.qld.gov.au/images/slq/pub...,1915-11-09,"9th Infantry Battalion, 10th Reinforcements",http://resources.slq.qld.gov.au/images/slq/pub...,A. Zyngay
1699,"A.E. Taylor, F. Co, one of the soldiers photog...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-24,9th Infantry Battalion ; Alfred Edward,http://resources.slq.qld.gov.au/images/slq/pub...,A.E. Taylor
2057,"A.H. Young, one of the soldiers photographed i...",http://resources.slq.qld.gov.au/images/slq/pub...,1918-03-16,Light Horse Reinforcements,http://resources.slq.qld.gov.au/images/slq/pub...,A.H. Young
2793,"A.W. Yeo, one of the soldiers photographed in ...",http://resources.slq.qld.gov.au/images/slq/pub...,1916-04-15,9th Field Artillary Brigade,http://resources.slq.qld.gov.au/images/slq/pub...,A.W. Yeo
...,...,...,...,...,...,...
26647,"W.A. Ware, Am. Col, one of the soldiers photog...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-10-24,DAC - No 2 Section,http://resources.slq.qld.gov.au/images/slq/pub...,W.A. Ware
27048,"W.F.H. Zimmerman, one of the soldiers photogra...",http://resources.slq.qld.gov.au/images/slq/pub...,1917-11-08,Field Artillary Brigade,http://resources.slq.qld.gov.au/images/slq/pub...,W.F.H. Zimmerman
27408,"W.H. Yeo, one of the soldiers photographed in ...",http://resources.slq.qld.gov.au/images/slq/pub...,1917-04-21,26 Infantry Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,W.H. Yeo
27784,"W.J. Wynne, one of the soldiers photographed i...",http://resources.slq.qld.gov.au/images/slq/pub...,1918-01-26,"15 Infantry Battalion, 26 Reinforcements",http://resources.slq.qld.gov.au/images/slq/pub...,W.J. Wynne


The data is sorted based on the date of publication of the photo.

In [9]:
sorted_soldiers_df = selected_soldiers.sort_values(by=['Temporal'])
sorted_soldiers_df

Unnamed: 0,Title of image,Thumbnail image,Temporal,Military Details,High resolution image,Name
9966,"G.H Taylor, one of the soldiers photographed i...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,9th Infantry Battalion ; Arthur Edward,http://resources.slq.qld.gov.au/images/slq/pub...,G.H Taylor
15996,"J.McGowen, B Co., one of the soldiers photogra...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,3rd FAB [Field Artillery Brigade],http://resources.slq.qld.gov.au/images/slq/pub...,J.McGowen
24007,"T. Bower, one of the soldiers photographed in ...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,"12th Fld Bay, Engineering",http://resources.slq.qld.gov.au/images/slq/pub...,T. Bower
17692,"Lieut. R. Webster (A.A.S.C.), one of the soldi...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,AASC 1st Light Horse Brigade Train (5th Compan...,http://resources.slq.qld.gov.au/images/slq/pub...,Lieut. R. Webster A.A.S.C.
21155,"Q.M.S F. Cartwright, one of the soldiers photo...",http://resources.slq.qld.gov.au/images/slq/pub...,1914-03-10,Designation - Quartermaster,http://resources.slq.qld.gov.au/images/slq/pub...,Q.M.S F. Cartwright
...,...,...,...,...,...,...
19889,"Private Lewes Golden, one of the soldiers phot...",http://resources.slq.qld.gov.au/images/slq/pub...,1918-11-30,"15th Battalion , France",http://resources.slq.qld.gov.au/images/slq/pub...,Private Lewes Golden
23607,"Sergt. W.E. Cox, one of the soldiers photograp...",http://resources.slq.qld.gov.au/images/slq/pub...,1919-03-29,49th Battalion,http://resources.slq.qld.gov.au/images/slq/pub...,Sergt. W.E. Cox
23472,"Sapper R.W. Cox, one of the soldiers photograp...",http://resources.slq.qld.gov.au/images/slq/pub...,1919-03-29,2nd Tunnelling Company,http://resources.slq.qld.gov.au/images/slq/pub...,Sapper R.W. Cox
6094,"Driver E.M. Cox, one of the soldiers photograp...",http://resources.slq.qld.gov.au/images/slq/pub...,1919-03-29,Army Service Corps,http://resources.slq.qld.gov.au/images/slq/pub...,Driver E.M. Cox


The urls of the entries are tested for validity and only valid ones are appened to a dictionary.

In [10]:
def url_test(url1, url2):
    if requests.head(url1).status_code == 200 and requests.head(url2).status_code == 200:
        return True
    else:
        return False

In [11]:
sol_photo_dict = {"photos": []}
common_url_part = "http://resources.slq.qld.gov.au/images/slq/pub/"
url_common_len = len(common_url_part)
common_url_suffix = ".jpg"
urlsuffix_common_len = len(common_url_suffix)

for ind, row in tqdm(selected_soldiers.iterrows()):
    if url_test(row["Thumbnail image"], row["High resolution image "]):
        sol_photo_dict["photos"].append({"id": ind,
                                         "name": row["Name"].strip().upper(),
                                         "thumb_image": row["Thumbnail image"][url_common_len:-urlsuffix_common_len],
                                         "highres_image": row["High resolution image "][url_common_len:-urlsuffix_common_len],
                                         "unit": row["Military Details"].strip(),
                                         "date": row["Temporal"].strip()})

160it [03:49,  1.44s/it]


In [12]:
sol_photo_dict

{'photos': [{'id': 482,
   'name': 'A. DEGONMOIS',
   'thumb_image': '2014-07-20/tnl/702692-19150130-s0025/702692-19150130-s0025-0020b',
   'highres_image': '2014-07-20/research/702692-19150130-s0025/702692-19150130-s0025-0020r',
   'unit': '15th Infantry Battalion, 3rd Reinforcements',
   'date': '1915-01-30'},
  {'id': 1246,
   'name': 'A. ZYNGAY',
   'thumb_image': '2015-06-09/tnl/702692-19150911-s0026/702692-19150911-s0026-0055b',
   'highres_image': '2015-06-09/research/702692-19150911-s0026/702692-19150911-s0026-0055r',
   'unit': '9th Infantry Battalion, 10th Reinforcements',
   'date': '1915-11-09'},
  {'id': 1699,
   'name': 'A.E. TAYLOR',
   'thumb_image': '2014-06-18/tnl/702692-19141024-s0022/702692-19141024-s0022-0033b',
   'highres_image': '2014-06-18/research/702692-19141024-s0022/702692-19141024-s0022-0033r',
   'unit': '9th Infantry Battalion ; Alfred Edward',
   'date': '1914-10-24'},
  {'id': 2057,
   'name': 'A.H. YOUNG',
   'thumb_image': '2015-03-19/tnl/702692-1918

The data in the dictionary is stored as JSON file.

In [13]:
js = json.dumps(sol_photo_dict, sort_keys=True, indent=4, separators=(',', ': '))
with open('./../data/sol_photos_unique_military.json', 'w+') as f:
    f.write(js)