# Western Sydney Women's Oral History Project: From farms to freeways: Women's memories of Western Sydney

For more information regarding the project and the dataset, please visit [Western Sydney University - Western Sydney Women's Oral History Project: From farms to freeways](https://omeka.westernsydney.edu.au/farmstofreeways/). 

The original dataset includes 34 audio recordings, along with their interview transcripts, participant photographs, and project material. You can choose to download the whole dataset, or the 34 interview transcripts only. 

This notebook is used for education and research purposes only. The repository is licensed under MIT.

In [1]:
# WARNING: DO NOT CHANGE THE FOLLOWING CODE
# Base URL of the published record's attachments, and the publication id that
# download() appends to every request as the `pubId` query parameter.
prefix = (
    'https://research-data.westernsydney.edu.au/default/rdmp/pubrecord/'
    'bc45b4d0519311ecb15399911543e199/pubattach/'
)
pub_id = '31f45ab0519411ecb15399911543e199'

# One descriptor per downloadable file: `id` is appended to `prefix` to build
# the download URL, `name` is the local file name the download is saved under.

# --- Metadata files ---
audio_recording_metadata = dict(
    id='0cdb44dddabe4082af0afd08b51dd296',
    name='ftf_audio_for_upload.csv',
)
transcripts_for_upload = dict(
    id='dc132d20e28a4aabb32583b13b17f8a7',
    name='ftf_transcripts_for_upload.csv',
)
# NOTE(review): this id is identical to the record id embedded in `prefix`;
# confirm it really is the attachment id of the images CSV.
photographs_metadata = dict(
    id='bc45b4d0519311ecb15399911543e199',
    name='ftf_images_for_upload.csv',
)
thank_you_notes_and_letters_metadata = dict(
    id='976fefa533754a2f9e035ed97965ba6d',
    name='ftf_letters_for_upload.csv',
)
project_materials_metadata = dict(
    id='2464028287574f8c883bfc987c600df3',
    name='ftf_projectMaterials_for_upload.csv',
)
relationships_between_interviewee_items = dict(
    id='ec80f70ec5974cc6abdf04db7a994a30',
    name='ftf_item_relationships.rtf',
)
export_of_all_items = dict(
    id='db5949d5a36f4ab085bca6edf69118f5',
    name='ftf_allItems_output.xml',
)

# --- Zip archives (download() can extract these after saving them) ---
plain_text_format_transcripts = dict(
    id='6627738d4a73422786bfc350aac0ff1c',
    name='ftf_transcripts_plaintext.zip',
)
pdf_format_transcripts = dict(
    id='6dff96b8f2444a8aaf15e479c7c74ce6',
    name='ftf_transcripts_pdfa.zip',
)
jpeg_images = dict(
    id='63ee58806c2347fd862a7c6f3af181b1',
    name='ftf_photographs.zip',
)

In [2]:
import os
import zipfile

import requests


def download(save_path=None, file_name=plain_text_format_transcripts, unzip=True):
    """Download the requested dataset file(s) and optionally unzip archives.

    Args:
        save_path (str, optional): Directory to save the file(s) in. It is
            created (including missing parent directories) if it does not
            exist. If None or empty, files are saved in the current directory.
            Defaults to None.
        file_name (dict or list[dict], optional): The file(s) to download, each
            a dict with an 'id' and a 'name' key. Downloads only
            plain_text_format_transcripts by default.
        unzip (bool, optional): Whether to extract downloaded ``.zip`` files
            into a directory named after the archive (without the extension),
            next to the archive. Defaults to True.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as if it were a dataset file.
        requests.Timeout: If the server does not respond in time.
    """
    # Accept a single file descriptor as well as a list of them.
    if isinstance(file_name, dict):
        file_name = [file_name]

    # Create the target directory once, up front. Skipped when no directory
    # was given: the current directory already exists, and os.path functions
    # reject None.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    for entry in file_name:
        url = prefix + entry['id'] + '?pubId=' + pub_id
        # Time out instead of hanging forever on an unresponsive server.
        response = requests.get(url, timeout=60)
        # Fail loudly on 4xx/5xx rather than writing the error body to disk.
        response.raise_for_status()

        if save_path:
            file_path = os.path.join(save_path, entry['name'])
        else:
            file_path = entry['name']

        # Write the content of the response to a file
        with open(file_path, 'wb') as out:
            out.write(response.content)
        print(f"File downloaded and saved as {file_path}")

        # Unzip files if needed
        if unzip and entry['name'].endswith('.zip'):
            # splitext strips only the final extension, so dots elsewhere in
            # the path (e.g. './dataset' or 'data.v1') do not truncate it.
            extract_dir = os.path.splitext(file_path)[0]
            with zipfile.ZipFile(file_path, 'r') as archive:
                # Extract all the contents into the directory
                archive.extractall(extract_dir)
            print(f"File is unzipped and saved as {extract_dir}")


# Pass all_files as file_name if you want to download all data.
# (The list is deliberately not called `all`: that name would shadow Python's
# built-in all() for the rest of the notebook session.)
all_files = [
    audio_recording_metadata,
    transcripts_for_upload,
    photographs_metadata,
    thank_you_notes_and_letters_metadata,
    project_materials_metadata,
    relationships_between_interviewee_items,
    export_of_all_items,
    plain_text_format_transcripts,
    pdf_format_transcripts,
    jpeg_images,
]
# Download all data into the directory named dataset
download(save_path='dataset', file_name=all_files, unzip=True)


# Or set file_name to files_to_download and choose which files you want to download
# files_to_download = [plain_text_format_transcripts, pdf_format_transcripts]
# download(save_path='dataset', file_name=files_to_download, unzip=True)


# This will download and unzip only the interview transcripts
# download(save_path='dataset', file_name=plain_text_format_transcripts, unzip=True)

File downloaded and saved as dataset/ftf_audio_for_upload.csv
File downloaded and saved as dataset/ftf_transcripts_for_upload.csv
File downloaded and saved as dataset/ftf_images_for_upload.csv
File downloaded and saved as dataset/ftf_letters_for_upload.csv
File downloaded and saved as dataset/ftf_projectMaterials_for_upload.csv
File downloaded and saved as dataset/ftf_item_relationships.rtf
File downloaded and saved as dataset/ftf_allItems_output.xml
File downloaded and saved as dataset/ftf_transcripts_plaintext.zip
File is unzipped and saved as dataset/ftf_transcripts_plaintext
File downloaded and saved as dataset/ftf_transcripts_pdfa.zip
File is unzipped and saved as dataset/ftf_transcripts_pdfa
File downloaded and saved as dataset/ftf_photographs.zip
File is unzipped and saved as dataset/ftf_photographs
