diff --git a/docs/examples/notebooks/lemi_reader_magdelena.ipynb b/docs/examples/notebooks/lemi_reader_magdelena.ipynb new file mode 100644 index 00000000..8f3da6b6 --- /dev/null +++ b/docs/examples/notebooks/lemi_reader_magdelena.ipynb @@ -0,0 +1,3475 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "68281fe0-4875-47b0-8240-72945692705c", + "metadata": {}, + "source": [ + "## LEMI Example" + ] + }, + { + "cell_type": "markdown", + "id": "3a514086-1b8a-4e12-bc18-9a1de04746a1", + "metadata": {}, + "source": [ + "Received from Andy Frassetto via email, 10 May, 2022.\n", + "_______________________\n", + "Karl,\n", + "\n", + "Here's one candidate. PASSCAL test set from fall 2020 in the Magdalena\n", + "mountains, so...should be fairly quiet.\n", + "\n", + "Cheers, A\n" + ] + }, + { + "cell_type": "markdown", + "id": "a004872a-e6e9-421a-84c5-a3a688cea857", + "metadata": {}, + "source": [ + "The data received were from a single station, and sit in a folder called \n", + "DATA0110. In general, it is recommended to group the LEMI files like this, in one folder per station." + ] + }, + { + "cell_type": "markdown", + "id": "c8c316cf-50ac-45e8-8a8b-97a280280348", + "metadata": {}, + "source": [ + "Within a station folder, there can be many files.\n", + "\n", + "Every file is associated with exactly one run.\n", + "However, some runs are associated with more than one file.\n", + "\n", + "Therefore it is desirable to group the files according to their runs.\n", + "\n", + "We could do this with subfolders, but in this example we use a dataframe\n", + "\n", + "\n", + "We can take advantage of the highly regular LEMI filename structure, \n", + "which is of the form YYYYMMDDhhmm.TXT\n", + "i.e. 
# %% [markdown]
# LEMI files start on the UTC minute.
#
# Thus we can easily sort these, and determine, based on filename _only_, whether
# the data are contiguous or not:
#
#     202009302021.TXT
#     YYYYMMDDHHMM.TXT

# %%
import os
import pandas as pd
from pathlib import Path

from mth5 import read_file
from mth5 import mth5
from mth5.io.lemi424 import LEMI424

from mt_metadata import timeseries as metadata
from mt_metadata.utils.mttime import MTime

# %% [markdown]
# ### Define path to the data
#
# The original data dump was in a folder called DATA0110.

# %%
# Root of the survey. The default is the location used when this notebook was
# written; set the LEMI_SURVEY_DIR environment variable to run against a copy
# of the data that lives somewhere else.
survey_dir = Path(
    os.environ.get(
        "LEMI_SURVEY_DIR", r"/home/kkappler/software/irismt/aurora/tests/LEMI/"
    )
)

print("Survey Directory Contents")
for entry in sorted(survey_dir.iterdir()):
    print(entry.name)

# %% [markdown]
# Let's make a _stations_ folder to better emulate how the data would be stored in a survey directory

# %%
stations_dir = survey_dir.joinpath("stations")
stations_dir.mkdir(exist_ok=True)

# %%
# List the survey directory again: "stations" should now be among the entries.
for entry in sorted(survey_dir.iterdir()):
    print(entry.name)

# %% [markdown]
# Now in the stations folder, let's create a symlink to DATA0110.
# Give the station a name, like 53

# %%
original_station_dir = survey_dir.joinpath("DATA0110")
symlink_path = stations_dir.joinpath("station_53")

# Shelling out to `ln -s` is not re-runnable: once station_53 exists, a second
# run tries to create a DATA0110 link *inside* the linked folder and then fails
# with "File exists". Create the link only when nothing is there yet.
if symlink_path.is_symlink() or symlink_path.exists():
    print(f"{symlink_path} already exists, leaving it untouched")
else:
    symlink_path.symlink_to(original_station_dir, target_is_directory=True)
    print(f"{symlink_path} -> {original_station_dir}")

# %%
for entry in sorted(symlink_path.iterdir()):
    print(entry.name)

# %%
# Important: the list is sorted so the files are sequential. The run-grouping
# logic further down compares each file with the one listed just before it.
files_list = sorted(
    txt_path for txt_path in symlink_path.glob("*.TXT") if txt_path.is_file()
)

print("FILES:\n")
for txt_path in files_list:
    print(txt_path)

# %% [markdown]
# ## Replace l424_list below with sniffer that takes first and last lines of TXT

# %% [markdown]
# ### Make a list of LEMI424 objects, one per file

# %%
l424_list = [LEMI424(fn=txt_path) for txt_path in files_list]

# %% [markdown]
# ### Read in the data

# %%
for reader in l424_list:
    reader.read()

# %%
L0 = l424_list[0]
print(L0.fn[0].stem, L0.start, L0.end, sep="\n")

# %% [markdown]
# #### Now info can be accessed via data frame
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmonthdayhourminutesecondbxbybze1e2e3e4latitudelongitude
020209302021023813.621729.81641802.042131.013-111.026164.1669.7153404.8391110712.84475
120209302021123813.586729.84241802.030130.917-111.204164.0619.5403404.8391110712.84473
220209302021223813.553729.87541802.058130.918-111.227164.0719.5213404.8391010712.84470
320209302021323813.477729.87841802.042130.918-111.396164.0609.3573404.8391010712.84468
420209302021423813.449729.90841802.034131.018-111.326164.1709.4283404.8390910712.84467
# %%
# Columns to preview from the first file's data frame.
# NOTE(review): `_df` looks like a private attribute of the reader -- confirm
# whether a public accessor exists before relying on it.
columns = [
    "year", "month", "day", "hour", "minute", "second",
    "bx", "by", "bz",
    "e1", "e2", "e3", "e4",
    "latitude", "longitude",
]
l424_list[0]._df[columns].head(5)

# %%
# Every column name the reader produced for the first file.
l424_list[0]._df.columns

# %%
# First five rows, all columns.
l424_list[0]._df.head(5)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmonthdayhourminutesecondbxbybztemperature_e...e4batteryelevationlatitudelat_hemispherelongitudelon_hemispheren_satellitesgps_fixtdiff
431202093020281123784.690215.65341830.50342.52...11.65013.012204.83404.83945N10712.84481W1220
432202093020281223784.723215.63741830.47242.54...12.19313.012204.83404.83946N10712.84481W1220
433202093020281323784.766215.61941830.44142.54...12.81613.012204.83404.83948N10712.84479W1220
434202093020281423784.856215.60141830.44142.58...13.34513.002204.83404.83949N10712.84479W1220
435202093020281523784.928215.57341830.44542.57...13.81713.002204.73404.83950N10712.84478W1220
\n", + "

5 rows × 24 columns

# %%
# Last five rows of the first file, all columns.
l424_list[0]._df.tail(5)

# %%
# Columns of the per-file summary table built below.
COLUMNS = [
    "file_path",
    "first_sample_time",
    "last_sample_time",
    "num_lines",
    "run_id",
    "sample_rate",
    "new_run",
    "file_base",
]

# %%
n_files = len(files_list)
n_files

# %%
# One pre-sized list per summary column; the next cell fills them file by file.
data_dict = {col: n_files * [None] for col in COLUMNS}

# %%
# The reader's `start` and `end` are taken as the first and last sample times.
# `run_id` and `new_run` are placeholders here; later cells overwrite them.
for i_file in range(n_files):
    txt_path = files_list[i_file]
    reader = l424_list[i_file]
    data_dict["file_path"][i_file] = txt_path
    data_dict["first_sample_time"][i_file] = pd.Timestamp(reader.start)
    data_dict["last_sample_time"][i_file] = pd.Timestamp(reader.end)
    data_dict["num_lines"][i_file] = len(reader._df)
    data_dict["run_id"][i_file] = ""
    data_dict["sample_rate"][i_file] = reader.sample_rate
    data_dict["new_run"][i_file] = True
    data_dict["file_base"][i_file] = txt_path.name
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_sample_timelast_sample_timenum_linesrun_idsample_ratenew_runfile_base
02020-09-30 20:21:002020-09-30 20:28:154361.0True202009302021.TXT
12020-09-30 20:29:002020-09-30 20:42:167971.0True202009302029.TXT
22020-09-30 20:54:002020-09-30 21:11:0110221.0True202009302054.TXT
32020-09-30 21:12:002020-09-30 21:13:451061.0True202009302112.TXT
42020-09-30 21:14:002020-09-30 23:59:5999601.0True202009302114.TXT
52020-10-01 00:00:002020-10-01 23:59:59864001.0True202010010000.TXT
62020-10-02 00:00:002020-10-02 23:59:59864001.0True202010020000.TXT
72020-10-03 00:00:002020-10-03 23:59:59864001.0True202010030000.TXT
82020-10-04 00:00:002020-10-04 23:59:59864001.0True202010040000.TXT
92020-10-05 00:00:002020-10-05 23:59:59864001.0True202010050000.TXT
102020-10-06 00:00:002020-10-06 23:59:59864001.0True202010060000.TXT
112020-10-07 00:00:002020-10-07 14:19:46515871.0True202010070000.TXT
# %%
# One row per file. The full file path is left out of the display to keep the
# table narrow.
station_data_df = pd.DataFrame(data_dict)
station_data_df.loc[:, COLUMNS[1:]]

# %% [markdown]
# Here is some logic we will wrap as a function to figure out which files are part of a contiguous run
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_sample_timelast_sample_timenum_linesrun_idsample_ratenew_runfile_base
02020-09-30 20:21:002020-09-30 20:28:154360011.0True202009302021.TXT
12020-09-30 20:29:002020-09-30 20:42:167970021.0True202009302029.TXT
22020-09-30 20:54:002020-09-30 21:11:0110220031.0True202009302054.TXT
32020-09-30 21:12:002020-09-30 21:13:451060041.0True202009302112.TXT
42020-09-30 21:14:002020-09-30 23:59:5999600051.0True202009302114.TXT
52020-10-01 00:00:002020-10-01 23:59:59864000051.0False202010010000.TXT
62020-10-02 00:00:002020-10-02 23:59:59864000051.0False202010020000.TXT
72020-10-03 00:00:002020-10-03 23:59:59864000051.0False202010030000.TXT
82020-10-04 00:00:002020-10-04 23:59:59864000051.0False202010040000.TXT
92020-10-05 00:00:002020-10-05 23:59:59864000051.0False202010050000.TXT
102020-10-06 00:00:002020-10-06 23:59:59864000051.0False202010060000.TXT
112020-10-07 00:00:002020-10-07 14:19:46515870051.0False202010070000.TXT
\n", + "
" + ], + "text/plain": [ + " first_sample_time last_sample_time num_lines run_id sample_rate \\\n", + "0 2020-09-30 20:21:00 2020-09-30 20:28:15 436 001 1.0 \n", + "1 2020-09-30 20:29:00 2020-09-30 20:42:16 797 002 1.0 \n", + "2 2020-09-30 20:54:00 2020-09-30 21:11:01 1022 003 1.0 \n", + "3 2020-09-30 21:12:00 2020-09-30 21:13:45 106 004 1.0 \n", + "4 2020-09-30 21:14:00 2020-09-30 23:59:59 9960 005 1.0 \n", + "5 2020-10-01 00:00:00 2020-10-01 23:59:59 86400 005 1.0 \n", + "6 2020-10-02 00:00:00 2020-10-02 23:59:59 86400 005 1.0 \n", + "7 2020-10-03 00:00:00 2020-10-03 23:59:59 86400 005 1.0 \n", + "8 2020-10-04 00:00:00 2020-10-04 23:59:59 86400 005 1.0 \n", + "9 2020-10-05 00:00:00 2020-10-05 23:59:59 86400 005 1.0 \n", + "10 2020-10-06 00:00:00 2020-10-06 23:59:59 86400 005 1.0 \n", + "11 2020-10-07 00:00:00 2020-10-07 14:19:46 51587 005 1.0 \n", + "\n", + " new_run file_base \n", + "0 True 202009302021.TXT \n", + "1 True 202009302029.TXT \n", + "2 True 202009302054.TXT \n", + "3 True 202009302112.TXT \n", + "4 True 202009302114.TXT \n", + "5 False 202010010000.TXT \n", + "6 False 202010020000.TXT \n", + "7 False 202010030000.TXT \n", + "8 False 202010040000.TXT \n", + "9 False 202010050000.TXT \n", + "10 False 202010060000.TXT \n", + "11 False 202010070000.TXT " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Builds the boolean valued column \"new run\".\n", + "True when there is a gap between the data of the row under consideration and False if contiguous\n", + "\"\"\"\n", + "new_run = n_files * [True]\n", + "for i_row, row in station_data_df.iterrows():\n", + " if i_row == 0:\n", + " pass\n", + "# row.run_id = run_id_str\n", + " else:\n", + " #Check of sample rate changed\n", + " previous = station_data_df.loc[i_row-1]\n", + " if previous.sample_rate != row.sample_rate:\n", + " print(\"CHANGED SAMPLE RATE\")\n", + " new_run[i_row] = True\n", + " #row.new_run = True\n", + " 
continue\n", + " \n", + " #check for continuity with previous\n", + " dt = pd.Timedelta(seconds=1./previous.sample_rate)\n", + " previous_next_sample = previous.last_sample_time + dt\n", + "\n", + " \n", + " if row.first_sample_time == previous_next_sample:\n", + " print(\"SAME RUN\")\n", + " new_run[i_row] = False\n", + " #station_data_df.iloc[i_row].replace(to_replace=True, value = False)\n", + " #station_data_df.at[i_row].new_run = False\n", + " continue\n", + " \n", + "station_data_df[\"new_run\"] = new_run \n", + "station_data_df[COLUMNS[1:]]\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "4e1897f3-c164-45f6-9545-12c48939255a", + "metadata": {}, + "source": [ + "Now all the rows that have True in column \"new_run\" are separate runs.\n", + "\n", + "All the columns that are false should recursively look upward until they find a True value to see the file with the start of the run they belong to.\n", + "\n", + "Next step is to issue some run labels. Here we will use a scheme like:\n", + "001\n", + "002\n", + "...\n", + "\n", + "\n", + "So call the first run 001" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2783b71a-06ad-4855-b92f-d9db81940a7b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'001'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run_id_int = 1\n", + "run_id_str = str(run_id_int).zfill(3)\n", + "run_id_str" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d5cf56ca-1b46-421f-99cb-adf377b661f5", + "metadata": {}, + "outputs": [], + "source": [ + "run_ids = n_files * [\"\"]\n", + "run_ids[0] = run_id_str" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f5e4b88f-1055-47ae-9b7d-ff4d97ffdbbf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['001', '002', '003', '004', '005', '005', '005', '005', '005', '005', '005', '005']\n" + ] + } + ], 
def label_runs(new_run_flags, first_run_number=1, width=3):
    """Return one zero-padded run label per file.

    Parameters
    ----------
    new_run_flags : iterable of bool
        One flag per file, in row order; a true flag marks the first file
        of a new run.
    first_run_number : int
        Number given to the run that the first file belongs to.
    width : int
        Labels are left-padded with zeros to this many characters.

    Returns
    -------
    list of str
        The run label of every file, in the same order as the flags.
    """
    labels = []
    run_number = first_run_number
    for position, starts_new_run in enumerate(new_run_flags):
        # The first file always belongs to the first run, whatever its flag
        # says; only later files can open a new run.
        if position > 0 and starts_new_run:
            run_number += 1
        labels.append(str(run_number).zfill(width))
    return labels


# The labels are derived from the "new_run" column only, so the numbering
# restarts at 001 every time this cell is executed. The cell no longer reads
# or advances a counter left in the kernel by an earlier cell, and it walks
# the rows by position instead of using index labels as list positions.
run_ids = label_runs(station_data_df["new_run"])
print(run_ids)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_sample_timelast_sample_timenum_linesrun_idsample_ratenew_runfile_base
02020-09-30 20:21:002020-09-30 20:28:154360011.0True202009302021.TXT
12020-09-30 20:29:002020-09-30 20:42:167970021.0True202009302029.TXT
22020-09-30 20:54:002020-09-30 21:11:0110220031.0True202009302054.TXT
32020-09-30 21:12:002020-09-30 21:13:451060041.0True202009302112.TXT
42020-09-30 21:14:002020-09-30 23:59:5999600051.0True202009302114.TXT
52020-10-01 00:00:002020-10-01 23:59:59864000051.0False202010010000.TXT
62020-10-02 00:00:002020-10-02 23:59:59864000051.0False202010020000.TXT
72020-10-03 00:00:002020-10-03 23:59:59864000051.0False202010030000.TXT
82020-10-04 00:00:002020-10-04 23:59:59864000051.0False202010040000.TXT
92020-10-05 00:00:002020-10-05 23:59:59864000051.0False202010050000.TXT
102020-10-06 00:00:002020-10-06 23:59:59864000051.0False202010060000.TXT
112020-10-07 00:00:002020-10-07 14:19:46515870051.0False202010070000.TXT
\n", + "
# Attach the run labels to the file table as a new "run_id" column.
# NOTE(review): assumes run_ids holds exactly one label per row, in row order.
station_data_df["run_id"] = run_ids
# Display the table restricted to every entry of COLUMNS except the first
# (COLUMNS is defined earlier in the notebook).
station_data_df[COLUMNS[1:]]
# %%
# Group the file table by run label; each group holds the rows that share
# one "run_id" value.
grouper = station_data_df.groupby("run_id")
# How many distinct run labels the grouping produced.
print(len(grouper))
# Map every run label to the list of "file_path" entries of the rows in that
# run, in the order the grouping yields them.
fns = {
    run_label: run_rows["file_path"].to_list()
    for run_label, run_rows in grouper
}
# Echo the run labels, one per line.
for run_label in fns:
    print(run_label)
# %%
fns
# Run time series, keyed by run label.
lemis = {}
# %%
# Build one run time series per run: hand the run's file list to the LEMI424
# reader, convert the result with to_run_ts(), and stamp the run label onto
# the run metadata.
# NOTE(review): in the recorded outputs the xarray dataset attributes still
# report "id: None" after this assignment, while run_metadata reports the
# label - confirm whether the id also has to reach the dataset attributes.
for run_id, run_files in fns.items():
    run_ts = LEMI424(fn=run_files).to_run_ts()
    run_ts.run_metadata.id = run_id
    lemis[run_id] = run_ts
# %%
lemis["001"]
\"time_period.end\": \"2020-09-30T20:28:15+00:00\",\n", + " \"time_period.start\": \"2020-09-30T20:21:00+00:00\"\n", + " }\n", + "}" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lemis[\"001\"].run_metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "31afe739-e93e-4718-b0ab-881ab2de0e59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mth5.timeseries.run_ts.RunTS" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(lemis[\"001\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "4a6689ff-e1aa-437d-9a33-2bbd800bd0ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:        (time: 436)\n",
+       "Coordinates:\n",
+       "  * time           (time) datetime64[ns] 2020-09-30T20:21:00 ... 2020-09-30T2...\n",
+       "Data variables:\n",
+       "    bx             (time) float64 2.381e+04 2.381e+04 ... 2.378e+04 2.378e+04\n",
+       "    by             (time) float64 729.8 729.8 729.9 729.9 ... 215.6 215.6 215.6\n",
+       "    bz             (time) float64 4.18e+04 4.18e+04 ... 4.183e+04 4.183e+04\n",
+       "    e1             (time) float64 131.0 130.9 130.9 130.9 ... 133.8 134.2 134.6\n",
+       "    e2             (time) float64 -111.0 -111.2 -111.2 ... -109.3 -108.7 -108.3\n",
+       "    temperature_e  (time) float64 39.76 39.76 39.75 39.81 ... 42.54 42.58 42.57\n",
+       "    temperature_h  (time) float64 40.48 40.48 40.49 40.49 ... 41.31 41.32 41.32\n",
+       "Attributes:\n",
+       "    channels_recorded_auxiliary:            ['temperature_e', 'temperature_h']\n",
+       "    channels_recorded_electric:             ['e1', 'e2']\n",
+       "    channels_recorded_magnetic:             ['bx', 'by', 'bz']\n",
+       "    data_logger.firmware.author:            None\n",
+       "    data_logger.firmware.name:              None\n",
+       "    data_logger.firmware.version:           None\n",
+       "    data_logger.id:                         None\n",
+       "    data_logger.manufacturer:               None\n",
+       "    data_logger.timing_system.drift:        0.0\n",
+       "    data_logger.timing_system.type:         GPS\n",
+       "    data_logger.timing_system.uncertainty:  0.0\n",
+       "    data_logger.type:                       None\n",
+       "    data_type:                              BBMT\n",
+       "    id:                                     None\n",
+       "    sample_rate:                            1.0\n",
+       "    time_period.end:                        2020-09-30T20:28:15+00:00\n",
+       "    time_period.start:                      2020-09-30T20:21:00+00:00
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 436)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2020-09-30T20:21:00 ... 2020-09-30T2...\n", + "Data variables:\n", + " bx (time) float64 2.381e+04 2.381e+04 ... 2.378e+04 2.378e+04\n", + " by (time) float64 729.8 729.8 729.9 729.9 ... 215.6 215.6 215.6\n", + " bz (time) float64 4.18e+04 4.18e+04 ... 4.183e+04 4.183e+04\n", + " e1 (time) float64 131.0 130.9 130.9 130.9 ... 133.8 134.2 134.6\n", + " e2 (time) float64 -111.0 -111.2 -111.2 ... -109.3 -108.7 -108.3\n", + " temperature_e (time) float64 39.76 39.76 39.75 39.81 ... 42.54 42.58 42.57\n", + " temperature_h (time) float64 40.48 40.48 40.49 40.49 ... 41.31 41.32 41.32\n", + "Attributes:\n", + " channels_recorded_auxiliary: ['temperature_e', 'temperature_h']\n", + " channels_recorded_electric: ['e1', 'e2']\n", + " channels_recorded_magnetic: ['bx', 'by', 'bz']\n", + " data_logger.firmware.author: None\n", + " data_logger.firmware.name: None\n", + " data_logger.firmware.version: None\n", + " data_logger.id: None\n", + " data_logger.manufacturer: None\n", + " data_logger.timing_system.drift: 0.0\n", + " data_logger.timing_system.type: GPS\n", + " data_logger.timing_system.uncertainty: 0.0\n", + " data_logger.type: None\n", + " data_type: BBMT\n", + " id: None\n", + " sample_rate: 1.0\n", + " time_period.end: 2020-09-30T20:28:15+00:00\n", + " time_period.start: 2020-09-30T20:21:00+00:00" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lemis[\"001\"].dataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "978b6a3f-e317-44b2-86df-1713674e1efe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:        (time: 579947)\n",
+       "Coordinates:\n",
+       "  * time           (time) datetime64[ns] 2020-09-30T21:14:00 ... 2020-10-07T1...\n",
+       "Data variables:\n",
+       "    bx             (time) float64 2.378e+04 2.378e+04 ... 2.378e+04 2.378e+04\n",
+       "    by             (time) float64 213.9 213.9 213.9 213.9 ... 247.2 247.3 247.3\n",
+       "    bz             (time) float64 4.183e+04 4.183e+04 ... 4.184e+04 4.184e+04\n",
+       "    e1             (time) float64 134.8 134.9 135.0 135.1 ... 130.4 130.3 130.2\n",
+       "    e2             (time) float64 -109.8 -109.9 -109.9 ... -40.88 -40.95 -40.93\n",
+       "    temperature_e  (time) float64 46.5 46.46 46.47 46.47 ... 18.18 18.17 18.17\n",
+       "    temperature_h  (time) float64 36.3 36.3 36.3 36.3 ... 19.61 19.6 19.6 19.6\n",
+       "Attributes:\n",
+       "    channels_recorded_auxiliary:            ['temperature_e', 'temperature_h']\n",
+       "    channels_recorded_electric:             ['e1', 'e2']\n",
+       "    channels_recorded_magnetic:             ['bx', 'by', 'bz']\n",
+       "    data_logger.firmware.author:            None\n",
+       "    data_logger.firmware.name:              None\n",
+       "    data_logger.firmware.version:           None\n",
+       "    data_logger.id:                         None\n",
+       "    data_logger.manufacturer:               None\n",
+       "    data_logger.timing_system.drift:        0.0\n",
+       "    data_logger.timing_system.type:         GPS\n",
+       "    data_logger.timing_system.uncertainty:  0.0\n",
+       "    data_logger.type:                       None\n",
+       "    data_type:                              BBMT\n",
+       "    id:                                     None\n",
+       "    sample_rate:                            1.0\n",
+       "    time_period.end:                        2020-10-07T14:19:46+00:00\n",
+       "    time_period.start:                      2020-09-30T21:14:00+00:00
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 579947)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2020-09-30T21:14:00 ... 2020-10-07T1...\n", + "Data variables:\n", + " bx (time) float64 2.378e+04 2.378e+04 ... 2.378e+04 2.378e+04\n", + " by (time) float64 213.9 213.9 213.9 213.9 ... 247.2 247.3 247.3\n", + " bz (time) float64 4.183e+04 4.183e+04 ... 4.184e+04 4.184e+04\n", + " e1 (time) float64 134.8 134.9 135.0 135.1 ... 130.4 130.3 130.2\n", + " e2 (time) float64 -109.8 -109.9 -109.9 ... -40.88 -40.95 -40.93\n", + " temperature_e (time) float64 46.5 46.46 46.47 46.47 ... 18.18 18.17 18.17\n", + " temperature_h (time) float64 36.3 36.3 36.3 36.3 ... 19.61 19.6 19.6 19.6\n", + "Attributes:\n", + " channels_recorded_auxiliary: ['temperature_e', 'temperature_h']\n", + " channels_recorded_electric: ['e1', 'e2']\n", + " channels_recorded_magnetic: ['bx', 'by', 'bz']\n", + " data_logger.firmware.author: None\n", + " data_logger.firmware.name: None\n", + " data_logger.firmware.version: None\n", + " data_logger.id: None\n", + " data_logger.manufacturer: None\n", + " data_logger.timing_system.drift: 0.0\n", + " data_logger.timing_system.type: GPS\n", + " data_logger.timing_system.uncertainty: 0.0\n", + " data_logger.type: None\n", + " data_type: BBMT\n", + " id: None\n", + " sample_rate: 1.0\n", + " time_period.end: 2020-10-07T14:19:46+00:00\n", + " time_period.start: 2020-09-30T21:14:00+00:00" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lemis[\"005\"].dataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c93ebc85-6723-4e19-94b8-85f7dcd709c3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{\n", + " \"run\": {\n", + " \"channels_recorded_auxiliary\": [\n", + " \"temperature_e\",\n", + " \"temperature_h\"\n", + " ],\n", + " \"channels_recorded_electric\": [\n", + " \"e1\",\n", + " \"e2\"\n", + " ],\n", + " 
# Show the run-level metadata of run "005".
lemis["005"].run_metadata
# %% [markdown]
# ### We have run time series, now let's pack it into an mth5
# %%
# Output file name; it is a relative path, so it is resolved against the
# current working directory.
h5_fn = "magdelena.h5"
# Identifier used for the station group created further down.
# NOTE(review): presumably taken from the DATA0110 folder name - confirm.
station_id = "0110"
# %%
# write some simple metadata for the survey
# The values below are hand-written placeholders for this example; none of
# them is read from the LEMI files.
survey = metadata.Survey()
survey.acquired_by.author = "MT Meister"
survey.archive_id = "LEMI_TEST_01"
survey.archive_network = "MT"
# NOTE(review): spelled "magdelena" here, "Magdalena" in the quoted e-mail.
survey.name = "magdelena"
# Create the MTH5 file. Mode "w" overwrites an existing file of the same
# name, as the logged warning states.
m = mth5.MTH5(file_version="0.1.0")
m.open_mth5(h5_fn, "w")
# %%
# add survey metadata
survey_group = m.survey_group
survey_group.metadata.update(survey)
survey_group.write_metadata()
# %%
# initialize a station
station_group = m.add_station(station_id)
print("Station was just initialized")
station_group.validate_station_metadata()
# %%
# Add one run group per run time series and store its channels.
# add_run() on its own only creates the run group with its metadata: in the
# recorded file tree the run groups contain no channel datasets. from_runts()
# is what stores the time series of the run in the group.
for run_ts in lemis.values():
    run_group = station_group.add_run(
        run_ts.run_metadata.id, run_metadata=run_ts.run_metadata
    )
    run_group.from_runts(run_ts)
# %%
# Re-validate the station metadata now that the runs have been added.
station_group.validate_station_metadata()
# %%
# Refresh the survey-level metadata after the station and runs were added.
survey_group.update_survey_metadata()
# Flush and close the file that was opened for writing.
m.close_mth5()
# %%
# Reopen the same file under a second handle so its contents can be inspected.
# NOTE(review): mode "a" presumably opens the file read/write; if this handle
# is only used for inspection, a read-only mode may be enough - confirm.
mm = mth5.MTH5(file_version="0.1.0")
mm.open_mth5(h5_fn, "a")
+ ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mm" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "84ea94e5-0523-43fd-99ef-c4c6541ed401", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-08-05 17:01:18,659 [line 731] mth5.mth5.MTH5.close_mth5 - INFO: Flushing and closing magdelena.h5\n" + ] + } + ], + "source": [ + "mm.close_mth5()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "383af849-4458-4978-a243-5f7fe7c1b5d4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py37", + "language": "python", + "name": "py37" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}