### `themachinethatgoesping` tutorial series
# Tutorial 2: Introduction

In this tutorial, we show how to speed up repeated file opening using caching.

`themachinethatgoesping` concepts covered:
- "File Handler" object
- data loading
- file caching

## Summary

In [None]:
%matplotlib widget
import os

from matplotlib import pyplot as plt
from themachinethatgoesping.echosounders import index_functions
from themachinethatgoesping.echosounders import kongsbergall
from time import time

# Summary of the whole tutorial in a single cell: find the raw files, set up
# the cache file paths, load the files once with caching enabled, then time a
# load without and a load with the cache.
folders = []
folders.append("../unittest_data")

# list raw data files
files = index_functions.find_files(folders, [".all","wcd"])
files.sort()

# -- File caching --
# Up to here the steps were the same as in the previous demo; now we set up the
# cache file paths using the get_cache_file_paths function
cacheFilePaths = index_functions.get_cache_file_paths(file_paths=files)
index_functions.print_cache_file_statistics(cacheFilePaths)
# NOTE(review): presumably removes the "FilePackageIndex" entry from already
# existing cache files so that it gets rebuilt by the load below - confirm
# against the index_functions documentation
index_functions.remove_name_from_cache(cacheFilePaths, "FilePackageIndex")
# cacheFilePaths is a dictionary that maps each raw data file path to the path of its cache file
# Passing cacheFilePaths to the FileHandler causes the FileHandler to either
# - create the cache files (if they don't exist)
# - or load them (if they do exist)
fh = kongsbergall.KongsbergAllFileHandler(files, file_cache_paths = cacheFilePaths)

# compare loading times in repeated file loading
# (t2 - t1 is the load without cache, t3 - t2 the load with cache)
t1 = time()
# without using the cache
fh = kongsbergall.KongsbergAllFileHandler(files)
t2 = time()
# using the cache
fh = kongsbergall.KongsbergAllFileHandler(files, file_cache_paths = cacheFilePaths)
t3 = time()

print("\n-- Compare loading times --")
print(f"Time without cache: {round(t2-t1,3)} seconds")
print(f"Time with cache:    {round(t3-t2,3)} seconds")

# -- Investigate the created cache --
# Here we print the cache file statistics to understand how big the created cache files are
print("\n-- Cache file statistics --")
index_functions.print_cache_file_statistics(cacheFilePaths)

In [None]:
# take the first element of what the file handler returns from get_pings()
# and print it
pings = fh.get_pings()
ping = pings[0]
print(ping)

## Step-by-step
### 1. Find raw data files (see previous demo)

In [None]:
from themachinethatgoesping.echosounders import index_functions

# folder(s) that are searched for raw data files
# notes:
#   - subdirectories will be searched as well
#   - add further folders to this list to search multiple locations
#   - pair of files (e.g. .all and .wcd) don't have to be in the same folder
folders = ["../unittest_data"]

# collect all Kongsberg files found in these folders
files = index_functions.find_files(folders, [".all","wcd"])

# report what was found, listing the files in sorted order
print(f"The output is a {type(files)} object with {len(files)} elements:")
files.sort()
n_files = len(files)
for idx, path in enumerate(files):
    print(f"({idx}/{n_files}) {path}")

### 2. Find/create cacheFilePaths data

In [None]:
# Caching a file the first time it is read speeds up every later load.
#
# Each raw data file gets a corresponding cache file. By default, the cache
# files are stored in the same directory as the notebook in a newly created
# "cache" folder.
cacheFilePaths = index_functions.get_cache_file_paths(file_paths=files)

# show which cache file belongs to which raw data file
for raw_path, cache_path in cacheFilePaths.items():
    print(f"File: {raw_path}")
    print(f"    Cache: {cache_path}")

In [None]:
# -- Investigate the cache before loading --
# Print the statistics of the cache files referenced by cacheFilePaths
# to understand how big they are
print("\n-- Cache file statistics --")
index_functions.print_cache_file_statistics(cacheFilePaths)

# Note: if this is the first time you run the code,
# the cache files do not exist yet, and there are no statistics to print

### 3. Load data with cache file paths

In [None]:
from themachinethatgoesping.echosounders import kongsbergall

# Open the files and hand the cache file paths to the file handler.
# On the very first run the cache files do not exist yet and will be created.
fh = kongsbergall.KongsbergAllFileHandler(
    files,
    file_cache_paths=cacheFilePaths,
)


### 4. Compare loading times

In [None]:
# compare loading times in repeated file loading
# (the timer is imported here so that this step does not depend on the
#  summary cell at the top of the notebook having been run)
from time import time

t1 = time()
# load data without using the cache
fh = kongsbergall.KongsbergAllFileHandler(files)
t2 = time()
# load data using the cache files
fh = kongsbergall.KongsbergAllFileHandler(files, file_cache_paths=cacheFilePaths)
t3 = time()

# t2 - t1: duration of the uncached load, t3 - t2: duration of the cached load
print("\n-- Compare loading times --")
print(f"Time without cache: {round(t2 - t1, 3)} seconds")
print(f"Time with cache:    {round(t3 - t2, 3)} seconds")

### 5. Investigate cache files

In [None]:
# -- Investigate the created cache --
# Now that the files have been loaded with file_cache_paths, print the
# cache file statistics to understand how big the created cache files are
print("\n-- Cache file statistics --")
index_functions.print_cache_file_statistics(cacheFilePaths)