# 6.8300 Final Project: Data Processing Notebook

## Catherine Mei and Linette Kunin

### Imports

In [None]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import cv2
from PIL import Image
# from scipy.io import wavfile
# from scipy import signal
# import librosa.display

### Import Data

In [None]:
# Toggle: True mounts Google Drive and uses the project folder there; False uses ./data.
use_gdrive = True

In [None]:
# Pick the project's root data directory for the selected environment.
if not use_gdrive:
  data_dir = "./data"
else:
  # google.colab is imported only on the branch that actually mounts the drive
  from google.colab import drive
  drive.mount('/content/drive')

  data_dir = "/content/drive/MyDrive/6.8300FinalProject"

Mounted at /content/drive


In [None]:
# Every working directory hangs off data_dir.
downloads_dir = f'{data_dir}/downloads'
datasets_dir = f'{data_dir}/vis-data-256'            # raw recordings and annotation files
models_dir = f'{data_dir}/models'
pretrained_models = f'{data_dir}/pretrained_models'
q2c_data_dir = f'{data_dir}/q2c_data'
results_dir = f'{data_dir}/vis-data-processed'       # per-recording output folders

In [None]:
# Names of all entries in the raw dataset directory.
data_folder = os.listdir(datasets_dir)
# File-name endings that follow a recording id; used below to strip a name back to its id.
suffixes = ['_times.txt', '_denoised_thumb.mp4', '_mic.mp4', '_mic.wav', '_denoised.mp4', '_denoised.wav']

In [None]:
# Recording id = the part of a file name that precedes a known suffix.
# Names that contain none of the suffixes contribute nothing.
ids = {
    name[:name.find(suffix)]
    for name in data_folder
    for suffix in suffixes
    if name.find(suffix) != -1
}
print("Unique videos: ", len(ids))
ids = list(ids)

Unique videos:  977


### Identify All Material Categories

In [None]:
def parse_classes(name, idx, base_dir=None):
  """Collect the distinct tokens found at position `idx` of each line of a text file.

  Lines are split on whitespace. Lines with fewer than `idx + 1` tokens are
  ignored, and so is the literal token 'None'.

  Args:
    name: file name, relative to `base_dir`.
    idx: zero-based index of the token to collect from every line.
    base_dir: directory containing the file; defaults to the notebook-level
      `datasets_dir` when omitted.

  Returns:
    A list of the unique tokens, in no particular order.
  """
  if base_dir is None:
    base_dir = datasets_dir

  elems = set()
  with open(base_dir + "/" + name, 'r') as f:
    for line in f.read().split("\n"):
      words = line.split()
      # too short to have a token at idx (covers blank lines as well)
      if len(words) < idx + 1:
        continue
      if words[idx] == 'None':
        continue
      elems.add(words[idx])

  # return list of materials
  return list(elems)

In [None]:
# Gather every material name that occurs in the annotation text files.
all_materials = set()

for name in data_folder:

  # not text file or train / test text file - then skip
  if name.find('.txt') == -1 or 'train.txt' in name or 'test.txt' in name:
      continue

  # adding materials (token index 1 of each line) from current file into total set of materials
  all_materials.update(parse_classes(name, 1))

# Sort before numbering: iterating a set of strings can give a different order,
# and therefore different label indices, from one Python session to the next.
# NOTE: a later cell rebinds `categories` to a hard-coded mapping, and that one
# is what segment_hits uses for the label files.
all_materials = sorted(all_materials)
categories = {cat: i for i, cat in enumerate(all_materials)}

print('Materials Dictionary:')
print(categories)

processed:  0
processed:  1000
processed:  2000


### Process Hits: for each hit, segment corresponding video / audio file

In [None]:
# Fixed material-name -> integer-label mapping; segment_hits looks materials up here when writing label files.
categories = {'plastic': 0, 'drywall': 1, 'rock': 2, 'metal': 3, 'leaf': 4, 'grass': 5, 'paper': 6, 'water': 7, 'gravel': 8, 'glass': 9, 'tile': 10, 'ceramic': 11, 'plastic-bag': 12, 'dirt': 13, 'cloth': 14, 'wood': 15, 'carpet': 16}

In [None]:
# Sort the IDs and write them to stimuli_ids.txt (uncomment to regenerate the file that the next cell reads back)
# sorted_ids = sorted(ids)

# joined = "\n".join(sorted_ids)
# f = open(f'{data_dir + "/stimuli_ids.txt"}', "w")
# print(joined, file=f)
# f.close()

In [None]:
# In case the code dies, run this cell to read the file of ids
with open(f'{data_dir + "/stimuli_ids.txt"}', 'r') as f:
  # one id per line; empty lines (e.g. after a trailing newline) are dropped
  sorted_ids = [line for line in f.read().split("\n") if len(line)]

In [None]:
# Work on a reversed copy: reversing `sorted_ids` in place would flip it again
# every time this cell is re-run and silently change which ids get processed.
reversed_ids = sorted_ids[::-1]
# skip the first 700 ids of the reversed list (presumably handled in an earlier run - confirm)
process_ids = reversed_ids[700:]

print(process_ids)
print(len(process_ids))

['2015-03-28-20-20-05', '2015-03-28-20-19-02', '2015-03-28-20-17-09', '2015-03-28-20-15-56', '2015-03-28-20-15-13', '2015-03-28-20-14-13', '2015-03-28-20-13-18', '2015-03-28-20-10-45', '2015-03-28-20-09-53', '2015-03-28-20-05-10', '2015-03-28-20-03-25', '2015-03-28-20-01-17', '2015-03-28-20-00-25', '2015-03-28-19-59-35', '2015-03-28-19-57-57', '2015-03-28-19-56-15', '2015-03-28-19-54-20', '2015-03-28-19-50-12', '2015-03-28-19-48-39', '2015-03-28-19-47-32', '2015-03-28-19-46-36', '2015-03-28-19-45-24', '2015-03-28-19-44-21', '2015-03-28-19-43-23', '2015-03-28-19-41-13', '2015-03-28-19-36-18', '2015-03-28-19-35-11', '2015-03-28-19-34-13', '2015-03-28-18-49-59', '2015-03-28-18-48-33', '2015-03-28-18-46-39', '2015-03-28-18-44-44', '2015-03-28-18-42-29', '2015-03-28-18-39-43', '2015-03-28-18-38-13', '2015-03-28-18-37-10', '2015-03-28-18-29-54', '2015-03-28-18-27-40', '2015-03-28-18-25-59', '2015-03-28-18-24-12', '2015-03-28-18-21-03', '2015-03-28-18-16-04', '2015-03-28-18-12-20', '2015-03-2

In [None]:
import soundfile as sf
import pandas as pd
from scipy.io import wavfile
from scipy import signal
from librosa.core.spectrum import stft

def segment_hits(file_id, time_window=0.5):
  """Cut one recording into per-hit samples under `results_dir/<file_id>/`.

  A hit is a line of `<file_id>_times.txt` with at least two tokens whose
  second token is not "None"; the first token is the hit time in seconds and
  the second the material name. For hit number n (counted from 0) four files
  are written:
    <file_id>_sound_<n>.wav        audio from `time_window` s before to
                                   `time_window` s after the hit
    <file_id>_spectrogram_<n>.npy  magnitude of the STFT of that audio
    <file_id>_labels_<n>.txt       integer label looked up in `categories`
    <file_id>_image_<n>.png        video frame at the hit time

  The output folder is created only once a hit is found, so recordings
  without hits leave no folder behind.

  Args:
    file_id: recording id (shared prefix of the files in `datasets_dir`).
    time_window: seconds of audio kept on each side of the hit time.

  Raises:
    KeyError: if a material name is not a key of `categories`.
    RuntimeError: if no video frame can be read at a hit time.
  """
  audio_time_series, sampling_rate = librosa.load(f'{datasets_dir + "/" + file_id}_denoised.wav')

  with open(f'{datasets_dir + "/" + file_id}_times.txt', 'r') as f:
    lines = f.read().split("\n")

  out_prefix = f'{results_dir + "/" + file_id + "/"}{file_id}'
  vidcap = cv2.VideoCapture(f'{datasets_dir + "/" + file_id}_denoised_thumb.mp4')

  hit_num = 0
  try:
    for line in lines:
      words = line.split()

      # skip if no hit
      if len(words) < 2 or words[1] == "None":
        continue

      hit_material = categories[words[1]]
      hit_time = float(words[0])

      os.makedirs(f'{results_dir + "/" + file_id}', exist_ok=True)

      # segment audio of relevant context of hit; the start is clamped to 0
      # because a negative start index would count from the END of the array
      # and give a wrong (usually empty) window for hits near the beginning
      start = max(0, int(sampling_rate * (hit_time - time_window)))
      stop = int(sampling_rate * (hit_time + time_window))
      window = audio_time_series[start:stop]
      sf.write(f'{out_prefix}_sound_{hit_num}.wav', window, sampling_rate, 'PCM_24')

      # Generate spectrogram (magnitude only; phase is discarded)
      spectrogram_sig = np.abs(librosa.core.spectrum.stft(window))
      with open(f'{out_prefix}_spectrogram_{hit_num}.npy', 'wb') as f:
        np.save(f, spectrogram_sig)

      # save both label and image of hit
      with open(f'{out_prefix}_labels_{hit_num}.txt', 'w') as f:
        f.write(str(hit_material))
      vidcap.set(cv2.CAP_PROP_POS_MSEC, hit_time * 1000)
      frame_ok, image = vidcap.read()
      if not frame_ok:
        # fail with context instead of handing an empty frame to cv2.imwrite
        raise RuntimeError(f'Could not read video frame at {hit_time}s for {file_id}')
      cv2.imwrite(f'{out_prefix}_image_{hit_num}.png', image)

      hit_num += 1
  finally:
    # always give the video handle back, even when a hit fails
    vidcap.release()

  if hit_num == 0:
    print("No hits found for:", file_id)

In [None]:
# Segment every queued recording; the running index shows how far the batch got.
# (loop variable is not called `id`, which would hide the built-in of that name)
for count, file_id in enumerate(process_ids):
  segment_hits(file_id)
  print("Processed", count, ": ", file_id)

In [None]:
# check how many folders are written out
# (segment_hits creates a recording's folder only when it finds at least one hit)
results_folder = os.listdir(results_dir)
print(results_folder)
print(len(results_folder))

['2015-02-16-16-49-06', '2015-02-16-16-56-35', '2015-02-16-16-58-57', '2015-02-16-17-02-05', '2015-02-16-17-03-55', '2015-02-16-17-08-51', '2015-02-16-17-15-23', '2015-02-16-17-27-53', '2015-02-16-17-32-18', '2015-02-16-17-35-07', '2015-02-16-17-40-35', '2015-02-16-17-46-48', '2015-02-16-17-52-15', '2015-02-16-17-54-47', '2015-02-16-18-02-38', '2015-02-16-18-07-52', '2015-02-21-17-08-25', '2015-02-21-17-09-17', '2015-02-21-17-13-35', '2015-02-21-17-14-45', '2015-02-21-17-16-04', '2015-02-21-17-18-56', '2015-02-21-17-27-48', '2015-02-21-17-29-20', '2015-02-21-17-31-46', '2015-02-21-17-33-03', '2015-02-21-17-34-22', '2015-02-21-17-37-20', '2015-02-21-17-39-27', '2015-02-21-17-41-06', '2015-02-21-17-42-56', '2015-02-21-17-45-03', '2015-02-21-17-49-40', '2015-02-21-17-51-20', '2015-02-21-17-53-09', '2015-02-21-17-54-02', '2015-02-21-17-55-40', '2015-02-21-17-57-20', '2015-02-22-14-20-13', '2015-02-22-14-21-41', '2015-02-22-14-23-33', '2015-02-22-14-24-52', '2015-02-22-14-28-15', '2015-02-2

In [None]:
# find missing Ids
# i.e. ids from stimuli_ids.txt that have no entry in the results directory
set_results = set(results_folder)
set_total_ids = set(sorted_ids)

missing_ids = sorted(set_total_ids.difference(set_results))

print(missing_ids)

['2015-02-21-17-48-19', '2015-03-28-20-13-18']


In [None]:
# for each folder, check how many files are in that folder

flagged_ids = []
for folder in results_folder:
  folder_path = results_dir + "/" + folder
  # results_dir can also contain plain files (another cell writes mean_sd.txt
  # there); os.listdir would raise on those, so only directories are inspected
  if not os.path.isdir(folder_path):
    continue
  all_files = os.listdir(folder_path)

  # segment_hits writes four files per hit (sound, spectrogram, label, image),
  # so right after segmentation fewer than six files means at most one hit
  if len(all_files) < 6:
    print("Folder:", folder, "only has:", len(all_files), "files")
    flagged_ids.append(folder)


Folder: 2015-03-29-16-59-00 only has: 4 files
Folder: 2015-03-31-00-24-43 only has: 4 files
Folder: 2015-03-30-00-39-48 only has: 4 files


### Normalize Dataset (Calculate mean and SD)

In [None]:
global_min = np.load(f'{data_dir + "/min_all_files.npy"}')

# All subfolders containing videos (ids AKA dates). Plain files in results_dir,
# such as the mean_sd.txt written by another cell, are not recordings.
data_folders = [d for d in os.listdir(results_dir) if os.path.isdir(results_dir + "/" + d)]

# For each folder: pool its spectrogram values, then store the sum of their
# log10 and the number of values, so the global mean can be assembled later
# without holding the whole dataset in memory.
for count, folder_id in enumerate(data_folders):
  print("Processing:", count, "(", folder_id, ")")
  folder_path = results_dir + "/" + folder_id

  # Only the per-hit spectrograms are pooled. The statistics files this
  # notebook writes into the same folder also end in .npy and would corrupt
  # the sums if the cell were run a second time.
  # The leading empty float64 array keeps the pooled dtype at float64 and keeps
  # np.concatenate valid for folders that hold no spectrogram.
  chunks = [np.array([])]
  for file in os.listdir(folder_path):
    if file.endswith(".npy") and "_spectrogram_" in file:
      chunks.append(np.load(folder_path + "/" + file))
  # axis=None flattens every chunk; one call instead of re-copying per file
  flattened_data = np.concatenate(chunks, axis=None)

  # replace zeros with smallest non-zero value
  flattened_data[flattened_data == 0] = global_min

  log_vals = np.log10(flattened_data)
  # vectorised sum; the built-in sum() walks the array one Python float at a time
  log_sum = np.sum(log_vals)
  num_vals = len(log_vals)

  with open(folder_path + "/log_sum.npy", 'wb') as f:
    np.save(f, log_sum)

  with open(folder_path + "/log_sum_nvals.npy", 'wb') as f:
    np.save(f, num_vals)


Processing: 0 ( 2015-02-16-16-49-06 )
Processing: 1 ( 2015-02-16-16-56-35 )
Processing: 2 ( 2015-02-16-16-58-57 )
Processing: 3 ( 2015-02-16-17-02-05 )
Processing: 4 ( 2015-02-16-17-03-55 )
Processing: 5 ( 2015-02-16-17-08-51 )
Processing: 6 ( 2015-02-16-17-15-23 )
Processing: 7 ( 2015-02-16-17-27-53 )
Processing: 8 ( 2015-02-16-17-32-18 )
Processing: 9 ( 2015-02-16-17-35-07 )
Processing: 10 ( 2015-02-16-17-40-35 )
Processing: 11 ( 2015-02-16-17-46-48 )
Processing: 12 ( 2015-02-16-17-52-15 )
Processing: 13 ( 2015-02-16-17-54-47 )
Processing: 14 ( 2015-02-16-18-02-38 )
Processing: 15 ( 2015-02-16-18-07-52 )
Processing: 16 ( 2015-02-21-17-08-25 )
Processing: 17 ( 2015-02-21-17-09-17 )
Processing: 18 ( 2015-02-21-17-13-35 )
Processing: 19 ( 2015-02-21-17-14-45 )
Processing: 20 ( 2015-02-21-17-16-04 )
Processing: 21 ( 2015-02-21-17-18-56 )
Processing: 22 ( 2015-02-21-17-27-48 )
Processing: 23 ( 2015-02-21-17-29-20 )
Processing: 24 ( 2015-02-21-17-31-46 )
Processing: 25 ( 2015-02-21-17-33-0

In [None]:
# Go through all the folders, read all the files with the log_sum information and find the average log_sum

global_sum = 0
global_count = 0

# All subfolders containing videos (ids AKA dates). Plain files in results_dir,
# such as the mean_sd.txt written by another cell, have no log_sum files and
# are skipped instead of crashing the loop.
data_folders = [d for d in os.listdir(results_dir) if os.path.isdir(results_dir + "/" + d)]
for count, folder_id in enumerate(data_folders):
  print("Processing:", count, "(", folder_id, ")")
  folder_path = results_dir + "/" + folder_id

  log_sum = np.load(folder_path + "/log_sum.npy")
  cur_count = np.load(folder_path + "/log_sum_nvals.npy")

  global_sum += log_sum
  global_count += cur_count

# mean of log10 magnitudes over every value of every folder
mean_all_files = global_sum / global_count

with open(f'{data_dir}/mean_all_files.npy', 'wb') as f:
  np.save(f, mean_all_files)

Processing: 0 ( 2015-02-16-16-49-06 )
Processing: 1 ( 2015-02-16-16-56-35 )
Processing: 2 ( 2015-02-16-16-58-57 )
Processing: 3 ( 2015-02-16-17-02-05 )
Processing: 4 ( 2015-02-16-17-03-55 )
Processing: 5 ( 2015-02-16-17-08-51 )
Processing: 6 ( 2015-02-16-17-15-23 )
Processing: 7 ( 2015-02-16-17-27-53 )
Processing: 8 ( 2015-02-16-17-32-18 )
Processing: 9 ( 2015-02-16-17-35-07 )
Processing: 10 ( 2015-02-16-17-40-35 )
Processing: 11 ( 2015-02-16-17-46-48 )
Processing: 12 ( 2015-02-16-17-52-15 )
Processing: 13 ( 2015-02-16-17-54-47 )
Processing: 14 ( 2015-02-16-18-02-38 )
Processing: 15 ( 2015-02-16-18-07-52 )
Processing: 16 ( 2015-02-21-17-08-25 )
Processing: 17 ( 2015-02-21-17-09-17 )
Processing: 18 ( 2015-02-21-17-13-35 )
Processing: 19 ( 2015-02-21-17-14-45 )
Processing: 20 ( 2015-02-21-17-16-04 )
Processing: 21 ( 2015-02-21-17-18-56 )
Processing: 22 ( 2015-02-21-17-27-48 )
Processing: 23 ( 2015-02-21-17-29-20 )
Processing: 24 ( 2015-02-21-17-31-46 )
Processing: 25 ( 2015-02-21-17-33-0

In [None]:
# Report the totals accumulated by the previous cell and the resulting mean.
print("Global Sum:", global_sum)
print("Global Count:", global_count)
print("Mean of all files:", mean_all_files)

Global Sum: -2896571772.2039256
Global Count: 1282012600
Mean of all files: -2.2593941527594392


In [None]:
# find the standard deviation given the global mean
global_mean = np.load(f'{data_dir}/mean_all_files.npy')
global_min = np.load(f'{data_dir + "/min_all_files.npy"}')

# statistics files stored next to the spectrograms; they must not be pooled as data
avoid_files = {'sd_sum.npy', 'sd_num_vals.npy', 'log_sum.npy', 'log_sum_nvals.npy'}

# ids AKA dates; plain files in results_dir (e.g. mean_sd.txt) are not recordings
data_folders = [d for d in os.listdir(results_dir) if os.path.isdir(results_dir + "/" + d)]

for count, folder_id in enumerate(data_folders):
  print("Processing:", count, "(", folder_id, ")")
  folder_path = results_dir + "/" + folder_id

  # only process npy files
  # (the leading empty float64 array keeps the pooled dtype at float64 and
  # keeps np.concatenate valid when a folder has nothing to pool)
  chunks = [np.array([])]
  for file in os.listdir(folder_path):
    if file.endswith(".npy") and (file not in avoid_files):
      chunks.append(np.load(folder_path + "/" + file))
  flattened_data = np.concatenate(chunks, axis=None)

  # replace zeros with smallest non-zero value so log10 stays finite
  flattened_data[flattened_data == 0] = global_min

  # global_mean is the mean of log10 values, so the deviations have to be
  # taken in log10 space as well (same transform as the mean computation)
  log_vals = np.log10(flattened_data)
  diffs_squared = (log_vals - global_mean)**2
  sd_sum = np.sum(diffs_squared)
  num_sds = len(log_vals)

  with open(folder_path + "/sd_sum.txt", 'w') as f:
    f.write(str(sd_sum))

  with open(folder_path + "/sd_num_vals.npy", 'wb') as f:
    np.save(f, num_sds)


Processing: 0 ( 2015-02-16-16-49-06 )
Processing: 1 ( 2015-02-16-16-56-35 )
Processing: 2 ( 2015-02-16-16-58-57 )
Processing: 3 ( 2015-02-16-17-02-05 )
Processing: 4 ( 2015-02-16-17-03-55 )
Processing: 5 ( 2015-02-16-17-08-51 )
Processing: 6 ( 2015-02-16-17-15-23 )
Processing: 7 ( 2015-02-16-17-27-53 )
Processing: 8 ( 2015-02-16-17-32-18 )
Processing: 9 ( 2015-02-16-17-35-07 )
Processing: 10 ( 2015-02-16-17-40-35 )
Processing: 11 ( 2015-02-16-17-46-48 )
Processing: 12 ( 2015-02-16-17-52-15 )
Processing: 13 ( 2015-02-16-17-54-47 )
Processing: 14 ( 2015-02-16-18-02-38 )
Processing: 15 ( 2015-02-16-18-07-52 )
Processing: 16 ( 2015-02-21-17-08-25 )
Processing: 17 ( 2015-02-21-17-09-17 )
Processing: 18 ( 2015-02-21-17-13-35 )
Processing: 19 ( 2015-02-21-17-14-45 )
Processing: 20 ( 2015-02-21-17-16-04 )
Processing: 21 ( 2015-02-21-17-18-56 )
Processing: 22 ( 2015-02-21-17-27-48 )
Processing: 23 ( 2015-02-21-17-29-20 )
Processing: 24 ( 2015-02-21-17-31-46 )
Processing: 25 ( 2015-02-21-17-33-0

In [None]:
# Read in all the SD files and find the global SD

global_sd_sum = 0
global_count = 0

# All subfolders containing videos (ids AKA dates). Plain files in results_dir,
# such as the mean_sd.txt written by another cell, are skipped.
data_folders = [d for d in os.listdir(results_dir) if os.path.isdir(results_dir + "/" + d)]
for count, folder_id in enumerate(data_folders):
  print("Processing:", count, "(", folder_id, ")")
  folder_path = results_dir + "/" + folder_id

  # The per-folder sum of squared deviations is stored as text in sd_sum.txt;
  # its first line holds the value. (No sd_sum.npy is read: the cell that
  # produces these statistics only writes the .txt file.)
  with open(folder_path + "/sd_sum.txt", 'r') as f:
    sd_sum = float(f.read().split("\n")[0])

  cur_count = np.load(folder_path + "/sd_num_vals.npy")

  global_sd_sum += sd_sum
  global_count += cur_count

# sample standard deviation: divide by n - 1
denominator = global_count - 1
before_sqrt = global_sd_sum / denominator
print(before_sqrt)
global_sd = np.sqrt(before_sqrt)

with open(f'{data_dir}/sd_all_files.npy', 'wb') as f:
  np.save(f, global_sd)

Processing: 0 ( 2015-02-16-16-49-06 )
Processing: 1 ( 2015-02-16-16-56-35 )
Processing: 2 ( 2015-02-16-16-58-57 )
Processing: 3 ( 2015-02-16-17-02-05 )
Processing: 4 ( 2015-02-16-17-03-55 )
Processing: 5 ( 2015-02-16-17-08-51 )
Processing: 6 ( 2015-02-16-17-15-23 )
Processing: 7 ( 2015-02-16-17-27-53 )
Processing: 8 ( 2015-02-16-17-32-18 )
Processing: 9 ( 2015-02-16-17-35-07 )
Processing: 10 ( 2015-02-16-17-40-35 )
Processing: 11 ( 2015-02-16-17-46-48 )
Processing: 12 ( 2015-02-16-17-52-15 )
Processing: 13 ( 2015-02-16-17-54-47 )
Processing: 14 ( 2015-02-16-18-02-38 )
Processing: 15 ( 2015-02-16-18-07-52 )
Processing: 16 ( 2015-02-21-17-08-25 )
Processing: 17 ( 2015-02-21-17-09-17 )
Processing: 18 ( 2015-02-21-17-13-35 )
Processing: 19 ( 2015-02-21-17-14-45 )
Processing: 20 ( 2015-02-21-17-16-04 )
Processing: 21 ( 2015-02-21-17-18-56 )
Processing: 22 ( 2015-02-21-17-27-48 )
Processing: 23 ( 2015-02-21-17-29-20 )
Processing: 24 ( 2015-02-21-17-31-46 )
Processing: 25 ( 2015-02-21-17-33-0

In [None]:
# Report the totals accumulated by the previous cell and the resulting standard deviation.
print("Global SD Sum:", global_sd_sum)
print("Global SD Count:", global_count)
print("SD of all files:", global_sd)

Global SD Sum: 10264764922.762405
Global SD Count: 1282012600
SD of all files: 2.8296215693815205


In [None]:
# Find mean and SD
# NOTE(review): `flattened_data` is not built in this cell; it is whatever the most
# recently executed per-folder loop left behind (one folder's values) - confirm intent.
# NOTE(review): this cell rebinds global_min / global_mean / global_sd, replacing the
# dataset-wide values computed by the cells above - confirm intent.
without_zeros = flattened_data[np.where(flattened_data > 0)]
global_min = np.min(without_zeros)

# replace zeros with smallest non-zero value
flattened_data[flattened_data == 0] = global_min

log_vals = np.log10(flattened_data)
global_mean = np.mean(log_vals)
# np.std uses its default ddof here, while the aggregated SD cell divides by n - 1
global_sd = np.std(log_vals)

# three lines: mean, SD, smallest non-zero value
with open(f'{results_dir}/mean_sd.txt', 'w') as f:
  f.write(str(global_mean) + "\n" + str(global_sd) + "\n" + str(global_min))

In [None]:
# Histogram of the log10 values currently held in log_vals.
plt.hist(log_vals)

### Errors in Dataset:

Flagging the ids below because they have issues:

* 2015-02-21-17-48-19
* 2015-03-28-20-13-18

In [None]:
# Sanity check: turn one saved spectrogram back into audio.
spectrogram_mag = np.load(f'{results_dir + "/2015-02-16-16-49-06/"}2015-02-16-16-49-06_spectrogram_0.npy')
# phase = np.load(f'{results_dir + "/2015-02-16-16-49-06/"}2015-02-16-16-49-06_phase_0.npy')
# segment_hits stores only np.abs of the STFT, so the phase is estimated with Griffin-Lim.
audio_signal = librosa.core.spectrum.griffinlim(spectrogram_mag)
sf.write(f'{data_dir}/2015-02-16-16-49-06_spectrogram_0.wav', audio_signal, 22050, 'PCM_24')
# wavfile.write(f'{data_dir}/2015-02-16-16-49-06_spectrogram_0.wav', 22050, audio_signal)
# Direct inversion needs the `phase` array from the commented-out load above. It stays
# disabled: `phase` is otherwise undefined (NameError), and the result would overwrite
# the Griffin-Lim file written just above.
# y = librosa.istft(spectrogram_mag * phase)
# sf.write(f'{data_dir}/2015-02-16-16-49-06_spectrogram_0.wav', y, 22050, 'PCM_24')

In [None]:
# import soundfile as sf
# import pandas as pd
# from scipy.io import wavfile
# from scipy import signal

# def segment_hits(file_id):
#   audio_time_series, sampling_rate = librosa.load(f'{datasets_dir + "/" + file_id}_denoised.wav')

#   sf.write(f'{data_dir}/test_test_test.wav', audio_time_series, sampling_rate)
#   print(audio_time_series)
#   return
#   # sampling_rate, samples = wavfile.read(f'{datasets_dir + "/" + file_id}_denoised.wav')
#   # frequencies, times, spectrogram = signal.spectrogram(samples, sampling_rate)

#   vidcap = cv2.VideoCapture(f'{datasets_dir + "/" + file_id}_denoised_thumb.mp4')

#   with open(f'{datasets_dir + "/" + file_id}_times.txt', 'r') as f:
#       input = f.read()
#       lines = input.split("\n")

#   # split each line and see if there was a hit

#   hit_num = 0
#   hit_exists = False
#   for line in lines:
#     words = line.split()

#     # skip if no hit
#     if len(words) < 2:
#       continue
#     if words[1] == "None":
#       continue

#     hit_material = categories[words[1]]
#     hit_time = float(words[0])
#     time_window = 0.5

#     os.makedirs(f'{results_dir + "/" + file_id}', exist_ok=True)

#     # segment video with relevant context of hit
#     window = audio_time_series[int(sampling_rate*(hit_time - time_window)):int(sampling_rate*(hit_time+time_window))]
#     # fourier_window = librosa.stft(window)
#     # window_db_amplitude = librosa.amplitude_to_db(abs(fourier_window))
#     # fig, ax = plt.subplots()
#     # img = librosa.display.specshow(window_db_amplitude,y_axis='log', x_axis='time', ax=ax)
#     img = librosa.feature.melspectrogram(y=window, sr=sampling_rate)
#     final_spectro =Image.fromarray(img).convert('F')
#     final_spectro.save(f'{results_dir + "/"+ file_id + "/"}{file_id}_spectrogram_{hit_num}.tiff')

#     # plt.axis('off')
#     # fig.savefig(f'{results_dir + "/"+ file_id + "/"}{file_id}_spectrogram_{hit_num}.png', bbox_inches='tight', pad_inches=0)
#     # fig.savefig(f'{results_dir + "/"+ file_id + "/"}{file_id}_spectrogram_{hit_num}.png')

#     window_db_core = np.abs(librosa.core.spectrum.stft(window))
#     audio_signal = librosa.core.spectrum.griffinlim(window_db_core)
#     sf.write(f'{results_dir + "/" + file_id + "/"}{file_id}_sound_{hit_num}.wav', audio_signal, sampling_rate, 'PCM_24')

#     np.savetxt(f'{results_dir + "/" + file_id + "/"}{file_id}_window_db_{hit_num}.csv', window_db_core, delimiter=',')

#     # save both label, image of spectrogram, image of hit
#     with open(f'{results_dir + "/" + file_id + "/"}{file_id}_labels_{hit_num}.txt', 'w') as f:
#       f.write(str(hit_material))
#     vidcap.set(cv2.CAP_PROP_POS_MSEC,hit_time * 1000)
#     _, image = vidcap.read()
#     cv2.imwrite(f'{results_dir + "/" + file_id + "/"}{file_id}_image_{hit_num}.png', image)

#     plt.close()
#     hit_num += 1
#     hit_exists = True

#   if not hit_exists:
#     print("No hits found for:", file_id)