In [1]:
import os
import json
import csv
from typing import List
from datetime import datetime, timedelta

class DataMerger:
  """Merge JSON files of the same symbol that are close in time into CSV files.

  Input files are named ``<symbol>-<timestamp>.json``, where the timestamp
  follows ``%Y-%m-%dT%H:%M:%S.%fZ`` with every ":" written as "_". Each file
  holds a JSON object whose ``"data"`` entry is a list; every item of that
  list is written out as one CSV row.
  """

  # Default maximum gap, in minutes, between two consecutive files of one
  # symbol for them to be merged into the same output file.
  DEFAULT_MAX_GAP_MINUTES = 12

  def organize_and_merge_data(self, directory: str, max_gap_minutes: float = DEFAULT_MAX_GAP_MINUTES):
    """Merge the ``.json`` files found directly in ``directory``.

    Consecutive files of the same symbol whose timestamps are at most
    ``max_gap_minutes`` apart are concatenated into one run. Each run is
    written by ``save_merged_data`` under the timestamp of its last file.

    Args:
      directory: Folder that is scanned (non-recursively) for ``.json`` files.
      max_gap_minutes: Largest allowed gap between neighbouring files of a run.

    Raises:
      ValueError: If a ``.json`` file name does not match the expected pattern.
      KeyError: If a file's JSON object has no ``"data"`` entry.
    """
    file_names = [f for f in os.listdir(directory) if f.endswith('.json')]

    # Walk the files symbol by symbol, oldest first within each symbol.
    # Ordering by time alone interleaves different symbols, which breaks every
    # run at each symbol switch and prevents files that belong together from
    # being merged. sorted() is stable, so ties keep their time order.
    entries = sorted(
      self.get_sorted_file_names(file_names),
      key=lambda entry: (entry[0], entry[1]),
    )

    previous_symbol = None
    previous_timestamp = None
    current_data = []

    for symbol, timestamp, file_name in entries:
      file_path = os.path.join(directory, file_name)
      with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

      # The very first file just opens the first run.
      if previous_symbol is None and previous_timestamp is None:
        previous_symbol = symbol
        previous_timestamp = timestamp
        current_data = data["data"]
        continue

      if previous_symbol == symbol:
        gap_minutes = (timestamp - previous_timestamp).total_seconds() / 60
        if gap_minutes <= max_gap_minutes:
          # Same symbol and close enough in time: grow the current run.
          current_data.extend(data["data"])
          previous_timestamp = timestamp
          continue

      # Symbol changed or the gap is too large: flush the finished run and
      # start a new one with this file.
      self.save_merged_data(directory, previous_symbol, previous_timestamp, current_data)

      previous_symbol = symbol
      previous_timestamp = timestamp
      current_data = data["data"]

    # Save any remaining merged data
    if current_data:
      self.save_merged_data(directory, previous_symbol, previous_timestamp, current_data)

  def get_sorted_file_names(self, file_names: List[str]) -> List[tuple]:
    """Parse every file name and return the results ordered by timestamp.

    Returns:
      A list of ``(symbol, timestamp, file_name)`` tuples, oldest first.

    Raises:
      ValueError: If a file name does not match the expected pattern.
    """
    parsed = [
      (*self.extract_data_from_file_name(file_name), file_name)
      for file_name in file_names
    ]
    parsed.sort(key=lambda entry: entry[1])
    return parsed

  def extract_data_from_file_name(self, file_name: str):
    """Split ``<symbol>-<timestamp>.json`` into ``(symbol, datetime)``.

    The symbol itself may contain "-" (for example ``BTC-USD``).

    Raises:
      ValueError: If the name does not contain a symbol followed by a
        timestamp in the expected format.
    """
    stem = file_name.rsplit(".json", 1)[0]

    # The timestamp contains exactly two "-" (inside the date), so the third
    # "-" counted from the right is the separator between symbol and time.
    symbol, *time_parts = stem.rsplit("-", 3)
    if len(time_parts) != 3:
      raise ValueError(f"file name does not match '<symbol>-<timestamp>.json': {file_name!r}")

    # File names carry "_" where the timestamp has ":".
    time_str = "-".join(time_parts).replace("_", ":")

    return (symbol, datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%fZ"))

  def save_merged_data(self, directory: str, symbol: str, timestamp: datetime, data: List):
    """Write ``data`` as CSV to ``<directory>/1-merged/<symbol>-<timestamp>.csv``.

    The output folder is created when missing and an existing file with the
    same name is overwritten.

    Args:
      directory: Base folder; the CSV goes into its ``1-merged`` sub-folder.
      symbol: Symbol used as the file name prefix.
      timestamp: Timestamp used in the file name.
      data: Iterable of rows; each row is written as one CSV line.
    """
    output_dir = os.path.join(directory, "1-merged")
    os.makedirs(output_dir, exist_ok=True)

    # NOTE(review): the output name keeps ":" and six-digit microseconds,
    # unlike the input names which use "_" for ":". Left unchanged so existing
    # consumers of these files keep working — confirm whether that is intended.
    timestamp_str = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    output_filename = f"{symbol}-{timestamp_str}.csv"
    output_path = os.path.join(output_dir, output_filename)

    with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
      csv.writer(csvfile).writerows(data)

In [2]:
from google.colab import drive

# Mount Google Drive under /content/drive so the cells below can read the
# input files from it; force_remount=True asks for a fresh mount.
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [5]:
# Root of the mounted Drive and the sub-folder (relative to it) to process.
# With TARGET left empty, PATH resolves to the Drive root itself.
BASE_DIRECTORY = "/content/drive/My Drive"
TARGET = ""

PATH = os.path.join(BASE_DIRECTORY, TARGET)

# Merge the .json files found in PATH; results are written to PATH/1-merged.
merger = DataMerger()
merger.organize_and_merge_data(PATH)