<a href="https://colab.research.google.com/github/manumeehl/trajectories/blob/main/trajectories.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the tesseract OCR binary and the Python packages imported in the next cell
!apt install tesseract-ocr
!pip install opencv-python-headless pytesseract pytube

In [None]:
from google.colab.patches import cv2_imshow as show
from pytube import YouTube
import pytesseract
import cv2
import re

In [19]:
def get_video(url, path):
  ''' Download a YouTube video and store it as 'launch.mp4' inside `path`

  url: address of the video, handed to pytube's YouTube class
  path: output directory passed on to the stream's download call

  Of the progressive mp4 streams, the first one after sorting by
  resolution in descending order is downloaded.
  '''
  vid = YouTube(url)
  print(f"Downloading Video '{vid.streams[0].title}'")

  # Narrow the stream list down step by step instead of one long chain
  candidates = vid.streams.filter(progressive=True, file_extension='mp4')
  ranked = candidates.order_by('resolution').desc()
  chosen = ranked.first()
  chosen.download(output_path=path, filename='launch.mp4')

  print('Download complete')

In [None]:
def custom_crop(frame, strip_height=140, max_width=1920):
  ''' Cut the bottom strip out of a video frame

  frame: image array indexed as [row, column, ...]
  strip_height: number of rows to keep, counted from the bottom edge
  max_width: number of columns to keep, counted from the left edge

  Returns the selected slice of `frame`. Frames that are smaller than the
  requested strip are returned at their full height and/or width.
  '''
  # Only the row count is needed, which also lets 2-D (grayscale) frames through
  height = frame.shape[0]
  # Clamp at 0: a negative start would be read as an offset from the bottom
  # edge and silently yield a thinner strip than the frame actually offers
  top = max(height - strip_height, 0)
  return frame[top:height, 0:max_width]

In [None]:
def clock_to_secs(clockstring):
  ''' Convert a mission clock string to an integer second value

  clockstring: clock in the form 'T+HH:MM:SS' or 'T-HH:MM:SS' (any letter case)

  Returns the signed number of seconds relative to T-0: negative for 'T-'
  clocks, positive otherwise.
  Raises ValueError if the part after the sign is not three ':'-separated
  integers.
  '''
  # Countdown clocks (T-) lie before T-0 and therefore count as negative
  tense = -1 if clockstring[:2].lower() == 't-' else 1
  # Remove the T+- sign and split the rest into its three fields
  hours, minutes, seconds = (int(unit) for unit in clockstring[2:].split(':'))

  return tense * (hours * 3600 + minutes * 60 + seconds)

In [50]:
def get_props(string):
  ''' Extract mission clock, altitude and velocity from a given string

  string: raw text to search, e.g. the output of an OCR pass

  Returns a dict with the keys
    'clock':    list of every 't+hh:mm:ss' / 't-hh:mm:ss' match (lowercased)
    'velocity': the numerically largest remaining number, as written in the text
    'altitude': the numerically smallest remaining number, as written in the text
  Velocity and altitude are the integer 0 when no number is left.

  Heuristic: the function has no way to tell the two readings apart other
  than assuming the velocity value is the larger one.
  '''
  string = string.lower()

  # Apply a blacklist
  blacklist = ['stage 1', 'stage 2', 'falcon 9']
  for w in blacklist:
    string = string.replace(w, '')

  # Find the mission clock (T+00:00:00)
  clock_pattern = r't[+-]\d{2}:\d{2}:\d{2}'
  mission_clock = re.findall(clock_pattern, string)

  # Cut the clock out of the text so its digits are not picked up as
  # telemetry. Removing the matched text (instead of removing each digit
  # group from the number list) cannot fail when a group is tokenised
  # differently in the full text, e.g. '10' inside 't+00:00:10.5'.
  remainder = re.sub(clock_pattern, ' ', string)

  # Find all numbers and floats; drop duplicates while keeping text order
  numbers = list(dict.fromkeys(re.findall(r'\d+(?:\.\d+)?', remainder)))

  altitude, velocity = 0, 0
  if numbers:
    # Compare by numeric value: as plain strings '2' would rank above '1600'
    altitude = min(numbers, key=float)
    velocity = max(numbers, key=float)

  return {
      'clock': mission_clock,
      'velocity': velocity,
      'altitude': altitude
  }

In [None]:
def extract_data(frame):
  ''' Run OCR on the cropped part of a frame and parse the recognised text

  The region returned by custom_crop is converted to grayscale and inverted
  before it is handed to tesseract; the resulting text is parsed by
  get_props, whose return value is passed through unchanged.
  '''
  strip = custom_crop(frame)
  # Grayscale conversion and inversion in one step
  # (presumably to give tesseract dark text on a light background - confirm)
  negative = cv2.bitwise_not(cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY))
  return get_props(pytesseract.image_to_string(negative))

In [39]:
def await_liftoff():
  ''' Identify the frame at which liftoff occurs

  Work in progress: for now the video is only sampled every 300 frames and
  the data extracted from each sampled frame is printed. Reads from the
  module-level capture `vid` and returns nothing.
  '''
  framepos = 0
  interval = 300
  # Await first mission clock appearance
  while vid.isOpened():
    vid.set(cv2.CAP_PROP_POS_FRAMES, int(framepos))
    framepos += interval

    ret, frame = vid.read()
    if not ret:
      # No frame could be read (e.g. the seek went past the end of the
      # video); stop instead of passing an empty frame on to the OCR step
      break

    data = extract_data(frame)
    if data:
      print(data)

  # TODO: once a mission clock has been recognised, continue frame by frame
  # (interval = 1) until the clock reading changes, to pin down the exact
  # frame of a full second




In [21]:
# Download the video; get_video stores it as 'launch.mp4' inside the 'launches' folder
url = 'https://www.youtube.com/watch?v=agYuEAkEljw'
get_video(url, 'launches')

Downloading Video 'Starlink Mission'
Download complete


In [51]:
# Open the downloaded video; `vid` is the capture that await_liftoff reads from
vidpath = 'launches/launch.mp4'
vid = cv2.VideoCapture(vidpath)
if not vid.isOpened():
  # Fail loudly: with an unopened capture the scan below would do nothing
  raise OSError(f"Could not open video file '{vidpath}'")
fps = vid.get(cv2.CAP_PROP_FPS)

print('Awaiting liftoff')
await_liftoff()

Awaiting liftoff
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['50', '00', '05']
['00', '40', '05']
['30', '00', '05']
['20', '00', '05']
['10', '00', '05']
[]
['t-00:05:00']
[]
['t-00:04:50']
[]
['t-00:04:40']
[]
['t-00:04:30']
['20', '00', '04']
['10', '00', '04']
[]
[]
['00', '40', '03']
['30', '00', '03']
['20', '00', '03']
['10', '00', '03']
['00', '03']
[]
['t-00:02:50']
['00', '02', '40']
['30', '00', '02']
['20', '00', '02']
['10', '00', '02']
['00', '02']
['50', '00', '01']
[]
['t-00:01:40']
[]
['t-00:01:30']
['20', '00', '01']
['10', '00', '01']
['00', '9', '01']
['50', '00']
['00', '40']
['30', '00']
['20', '00']
[]
[]
['t+00:00:00']
[]
['t+00:00:10']
['278', '0.7']
['t+00:00:20']
['30', '00', '18', '483']
['00', '40', '3.5', '727']
['5.8', '1018']
['t+00:00:50']
['1240', '8.7']
['t+00:01:00']
['1600', '12.1', '2']
['t+00:01:10']


KeyboardInterrupt: ignored