In [13]:
import os
import time
import json
import copy
import math
import random
import requests
import webvtt

In [14]:
from data import course_map

In [15]:
def load_cookie_dict(path='cookies.txt'):
  """
  Reads cookies from a text file holding one `name=value` pair per line.

  Each line is stripped and split on the first '=' only, so a value may
  itself contain '='. Blank lines and lines with no '=' at all are skipped
  instead of aborting the whole load with a ValueError.

  Args:
    path: Cookie file to read. Defaults to 'cookies.txt' in the working directory.

  Returns:
    dict mapping cookie name to cookie value; a repeated name keeps its last value.
  """
  cookie_jar = {}
  with open(path, 'r') as f:
    for raw_line in f:
      entry = raw_line.strip()
      # Covers both empty lines and malformed ones (no separator to split on)
      if '=' not in entry:
        continue
      name, value = entry.split('=', 1)
      cookie_jar[name] = value
  return cookie_jar


# Load the session cookies once; the `headers` dict serialises them into its Cookie entry.
cookies = load_cookie_dict()
# Cell result: how many cookies were read from the file.
len(cookies)

18

In [16]:
# Browser-like request headers shared by the HTTP calls in this notebook.
# get_video_duration() removes 'content-type' from a copy before posting form data;
# send_progress_update() sets it to JSON again on its own copy.
headers = {
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,hi;q=0.6",
    "content-type": "application/json",
    "origin": "https://lms.simplilearn.com",
    "priority": "u=1, i",
    # NOTE(review): referer is pinned to course 3971 — confirm whether it must match the course being processed
    "referer": "https://lms.simplilearn.com/courses/3971/Introduction-to-Cloud-Computing/syllabus",
    "sec-ch-ua": '"Google Chrome";v="143", "Chromium";v="143", "Not A(Brand";v="24"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    "x-requested-with": "XMLHttpRequest",
    # Cookies from `cookies` joined as "name=value; name=value"
    "Cookie": "; ".join(f"{k}={v}" for k, v in cookies.items())
}

In [17]:
# Template body for the progress-publish request. send_progress_update() deep-copies it
# and overrides dataVals.lessonId, dataVals.topicId, dataVals.onMarker and dataVals.duration;
# every other field is sent exactly as written here.
# NOTE(review): this literal embeds a personal email, user id, userKey and signature.
# Consider loading them from the environment or an untracked config file before sharing the notebook.
publish_payload = {
    "applicationId": "2",
    "applicationName": "lms.simplilearn.com",
    "userEmail": "k26rahul.m@gmail.com",
    "userId": "9973716",
    "userName": "Rahul Maurya",
    # NOTE(review): fixed value, never refreshed per request — confirm the server accepts a stale eventTime
    "eventTime": "1768296170126",
    "referalUrl": "https://accounts.simplilearn.com/",
    "url": "https://lms.simplilearn.com/courses/3971/Introduction-to-Cloud-Computing/syllabus",
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    "verb": "videolearning",
    "objectType": "elearning",
    "object": "Introduction to Cloud Computing",
    "objectId": 3971,
    "value": "",
    "dataVals": {
        "userKey": "ab3490edf32bb4f01ccd6e9e5397fab751cfe6384",
        "duration": 999,  # Placeholder; overridden per request
        "onMarker": "3",  # Placeholder; overridden per request with str(marker)
        "product": "2",
        "lessonId": "30501",  # Placeholder; overridden per request
        "lessonName": "Lesson 3 - Adopting Cloud",  # Not updated per request
        "topicId": "419201",  # Placeholder; overridden per request
        "topicName": "3.8 Four-Step Plan for Cloud Migration",  # Not updated per request
        "type": "mp4",
        "videoLen": 267,  # Not updated per request
        "oslId": 3971,
        "isMpClp": False
    },
    "signature": "0926afe81cf8c71369f9871574854123"
}

In [18]:
# Template form body for the video-metadata request. get_video_duration() copies it and
# overrides 'learningelementId' and 'learninglabelId'; the remaining fields are sent as written.
video_info_payload = {
    "deviceCompatibility": "1",
    "elordering": "8",
    "filePath": "MP4/default/Cloud/Intro_to_cloud_computing/Lesson 2 - Basics of Cloud Computing/2.9_Hybrid_Cloud.mp4",
    "hasChildren": "0",
    "isAvailableOffline": "1",
    "learningelementId": "419198",  # Overridden per request with the topic id
    "learninglabelId": "30500",   # Overridden per request with the lesson id
    "length": "123",
    "nodeType": "element",
    "order": "9",
    "testId": "0",
    "title": "2.9 Hybrid Cloud",
    "type": "mp4",
    "labelName": "Lesson 2 - Basics of Cloud Computing",
    # NOTE(review): key is spelled "labeIndex" — presumably the server's own spelling; verify before changing
    "labeIndex": "2",
    "topicIndex": "9",
    "elearningId": "3971",
    "primaryElearningId": "3971"
}

In [19]:
# Endpoint posted to by get_video_duration() to obtain a topic's metadata (including its transcript path)
URL_VIDEO_INFO = "https://lms.simplilearn.com/user/learning/video"
# Endpoint posted to by send_progress_update() with the JSON progress payload
URL_PUBLISH = "https://lms.simplilearn.com/api/lrs/publish"

In [20]:
def timestamp_to_seconds(ts):
  """
  Converts a VTT timestamp to total seconds.

  Accepts `HH:MM:SS.mmm` as well as the short `MM:SS.mmm` form (WebVTT allows
  the hours field to be omitted). The fractional part is optional and is scaled
  by its own digit count, so ".5" means half a second.

  Args:
    ts: Timestamp string, e.g. "00:03:03.490".

  Returns:
    Seconds as a float. On any parse failure the error is printed and 0 is returned.
  """
  try:
    clock, _, fraction = ts.strip().partition(".")
    fields = clock.split(":")
    if len(fields) == 2:
      # Short form without hours
      fields.insert(0, "0")
    h, m, s = fields  # Any other field count raises ValueError here

    if fraction and not fraction.isdigit():
      raise ValueError(f"invalid fractional seconds {fraction!r}")

    whole = int(h) * 3600 + int(m) * 60 + int(s)
    # For the usual three digits this is exactly int(ms) / 1000
    subsecond = int(fraction) / 10 ** len(fraction) if fraction else 0.0
    return whole + subsecond
  except (AttributeError, TypeError, ValueError) as e:
    print(f"Error parsing timestamp {ts}: {e}")
    return 0

In [21]:
CACHE_FILE = "video_duration_cache.json"


def _read_duration_cache():
  """Returns the cache file's contents as a dict; empty if it is missing, unreadable or not a JSON object."""
  if not os.path.exists(CACHE_FILE):
    return {}
  try:
    with open(CACHE_FILE, "r") as f:
      loaded = json.load(f)
  except (ValueError, OSError):
    # A corrupt or unreadable cache is ignored rather than treated as fatal
    return {}
  return loaded if isinstance(loaded, dict) else {}


def _store_duration(cache_key, duration, known_entries):
  """Writes one duration into the cache file, merged with `known_entries` and whatever is on disk now."""
  merged = dict(known_entries)
  # Re-read so entries written since the first read are not overwritten (simple approach)
  merged.update(_read_duration_cache())
  merged[cache_key] = duration
  try:
    with open(CACHE_FILE, "w") as f:
      json.dump(merged, f, indent=2)
  except IOError as e:
    print(f"Warning: Could not write to cache file: {e}")


def get_video_duration(lesson_id, topic_id, timeout=30):
  """
  Calculates video duration by fetching the transcript and reading the last cue's end timestamp.
  Uses a local JSON file (CACHE_FILE) to cache results and avoid redundant network requests.

  Only positive durations are cached or served from the cache. A failed lookup
  (which yields 0) is therefore retried on the next call instead of being
  remembered permanently.

  Args:
    lesson_id: Lesson id, sent as 'learninglabelId'.
    topic_id: Topic id, sent as 'learningelementId'.
    timeout: Seconds to wait on each HTTP request before giving up.

  Returns:
    Duration in seconds, or 0 when it could not be determined. Errors are
    printed, never raised.
  """
  cache_key = f"{lesson_id}.{topic_id}"

  # --- Step 0: Check File Cache ---
  cache_data = _read_duration_cache()
  cached = cache_data.get(cache_key)
  if isinstance(cached, (int, float)) and cached > 0:
    return cached

  try:
    # --- Step 1: Prepare Request ---
    # Update payload with specific lesson/topic IDs
    payload = video_info_payload.copy()
    payload.update({
        'learninglabelId': lesson_id,
        'learningelementId': topic_id
    })

    # Remove 'content-type' to let requests library handle form-encoding automatically
    request_headers = headers.copy()
    request_headers.pop('content-type', None)

    # --- Step 2: Get Video Metadata ---
    response = requests.post(URL_VIDEO_INFO, headers=request_headers, data=payload, timeout=timeout)
    if response.status_code != 200:
      print(f"Metadata request failed: {response.status_code}")
      return 0

    # Extract transcript URL (handle potential missing keys safely)
    paths = response.json().get('transcriptPath', [])
    transcript_url = paths[0].get('file') if paths else None

    if not transcript_url:
      print("No transcript URL found in response.")
      return 0

    # --- Step 3: Download and Parse Transcript ---
    vtt_response = requests.get(transcript_url, timeout=timeout)
    if vtt_response.status_code != 200:
      print(f"Transcript download failed: {vtt_response.status_code}")
      return 0

    # Save to a temp file because webvtt.read requires a file path
    temp_filename = f"temp_{topic_id}.vtt"
    try:
      with open(temp_filename, "wb") as f:
        f.write(vtt_response.content)

      # Parse cues to find the final timestamp
      captions = webvtt.read(temp_filename)
      if not captions:
        print("VTT file contained no cues.")
        return 0

      duration = timestamp_to_seconds(captions[-1].end)
    finally:
      # Ensure cleanup happens even if parsing errors occur
      if os.path.exists(temp_filename):
        os.remove(temp_filename)

    # --- Step 4: Update Cache ---
    if duration > 0:
      _store_duration(cache_key, duration, cache_data)
    else:
      # timestamp_to_seconds signals failure with 0; do not persist that
      print("Transcript did not yield a positive duration; result not cached.")

    return duration

  except Exception as e:
    print(f"Error in get_video_duration: {e}")
    return 0

In [22]:
def send_progress_update(lesson_id, topic_id, marker, duration, timeout=30):
  """
  Sends a progress update to the server for a specific timestamp (marker).

  Args:
    lesson_id: Value sent as dataVals.lessonId.
    topic_id: Value sent as dataVals.topicId.
    marker: Marker index; sent as a string in dataVals.onMarker.
    duration: Value sent as dataVals.duration.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    True when the server answered with a non-error status, False when the
    request raised or was rejected. Failures are printed, never raised.
  """
  try:
    # --- Step 1: Prepare Payload ---
    # Deep copy is required because 'dataVals' is a nested dictionary
    payload = copy.deepcopy(publish_payload)
    payload['dataVals'].update({
        'lessonId': lesson_id,
        'topicId': topic_id,
        'onMarker': str(marker),
        'duration': duration,
    })

    # --- Step 2: Prepare Headers ---
    # Ensure Content-Type is set to JSON for this specific request
    request_headers = headers.copy()
    request_headers['content-type'] = 'application/json'

    # --- Step 3: Send Request ---
    response = requests.post(URL_PUBLISH, headers=request_headers, json=payload, timeout=timeout)

  except Exception as e:
    print(f"Error sending marker {marker}: {e}")
    return False

  # A 4xx/5xx answer (e.g. expired cookies) must not be mistaken for success
  if not response.ok:
    print(f"Marker {marker} rejected by server: HTTP {response.status_code}")
    return False

  return True

In [23]:
def process_course_progress(course_map, use_delay=False, marker_interval=15):
  """
  Iterates through course map, calculates markers via VTT, and tracks progress in a JSON file.

  For every topic the duration is looked up, the final marker index
  floor(duration / marker_interval) is computed, and that single marker is sent
  unless 'progress.json' already lists it. Only the final marker is sent, even
  though the log line prints the full 0..N range.

  Args:
    course_map: Mapping of lesson id -> iterable of topic ids.
    use_delay: When True, sleeps 0.1-0.3 s after each duration lookup and each send attempt.
    marker_interval: Seconds of video per marker. Must be positive.

  Raises:
    ValueError: If marker_interval is not positive.

  A marker is recorded as done unless send_progress_update() returns exactly
  False. Any other result, including None, counts as sent.
  """
  if marker_interval <= 0:
    raise ValueError("marker_interval must be a positive number of seconds")

  print(f"Starting processing for {len(course_map)} lessons...")
  progress_file = "progress.json"

  # --- Step 1: Load Existing Progress ---
  completed = {}
  if os.path.exists(progress_file):
    try:
      with open(progress_file, 'r') as f:
        loaded = json.load(f)
    except ValueError:
      loaded = None  # Start fresh if file is corrupt
    # Valid JSON that is not an object (e.g. a list) cannot hold progress keys
    if isinstance(loaded, dict):
      completed = loaded

  # --- Step 2: Iterate Lessons and Topics ---
  for l_idx, (l_id, topics) in enumerate(course_map.items(), 1):
    print(f"--- Lesson {l_id} (Order {l_idx}) ---")

    for t_idx, t_id in enumerate(topics, 1):
      prefix = f"{l_idx}.{t_idx}"
      print(f" > [{prefix}] Topic {t_id}: Fetching Transcript...")

      # --- Step 3: Get Duration ---
      duration = get_video_duration(l_id, t_id)

      if use_delay:
        time.sleep(random.uniform(0.1, 0.3))

      if duration <= 0:
        print(f"   [{prefix}] Skipping (Invalid duration)")
        continue

      # --- Step 4: Process Markers ---
      max_markers = math.floor(duration / marker_interval)
      print(f"   [{prefix}] Duration: {duration:.2f}s | Markers: 0-{max_markers}")

      # Only the last marker is reported
      m = max_markers
      key = f"{l_id}.{t_id}.{m}"

      if key in completed:
        print(f"   [{prefix}] Skipping marker {m} (Done)")
        continue

      outcome = send_progress_update(l_id, t_id, m, int(duration - 1))

      if outcome is False:
        # Leave the key out of progress.json so the next run retries it
        print(f"   [{prefix}] Marker {m} not confirmed; it will be retried on the next run")
      else:
        print(f"   [{prefix}] Sent marker {m}")

        # --- Step 5: Save Progress ---
        completed[key] = True
        with open(progress_file, 'w') as f:
          json.dump(completed, f)

      if use_delay:
        time.sleep(random.uniform(0.1, 0.3))

  print("Batch processing complete.")

In [24]:
# Run the batch over the imported course_map with the default use_delay=False (no sleeps between requests)
process_course_progress(course_map)

Starting processing for 7 lessons...
--- Lesson 30499 (Order 1) ---
 > [1.1] Topic 419181: Fetching Transcript...
   [1.1] Duration: 183.49s | Markers: 0-12
   [1.1] Sent marker 12
--- Lesson 30500 (Order 2) ---
 > [2.1] Topic 419188: Fetching Transcript...
   [2.1] Duration: 30.71s | Markers: 0-2
   [2.1] Sent marker 2
 > [2.2] Topic 419189: Fetching Transcript...
   [2.2] Duration: 155.81s | Markers: 0-10
   [2.2] Sent marker 10
 > [2.3] Topic 419190: Fetching Transcript...
   [2.3] Duration: 57.44s | Markers: 0-3
   [2.3] Sent marker 3
 > [2.4] Topic 419191: Fetching Transcript...
   [2.4] Duration: 89.62s | Markers: 0-5
   [2.4] Sent marker 5
 > [2.5] Topic 419192: Fetching Transcript...
   [2.5] Duration: 150.56s | Markers: 0-10
   [2.5] Sent marker 10
 > [2.6] Topic 419193: Fetching Transcript...
   [2.6] Duration: 11.16s | Markers: 0-0
   [2.6] Sent marker 0
 > [2.7] Topic 419194: Fetching Transcript...
   [2.7] Duration: 66.01s | Markers: 0-4
   [2.7] Sent marker 4
 > [2.8] Top