In [1]:
%run utils/devtools.py

In [32]:
%reloadmypkg utils

import json
import os
from tqdm import tqdm
from utils.url2platform import problem_url2platform
from utils.standardise_url import expand_url, strip_query_params
from utils.parse4db import problems_parse4db, tutorials_parse4db
from utils.enums import ProblemPlatform

✅ Reloaded package 'utils' and its submodules.


In [3]:
# Where the cleaned sheet is written, and the raw sheet response it is built from.
export_file = "../cleaned-data/striver/sde.json"
sheet_path = "../raw-data/striver/sde-sheet-response-data.json"

In [4]:
# Load the raw sheet response. The encoding is passed explicitly so decoding
# does not depend on the platform's locale default (the export cell writes
# UTF-8 as well).
with open(sheet_path, encoding="utf-8") as file:
    raw = json.load(file)

# Flatten every topic of every step into one record that holds the title and
# all candidate links for that topic.
problems_data = [
    {
        "gfg": topic["gfg_link"],
        "c360": topic["cs_link"],
        "lc": topic["lc_link"],
        "title": topic["title"],
        "yt": topic["yt_link"],
        "post": topic["post_link"],
    }
    for step in raw["sheetData"]
    for topic in step["topics"]
]

In [5]:
# Pick one URL per record, trying the keys in priority order:
# "lc", "gfg", "c360", "yt", then "post".
# Every candidate is tested for truthiness, so an empty-string "yt" link now
# falls through to "post" the same way an empty "lc"/"gfg"/"c360" link already
# fell through to the next key. If all candidates are falsy, the "post" value
# is used as-is.
urls_titles = []
for data in problems_data:
    url = data["lc"] or data["gfg"] or data["c360"] or data["yt"] or data["post"]
    urls_titles.append({"url": url, "title": data["title"]})

In [6]:
len(urls_titles)

191

In [7]:
urls_titles[:3]

[{'url': 'https://leetcode.com/problems/set-matrix-zeroes/',
  'title': 'Set Matrix Zeros'},
 {'url': 'https://leetcode.com/problems/pascals-triangle/',
  'title': "Pascal's Triangle"},
 {'url': 'https://leetcode.com/problems/next-permutation/',
  'title': 'Next Permutation'}]

In [8]:
urls_titles[90]

{'url': 'https://leetcode.com/accounts/login/?next=/problems/find-the-celebrity/',
 'title': 'The Celebrity Problem'}

In [9]:
# Replace premium LeetCode problem with LintCode link.
# The entry is located by its URL rather than by a fixed list position, so the
# correct record is patched even if the sheet order changes, and re-running
# this cell is a no-op.
premium_url = "https://leetcode.com/accounts/login/?next=/problems/find-the-celebrity/"
lintcode_url = "https://www.lintcode.com/problem/645/"
for item in urls_titles:
    if item["url"] == premium_url:
        item["url"] = lintcode_url

# Display the patched entry; next() raises StopIteration if no entry carries
# the LintCode link, which makes a failed replacement visible.
next(item for item in urls_titles if item["url"] == lintcode_url)

{'url': 'https://www.lintcode.com/problem/645/',
 'title': 'The Celebrity Problem'}

In [10]:
# Print every entry whose URL contains "implement-strstr", prefixed by its
# position in urls_titles.
for position, entry in enumerate(urls_titles):
    if "implement-strstr" not in entry["url"]:
        continue
    print(f"{position}: {entry}")

97: {'url': 'https://leetcode.com/problems/implement-strstr/', 'title': 'Z-Function'}
98: {'url': 'https://leetcode.com/problems/implement-strstr/', 'title': 'KMP algo / LPS(pi) array'}


In [11]:
# Fix the redirect urls.
# Entries are matched by the outdated "implement-strstr" slug instead of fixed
# list positions, so every affected record is rewritten regardless of where it
# sits in the list, and re-running this cell changes nothing further.
for item in urls_titles:
    if "implement-strstr" in item["url"]:
        item["url"] = "https://leetcode.com/problems/find-the-index-of-the-first-occurrence-in-a-string/"

In [12]:
print(urls_titles[97])
print(urls_titles[98])

{'url': 'https://leetcode.com/problems/find-the-index-of-the-first-occurrence-in-a-string/', 'title': 'Z-Function'}
{'url': 'https://leetcode.com/problems/find-the-index-of-the-first-occurrence-in-a-string/', 'title': 'KMP algo / LPS(pi) array'}


In [14]:
# Sort each entry into one of two lists: entries whose URL maps to a known
# ProblemPlatform go to problems_urls_titles (with query parameters stripped);
# everything else goes to tutorials_urls_titles with its URL left untouched.
problems_urls_titles = []
tutorials_urls_titles = []

# URLs containing any of these substrings are passed through expand_url first.
expandable_markers = ("bit.ly", "codingninjas.com/codestudio", "codingninjas.com/studio")

for entry in tqdm(urls_titles, desc="Expanding SDE URLS", ncols=100):
    link, heading = entry["url"], entry["title"]
    if any(marker in link for marker in expandable_markers):
        link = expand_url(link)
    if problem_url2platform(link) == ProblemPlatform.UNKNOWN:
        tutorials_urls_titles.append({"url": link, "title": heading})
        continue
    problems_urls_titles.append({"url": strip_query_params(link), "title": heading})

Expanding SDE URLS:   6%|██▋                                       | 12/191 [00:00<00:09, 18.47it/s]

Unknown platform for url: https://www.geeksforgeeks.org/find-minimum-number-of-coins-that-make-a-change/


Expanding SDE URLS: 100%|█████████████████████████████████████████| 191/191 [00:20<00:00,  9.20it/s]


In [15]:
len(problems_urls_titles)

190

In [16]:
problems_urls_titles[:5]

[{'url': 'https://leetcode.com/problems/set-matrix-zeroes/',
  'title': 'Set Matrix Zeros'},
 {'url': 'https://leetcode.com/problems/pascals-triangle/',
  'title': "Pascal's Triangle"},
 {'url': 'https://leetcode.com/problems/next-permutation/',
  'title': 'Next Permutation'},
 {'url': 'https://leetcode.com/problems/maximum-subarray/',
  'title': "Kadane's Algorithm"},
 {'url': 'https://leetcode.com/problems/sort-colors/',
  'title': "Sort an array of 0's, 1's and 2's"}]

In [17]:
len(tutorials_urls_titles)

1

In [18]:
tutorials_urls_titles

[{'url': 'https://www.geeksforgeeks.org/find-minimum-number-of-coins-that-make-a-change/',
  'title': 'Greedy algorithm to find minimum number of coins'}]

In [19]:
# Keep only the URL strings, in the same order as problems_urls_titles.
urls = [entry["url"] for entry in problems_urls_titles]

In [20]:
len(urls)

190

In [21]:
urls[:5]

['https://leetcode.com/problems/set-matrix-zeroes/',
 'https://leetcode.com/problems/pascals-triangle/',
 'https://leetcode.com/problems/next-permutation/',
 'https://leetcode.com/problems/maximum-subarray/',
 'https://leetcode.com/problems/sort-colors/']

In [33]:
# Parse the problem URLs with the project helper problems_parse4db.
# NOTE(review): the second argument looks like the name of a progress/cache
# file used to resume earlier runs — confirm against utils.parse4db.
parsed_data = problems_parse4db(urls, "striver-sde-progress.json")
# Show how many records came back.
len(parsed_data)

Loaded 87 previously parsed items.


Processing URLs:  47%|█████████████████████                        | 89/190 [00:01<01:14,  1.36it/s]

No slug found for url: https://www.lintcode.com/problem/645/


Processing URLs: 100%|████████████████████████████████████████████| 190/190 [01:40<00:00,  1.03it/s]


190

In [34]:
parsed_data[:3]

[{'id': '73LC',
  'type': 'problem',
  'title': 'Set Matrix Zeroes',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/set-matrix-zeroes/'},
 {'id': '118LC',
  'type': 'problem',
  'title': "Pascal's Triangle",
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/pascals-triangle/'},
 {'id': '31LC',
  'type': 'problem',
  'title': 'Next Permutation',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/next-permutation/'}]

In [35]:
# Overwrite each parsed record's title with the title taken from the sheet.
# Records are paired purely by position, so both lists must have the same
# length; checking this up front stops titles from silently shifting onto the
# wrong problems if the parser ever returns fewer (or more) records.
assert len(parsed_data) == len(problems_urls_titles), (
    f"parsed {len(parsed_data)} records for {len(problems_urls_titles)} problem URLs"
)
problems_parsed = [
    {**parsed, "title": source["title"]}
    for parsed, source in zip(parsed_data, problems_urls_titles)
]

In [36]:
len(problems_parsed)

190

In [37]:
problems_parsed[97]

{'id': '28LC',
 'type': 'problem',
 'title': 'KMP algo / LPS(pi) array',
 'platform': 'LC',
 'href': 'https://leetcode.com/problems/find-the-index-of-the-first-occurrence-in-a-string/'}

In [38]:
problems_parsed[:3]

[{'id': '73LC',
  'type': 'problem',
  'title': 'Set Matrix Zeros',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/set-matrix-zeroes/'},
 {'id': '118LC',
  'type': 'problem',
  'title': "Pascal's Triangle",
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/pascals-triangle/'},
 {'id': '31LC',
  'type': 'problem',
  'title': 'Next Permutation',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/next-permutation/'}]

In [39]:
# Convert the tutorial entries (URL + title) with the project helper
# tutorials_parse4db.
# NOTE(review): presumably returns records shaped like the problem records —
# confirm against utils.parse4db.
tutorials_parsed = tutorials_parse4db(tutorials_urls_titles)

Processing URLs: 100%|███████████████████████████████████████████████| 1/1 [00:00<00:00, 187.00it/s]


In [40]:
tutorials_parsed

[{'id': '53efce83-b7d1-5934-9c6e-9d2c02c1523aGFG',
  'type': 'tutorial',
  'title': 'Greedy algorithm to find minimum number of coins',
  'platform': 'GFG',
  'href': 'https://www.geeksforgeeks.org/find-minimum-number-of-coins-that-make-a-change/'}]

In [41]:
# Combine both record lists into one, problems first and tutorials after.
parsed_merged = [*problems_parsed, *tutorials_parsed]

In [42]:
len(parsed_merged)

191

In [43]:
parsed_merged[:3]

[{'id': '73LC',
  'type': 'problem',
  'title': 'Set Matrix Zeros',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/set-matrix-zeroes/'},
 {'id': '118LC',
  'type': 'problem',
  'title': "Pascal's Triangle",
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/pascals-triangle/'},
 {'id': '31LC',
  'type': 'problem',
  'title': 'Next Permutation',
  'platform': 'LC',
  'href': 'https://leetcode.com/problems/next-permutation/'}]

In [44]:
# Make sure the export directory exists before writing.
# os.path.dirname returns "" for a bare file name and os.makedirs("") raises,
# so fall back to the current directory in that case.
os.makedirs(os.path.dirname(export_file) or ".", exist_ok=True)

In [45]:
# Write the merged records to export_file as UTF-8 JSON, indented by four
# spaces and with non-ASCII characters left unescaped.
with open(export_file, mode="w", encoding="utf-8") as out_file:
    json.dump(parsed_merged, out_file, indent=4, ensure_ascii=False)