In [32]:
import requests
import pandas as pd
import time
import random

In [33]:
# Load the problem list from disk; a later cell iterates its `titleSlug` column.
questions = pd.read_csv(filepath_or_buffer='./problem_set.csv')

In [34]:
def scrape(query, variables, url='https://leetcode.com/graphql/', timeout=30):
    """POST a GraphQL query and return the decoded JSON response.

    Args:
        query: GraphQL document text, sent as the ``query`` field of the JSON body.
        variables: Value sent as the ``variables`` field of the JSON body.
        url: Endpoint the request is posted to.
        timeout: Seconds handed to ``requests.post`` as its ``timeout`` argument,
            so a stalled connection raises instead of blocking forever.

    Returns:
        Whatever ``response.json()`` produces for the reply body.

    Raises:
        requests.exceptions.RequestException: for transport failures, timeouts,
            or an HTTP error status (raised by ``raise_for_status``).
    """
    headers = {
        # NOTE(review): 'authority', 'method', 'path' and 'scheme' look like
        # HTTP/2 pseudo-headers copied from browser dev tools; they are kept
        # unchanged here - confirm whether the server needs them at all.
        'authority': 'leetcode.com',
        'method': 'POST',
        'path': '/graphql/',
        'scheme': 'https',
        'accept': '*/*',
        # No explicit 'accept-encoding': advertising "br"/"zstd" by hand can
        # yield a body this client cannot decode when the optional decoder
        # packages are missing. requests' default only lists what it supports.
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'origin': 'https://leetcode.com',
        'referer': 'https://leetcode.com/problems/spiral-matrix/solutions/',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)...',
    }
    data = {"query": query, "variables": variables}
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [51]:
import json

# GraphQL document sent for every slug; it does not change between requests,
# so it is built once instead of on every loop iteration.
query = "query getQuestionDetail($titleSlug: String!) {\n  isCurrentUserAuthenticated\n  question(titleSlug: $titleSlug) {\n    questionId\n    questionFrontendId\n    questionTitle\n    translatedTitle\n    questionTitleSlug\n    content\n    translatedContent\n    difficulty\n    stats\n    allowDiscuss\n    contributors {\n      username\n      profileUrl\n      __typename\n    }\n    similarQuestions\n    mysqlSchemas\n    randomQuestionUrl\n    sessionId\n    categoryTitle\n    submitUrl\n    interpretUrl\n    codeDefinition\n    sampleTestCase\n    enableTestMode\n    metaData\n    enableRunCode\n    enableSubmit\n    judgerAvailable\n    infoVerified\n    envInfo\n    urlManager\n    article\n    questionDetailUrl\n    libraryUrl\n    adminUrl\n    companyTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    companyTagStats\n    topicTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    __typename\n  }\n  interviewed {\n    interviewedUrl\n    companies {\n      id\n      name\n      slug\n      __typename\n    }\n    timeOptions {\n      id\n      name\n      __typename\n    }\n    stageOptions {\n      id\n      name\n      __typename\n    }\n    __typename\n  }\n  subscribeUrl\n  isPremium\n  loginUrl\n}\n"

# One dict per successfully fetched question.
data = []

# errors.txt holds one title slug per line.
with open('errors.txt', 'r') as file:
    for line in file:
        question_slug = line.strip()
        if not question_slug:
            # A blank (e.g. trailing) line would otherwise be sent as an empty slug.
            continue
        variables = {"titleSlug": question_slug}

        try:
            response = scrape(query, variables)
        except requests.exceptions.RequestException as e:
            print(f"{question_slug} | Request failed: {e}")
            continue

        try:
            question = response['data']['question']
            # `similarQuestions` and `stats` are JSON-encoded strings inside
            # the JSON payload, so they are decoded a second time here.
            similar_questions = json.loads(question['similarQuestions'])
            stats = json.loads(question['stats'])
            data_dict = {
                'questionFrontendId': question['questionFrontendId'],
                'questionTitle': question['questionTitle'],
                'translatedTitle': question['translatedTitle'],
                'TitleSlug': question['questionTitleSlug'],
                'content': question['content'],
                'translatedContent': question['translatedContent'],
                'difficulty': question['difficulty'],
                'totalAccepted': stats['totalAccepted'],
                'totalSubmission': stats['totalSubmission'],
                'totalAcceptedRaw': stats['totalAcceptedRaw'],
                'totalSubmissionRaw': stats['totalSubmissionRaw'],
                'acRate': stats['acRate'],
                'similarQuestions': [q['titleSlug'] for q in similar_questions],
                'mysqlSchemas': question['mysqlSchemas'],
                'category': question['categoryTitle'],
                'codeDefinition': question['codeDefinition'],
                'sampleTestCase': question['sampleTestCase'],
                'metaData': question['metaData'],
                'envInfo': question['envInfo'],
                'topicTags': [tag['name'] for tag in question['topicTags']],
            }
        except (KeyError, TypeError, ValueError) as e:
            # A missing or null `question` (or undecodable embedded JSON) used
            # to abort the whole run; report the slug and keep going instead.
            print(f"{question_slug} | Unexpected response: {e!r}")
            continue

        data.append(data_dict)

In [52]:
# Tabulate the records collected in `data` and preview the first five rows.
df = pd.DataFrame(data=data)
df.head(n=5)

Unnamed: 0,questionFrontendId,questionTitle,translatedTitle,TitleSlug,content,translatedContent,difficulty,totalAccepted,totalSubmission,totalAcceptedRaw,totalSubmissionRaw,acRate,similarQuestions,mysqlSchemas,category,codeDefinition,sampleTestCase,metaData,envInfo,topicTags
0,1684,Count the Number of Consistent Strings,,count-the-number-of-consistent-strings,<p>You are given a string <code>allowed</code>...,,Easy,389.3K,441.2K,389301,441226,88.2%,[count-pairs-of-similar-strings],[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","""ab""\n[""ad"",""bd"",""aaab"",""baa"",""badab""]","{\n ""name"": ""countConsistentStrings"",\n ""par...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","[Array, Hash Table, String, Bit Manipulation, ..."
1,1982,Find Array Given Subset Sums,,find-array-given-subset-sums,<p>You are given an integer <code>n</code> rep...,,Hard,6.6K,13.6K,6586,13572,48.5%,"[subsets, subsets-ii, recover-the-original-array]",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","3\n[-3,-2,-1,0,0,1,2,3]","{\n ""name"": ""recoverArray"",\n ""params"": [\n ...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","[Array, Divide and Conquer]"
2,2269,Find the K-Beauty of a Number,,find-the-k-beauty-of-a-number,<p>The <strong>k-beauty</strong> of an integer...,,Easy,73.5K,119.8K,73494,119804,61.3%,[],[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...",240\n2,"{\n ""name"": ""divisorSubstrings"",\n ""params"":...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","[Math, String, Sliding Window]"
3,2270,Number of Ways to Split Array,,number-of-ways-to-split-array,<p>You are given a <strong>0-indexed</strong> ...,,Medium,212.2K,378.3K,212195,378285,56.1%,"[split-array-largest-sum, find-pivot-index, wa...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[10,4,-8,7]","{\n ""name"": ""waysToSplitArray"",\n ""params"": ...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","[Array, Prefix Sum]"
4,2273,Find Resultant Array After Removing Anagrams,,find-resultant-array-after-removing-anagrams,<p>You are given a <strong>0-indexed</strong> ...,,Easy,74.5K,125.9K,74544,125885,59.2%,"[group-anagrams, valid-anagram]",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[""abba"",""baba"",""bbaa"",""cd"",""cd""]","{\n ""name"": ""removeAnagrams"",\n ""params"": [\...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","[Array, Hash Table, String, Sorting]"


In [55]:
# Rebuild the frame from `data` and write it to disk without the index column.
df = pd.DataFrame(data=data)
df.to_csv(path_or_buf='questions_deets_errors.csv', index=False)

In [None]:
import json

# GraphQL document sent for every slug; it does not change between requests,
# so it is built once instead of on every loop iteration.
query = "query getQuestionDetail($titleSlug: String!) {\n  isCurrentUserAuthenticated\n  question(titleSlug: $titleSlug) {\n    questionId\n    questionFrontendId\n    questionTitle\n    translatedTitle\n    questionTitleSlug\n    content\n    translatedContent\n    difficulty\n    stats\n    allowDiscuss\n    contributors {\n      username\n      profileUrl\n      __typename\n    }\n    similarQuestions\n    mysqlSchemas\n    randomQuestionUrl\n    sessionId\n    categoryTitle\n    submitUrl\n    interpretUrl\n    codeDefinition\n    sampleTestCase\n    enableTestMode\n    metaData\n    enableRunCode\n    enableSubmit\n    judgerAvailable\n    infoVerified\n    envInfo\n    urlManager\n    article\n    questionDetailUrl\n    libraryUrl\n    adminUrl\n    companyTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    companyTagStats\n    topicTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    __typename\n  }\n  interviewed {\n    interviewedUrl\n    companies {\n      id\n      name\n      slug\n      __typename\n    }\n    timeOptions {\n      id\n      name\n      __typename\n    }\n    stageOptions {\n      id\n      name\n      __typename\n    }\n    __typename\n  }\n  subscribeUrl\n  isPremium\n  loginUrl\n}\n"

# One dict per successfully fetched question.
data = []

# Only the slug is needed from each row, so iterate the column directly
# rather than materialising every row with iterrows().
for question_slug in questions['titleSlug']:
    variables = {"titleSlug": question_slug}

    try:
        response = scrape(query, variables)
    except requests.exceptions.RequestException as e:
        print(f"{question_slug} | Request failed: {e}")
        continue

    try:
        question = response['data']['question']
        # `similarQuestions` and `stats` are JSON-encoded strings inside the
        # JSON payload, so they are decoded a second time here.
        similar_questions = json.loads(question['similarQuestions'])
        stats = json.loads(question['stats'])
        data_dict = {
            'questionFrontendId': question['questionFrontendId'],
            'questionTitle': question['questionTitle'],
            'translatedTitle': question['translatedTitle'],
            'TitleSlug': question['questionTitleSlug'],
            'content': question['content'],
            'translatedContent': question['translatedContent'],
            'difficulty': question['difficulty'],
            'totalAccepted': stats['totalAccepted'],
            'totalSubmission': stats['totalSubmission'],
            'totalAcceptedRaw': stats['totalAcceptedRaw'],
            'totalSubmissionRaw': stats['totalSubmissionRaw'],
            'acRate': stats['acRate'],
            'similarQuestions': [q['titleSlug'] for q in similar_questions],
            'mysqlSchemas': question['mysqlSchemas'],
            'category': question['categoryTitle'],
            'codeDefinition': question['codeDefinition'],
            'sampleTestCase': question['sampleTestCase'],
            'metaData': question['metaData'],
            'envInfo': question['envInfo'],
            'topicTags': [tag['name'] for tag in question['topicTags']],
        }
    except (KeyError, TypeError, ValueError) as e:
        # A missing or null `question` (or undecodable embedded JSON) used to
        # abort the whole crawl and lose every record gathered so far; report
        # the slug and keep going instead.
        print(f"{question_slug} | Unexpected response: {e!r}")
        continue

    data.append(data_dict)

In [68]:
# Load the saved question details from disk and display the resulting frame.
df = pd.read_csv(filepath_or_buffer='questions_deets.csv')
df

Unnamed: 0,questionFrontendId,questionTitle,translatedTitle,TitleSlug,content,translatedContent,difficulty,totalAccepted,totalSubmission,totalAcceptedRaw,totalSubmissionRaw,acRate,similarQuestions,mysqlSchemas,category,codeDefinition,sampleTestCase,metaData,envInfo,topicTags
0,1,Two Sum,,two-sum,<p>Given an array of integers <code>nums</code...,,Easy,16.8M,30.3M,16787625,30334274,55.3%,"['3sum', '4sum', 'two-sum-ii-input-array-is-so...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[2,7,11,15]\n9","{\n ""name"": ""twoSum"",\n ""params"": [\n {\n...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Array', 'Hash Table']"
1,2,Add Two Numbers,,add-two-numbers,<p>You are given two <strong>non-empty</strong...,,Medium,5.6M,12.3M,5606520,12261739,45.7%,"['multiply-strings', 'add-binary', 'sum-of-two...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[2,4,3]\n[5,6,4]","{\n ""name"": ""addTwoNumbers"",\n ""params"": [\n...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Linked List', 'Math', 'Recursion']"
2,3,Longest Substring Without Repeating Characters,,longest-substring-without-repeating-characters,"<p>Given a string <code>s</code>, find the len...",,Medium,7.2M,19.8M,7234473,19794857,36.5%,['longest-substring-with-at-most-two-distinct-...,[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","""abcabcbb""","{ \n ""name"": ""lengthOfLongestSubstring"",\n ""...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Hash Table', 'String', 'Sliding Window']"
3,4,Median of Two Sorted Arrays,,median-of-two-sorted-arrays,<p>Given two sorted arrays <code>nums1</code> ...,,Hard,3.3M,7.6M,3284688,7596612,43.2%,['median-of-a-row-wise-sorted-matrix'],[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[1,3]\n[2]","{\n ""name"": ""findMedianSortedArrays"",\n ""par...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Array', 'Binary Search', 'Divide and Conquer']"
4,5,Longest Palindromic Substring,,longest-palindromic-substring,"<p>Given a string <code>s</code>, return <em>t...",,Medium,3.7M,10.5M,3742977,10545358,35.5%,"['shortest-palindrome', 'palindrome-permutatio...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","""babad""","{ \n ""name"": ""longestPalindrome"",\n ""params""...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Two Pointers', 'String', 'Dynamic Programming']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3506,2273,Find Resultant Array After Removing Anagrams,,find-resultant-array-after-removing-anagrams,<p>You are given a <strong>0-indexed</strong> ...,,Easy,74.5K,125.9K,74544,125885,59.2%,"['group-anagrams', 'valid-anagram']",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[""abba"",""baba"",""bbaa"",""cd"",""cd""]","{\n ""name"": ""removeAnagrams"",\n ""params"": [\...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Array', 'Hash Table', 'String', 'Sorting']"
3507,2275,Largest Combination With Bitwise AND Greater T...,,largest-combination-with-bitwise-and-greater-t...,<p>The <strong>bitwise AND</strong> of an arra...,,Medium,149.7K,185K,149733,185042,80.9%,['count-number-of-maximum-bitwise-or-subsets'],[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[16,17,71,62,12,24,14]","{\n ""name"": ""largestCombination"",\n ""params""...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Array', 'Hash Table', 'Bit Manipulation', 'C..."
3508,2276,Count Integers in Intervals,,count-integers-in-intervals,<p>Given an <strong>empty</strong> set of inte...,,Hard,22.6K,62.8K,22645,62831,36.0%,"['merge-intervals', 'insert-interval', 'data-s...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[""CountIntervals"",""add"",""add"",""count"",""add"",""c...","{\n ""classname"": ""CountIntervals"",\n ""constr...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Design', 'Segment Tree', 'Ordered Set']"
3509,2488,Count Subarrays With Median K,,count-subarrays-with-median-k,<p>You are given an array <code>nums</code> of...,,Hard,14.8K,32.9K,14778,32857,45.0%,"['number-of-subarrays-with-bounded-maximum', '...",[],Algorithms,"[{""value"": ""cpp"", ""text"": ""C++"", ""defaultCode""...","[3,2,1,4,5]\n4","{\n ""name"": ""countSubarrays"",\n ""params"": [\...","{""cpp"": [""C++"", ""<p>Compiled with <code> clang...","['Array', 'Hash Table', 'Prefix Sum']"


In [69]:
# Remove the two translation columns. errors='ignore' keeps this cell
# re-runnable: without it a second execution raises KeyError because the
# columns are already gone from `df`.
df = df.drop(columns=['translatedTitle', 'translatedContent'], errors='ignore')

In [70]:
# Persist the current `df` to disk, omitting the index column.
df.to_csv(path_or_buf='questions_cleaned.csv', index=False)

In [65]:
# One-off probe: fetch a single question so its raw payload can be inspected.
query = "query getQuestionDetail($titleSlug: String!) {\n  isCurrentUserAuthenticated\n  question(titleSlug: $titleSlug) {\n    questionId\n    questionFrontendId\n    questionTitle\n    translatedTitle\n    questionTitleSlug\n    content\n    translatedContent\n    difficulty\n    stats\n    allowDiscuss\n    contributors {\n      username\n      profileUrl\n      __typename\n    }\n    similarQuestions\n    mysqlSchemas\n    randomQuestionUrl\n    sessionId\n    categoryTitle\n    submitUrl\n    interpretUrl\n    codeDefinition\n    sampleTestCase\n    enableTestMode\n    metaData\n    enableRunCode\n    enableSubmit\n    judgerAvailable\n    infoVerified\n    envInfo\n    urlManager\n    article\n    questionDetailUrl\n    libraryUrl\n    adminUrl\n    companyTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    companyTagStats\n    topicTags {\n      name\n      slug\n      translatedName\n      __typename\n    }\n    __typename\n  }\n  interviewed {\n    interviewedUrl\n    companies {\n      id\n      name\n      slug\n      __typename\n    }\n    timeOptions {\n      id\n      name\n      __typename\n    }\n    stageOptions {\n      id\n      name\n      __typename\n    }\n    __typename\n  }\n  subscribeUrl\n  isPremium\n  loginUrl\n}\n"
slug = 'count-univalue-subtrees'
variables = dict(titleSlug=slug)
response = scrape(query=query, variables=variables)

In [66]:
# Display the `response` object as the cell output for manual inspection.
response

{'data': {'isCurrentUserAuthenticated': False,
  'question': {'questionId': '250',
   'questionFrontendId': '250',
   'questionTitle': 'Count Univalue Subtrees',
   'translatedTitle': None,
   'questionTitleSlug': 'count-univalue-subtrees',
   'content': None,
   'translatedContent': None,
   'difficulty': 'Medium',
   'stats': '{"totalAccepted": "162.9K", "totalSubmission": "285.5K", "totalAcceptedRaw": 162907, "totalSubmissionRaw": 285484, "acRate": "57.1%"}',
   'allowDiscuss': True,
   'contributors': [],
   'similarQuestions': '[{"title": "Subtree of Another Tree", "titleSlug": "subtree-of-another-tree", "difficulty": "Easy", "translatedTitle": null}, {"title": "Longest Univalue Path", "titleSlug": "longest-univalue-path", "difficulty": "Medium", "translatedTitle": null}]',
   'mysqlSchemas': [],
   'randomQuestionUrl': '/classic/problems/random-one-question/',
   'sessionId': '0',
   'categoryTitle': 'Algorithms',
   'submitUrl': '/problems/count-univalue-subtrees/submit/',
   'i

In [67]:
# Write the current `df` to disk, omitting the index column.
# NOTE(review): this cell's execution count (67) is lower than that of the
# cells that load and trim `df` (68-69), so it originally ran against an
# earlier `df`. On a top-to-bottom run it writes the same frame as
# 'questions_cleaned.csv' - confirm which frame was intended.
df.to_csv(path_or_buf='questions_deets_droped.csv', index=False)