### **1. Import Dependencies**

In [2]:
import pandas as pd
import numpy as np

import gc
import os
import requests
import json

import concurrent.futures
import asyncio
import httpx
from urllib.parse import urlencode

from typing import List, Dict, Optional
from glob import glob

import joblib

from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier

from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


### **2. Load Data**

In [3]:
def fetch_data(url: str, params=None, timeout: float = 120.0) -> Optional[dict]:
  """Send a GET request to `url` and return the decoded JSON body.

  Args:
    url: Address to request.
    params: Optional query parameters forwarded to `requests.get`.
    timeout: Seconds to wait on the server before giving up. A finite value
      keeps a stalled connection from blocking the calling thread forever.

  Returns:
    The parsed JSON payload when the server answers with HTTP 200.
    `None` when the status code is anything else, or when the request or the
    JSON decoding raises. Failures are reported with `print`, never raised.
  """
  try:
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code == 200:
      return response.json()

    print(f"Error response with status code: {response.status_code}")
  except Exception as error:
    # Deliberately best-effort: one bad request must not abort a bulk download.
    print(f'Failed to fetch data: {error}')

  # Explicit so callers can see they have to handle the failure case.
  return None

def urls_builder(base_url: str, n_fetch: int, limit: int, products: List[str], **kwargs):
  urls = []
  for product in products:
    for i in range(n_fetch):
      
      param = {
        'offset': i * limit,
        'limit': limit,
        'product': product,
        **kwargs,
      }
      
      full_url = base_url + '?' + urlencode(param)
      urls.append(full_url)
    
  return urls

def save_json(data, path: str):
  """Serialize `data` as JSON into the file at `path`.

  Missing parent directories are created first, so that a freshly computed
  result is not lost to a `FileNotFoundError` at write time.

  Args:
    data: Any object `json.dump` can serialize.
    path: Destination file; an existing file is overwritten.
  """
  parent_dir = os.path.dirname(path)
  if parent_dir:  # empty when `path` is a bare file name in the working directory
    os.makedirs(parent_dir, exist_ok=True)

  with open(path, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file)

def load_json(path: str):
  """Read the JSON document stored at `path` and return the decoded object."""
  with open(path, mode='r') as source:
    return json.load(source)

In [4]:
# Bug attributes requested from the REST endpoint (sent as `include_fields`).
selected_fields = ['id', 'duplicates', 'summary', 'status', 'resolution', 'platform', 'product', 'type', 'priority', 'severity', 'component']
products = ['Firefox']

base_params = {
  'include_fields': ', '.join(selected_fields),
}
saved_data_path = os.path.join('data', 'firefox_raw_data.json')

base_url = 'https://bugzilla.mozilla.org/rest/bug'
n_fetch = 250  # number of pages requested per product
limit = 10000  # page size; also the step between consecutive offsets

if os.path.exists(saved_data_path):
  # A previous run already downloaded the data: reuse the local copy.
  response_data = load_json(saved_data_path)
else:
  urls = urls_builder(base_url, n_fetch, limit, products, **base_params)

  max_workers = 100
  with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
    pages = list(executor.map(fetch_data, urls))

  # fetch_data returns None for a request that failed; indexing such a page
  # with ['bugs'] would raise TypeError and throw away every successful page.
  failed_pages = sum(page is None for page in pages)
  if failed_pages:
    print(f'{failed_pages} of {len(pages)} requests failed; the saved data is incomplete.')

  # Flatten the per-page bug lists into one list of bug records.
  response_data = [
    bug
    for page in pages
    if page is not None
    for bug in page['bugs']
  ]

  save_json(response_data, saved_data_path)

In [14]:
# One row per bug record; the 'id' field becomes the row index.
df = pd.DataFrame(data=response_data).set_index(keys='id')
df

Unnamed: 0_level_0,priority,type,status,product,severity,resolution,duplicates,component,platform,summary
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10954,P3,enhancement,RESOLVED,Firefox,normal,WONTFIX,[],Settings UI,All,Dialup properties needs to be exposed in prefs
14871,--,defect,RESOLVED,Firefox,minor,DUPLICATE,[],General,All,[Find] Find whole word only
19118,--,enhancement,RESOLVED,Firefox,normal,WONTFIX,"[32380, 57760, 64040, 66644, 95646, 96695, 106...",Settings UI,All,Plug-In Manager (ui for choosing mimetype-plug...
21482,P3,enhancement,NEW,Firefox,S3,,[],File Handling,All,Improvement to Save File dialog: folder based ...
23207,P5,enhancement,NEW,Firefox,S3,,"[26366, 172317, 177285]",File Handling,Unspecified,"Options in Save As (location of saved images, ..."
...,...,...,...,...,...,...,...,...,...,...
1875963,--,task,NEW,Firefox,--,,[],Address Bar,Unspecified,[meta] Implement Yelp Suggestions
1875966,--,task,ASSIGNED,Firefox,--,,[],Address Bar,Unspecified,Add Yelp-modifiers data for test
1875971,--,enhancement,NEW,Firefox,--,,[],General,Unspecified,Re-add anonymous FTP client as compile option
1875974,--,defect,UNCONFIRMED,Firefox,--,,[],Untriaged,Unspecified,"Bug about ""Open application menu"""
