In [15]:
import os

import requests as r
from recommender.main import get_memory_per_model_and_tgi, get_tgi_memory, get_quantization_type, get_max_prompt_length

# Credentials must never be committed inside a notebook: the access token is read
# from the HF_TOKEN environment variable instead of being hardcoded.
# NOTE(review): the token that used to be hardcoded here should be considered
# leaked and be revoked in the Hugging Face account settings.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Without a token the requests are sent anonymously (no Authorization header)
# rather than with a bogus "Bearer None" value.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

def check_for_gate(model_id, timeout=10):
  """Report whether the Hub API answers the model lookup with a "gate" error.

  Sends a GET request (without the Authorization header) to
  ``https://huggingface.co/api/models/{model_id}`` and inspects the ``error``
  field of the JSON body.

  Args:
    model_id: repository id interpolated into the URL, e.g. ``"org/name"``.
    timeout: seconds to wait for the server before ``requests`` gives up, so a
      stalled connection cannot block the notebook forever.

  Returns:
    True if the body has a string ``error`` containing ``"gate"``, else False.

  Raises:
    Whatever ``requests.get`` / ``Response.json`` raise (connection errors,
    timeouts, non-JSON bodies); callers in this file catch ``Exception``.
  """
  url = f"https://huggingface.co/api/models/{model_id}"
  payload = r.get(url, timeout=timeout).json()
  error = payload.get("error", None)
  # Only a textual error message can mention the gate; guarding on str avoids a
  # TypeError from the `in` test when the API returns a non-string error value.
  return isinstance(error, str) and "gate" in error


def get_tgi_models_and_parse(limit=250,type="likes30d",filter="text-generation-inference"):
  """List Hub models matching ``filter`` and build one summary record per kept model.

  Args:
    limit: value sent as the ``limit`` query parameter.
    type: value sent as the ``sort`` query parameter (the name shadows the
      builtin but is kept so existing keyword callers keep working).
    filter: value sent as the ``filter`` query parameter (same naming note).

  Returns:
    A list of dicts with the keys ``model_id``, ``url``, ``cotaniner``,
    ``approx. min. required memory``, ``license``, ``gated``, ``private``,
    ``likes``, ``likes30d`` and ``downloads``. Skipped entries: blocklisted
    ids, models tagged ``gguf``, non-gated models for which
    ``get_quantization_type`` returns a truthy value, and any entry whose
    processing raises (the error is printed and the loop continues).

  Raises:
    requests.HTTPError: if the listing request returns an error status.
  """
  # blocklist of models that crashed the memory estimation on the author's machine
  blocklist = {
    "LargeWorldModel/LWM-Text-Chat-1M",
    "LargeWorldModel/LWM-Text-1M",
    "LargeWorldModel/LWM-Text-512K",
    "LargeWorldModel/LWM-Chat-512K",
  }

  # Let requests build and encode the query string instead of formatting it by hand.
  params = {"sort": type, "direction": -1, "filter": filter, "limit": limit}
  listing = r.get("https://huggingface.co/api/models", params=params, headers=headers, timeout=30)
  # Fail loudly on 4xx/5xx (e.g. an invalid token) instead of iterating over an error payload.
  listing.raise_for_status()
  response = listing.json()

  filtered_models=[]
  for model in response:
    # Resolved inside the try; kept separately so the error handler never has to
    # index `model` again (which could raise a second time and abort the loop).
    model_id = None
    try:
      model_id = model["id"]
      if model_id in blocklist:
        continue

      tags = model["tags"]
      # remove gguf models
      if "gguf" in tags:
        continue

      # check for gate (one extra HTTP request per model)
      gated = check_for_gate(model_id)

      # first "license:<value>" tag, or "N/A" when the model has none
      license_value = next((tag.split(':', 1)[1] for tag in tags if tag.startswith('license:')), "N/A")

      if gated:
        # sentinel for gated models, for which no estimate is attempted; rendered as "-1GB" below
        memory = -1
      else:
        # remove quantized models for now
        if get_quantization_type(model_id=model_id):
          continue
        # NOTE(review): 8192 and "float16" are presumably the sequence length and
        # dtype used for the estimate — confirm against recommender.main.
        memory = get_memory_per_model_and_tgi(model_id,8192,"float16")['real_memory_in_gigabytes']

      filtered_models.append({
        "model_id": model_id,
        "url": f"https://huggingface.co/{model_id}",
        # NOTE(review): "cotaniner" is a typo for "container"; kept byte-for-byte
        # because the key is part of the returned records — rename together with consumers.
        "cotaniner": "PyTorch TGI GPU",
        "approx. min. required memory": f"{memory}GB",
        "license": license_value,
        "gated": gated,
        "private": model["private"],
        "likes": model["likes"],
        "likes30d": model["likes30d"],
        "downloads": model["downloads"],
      })
    except Exception as e:
      # best effort: report the failing entry and keep going
      print(e)
      print(f"Error parsing model {model_id if model_id is not None else repr(model)}")
      continue
  return filtered_models

def get_emb_models_and_parse(limit=25,type="likes30d",filter="sentence-transformers"):
  """List Hub models matching ``filter`` and keep those tagged with a TEI-supported architecture.

  Args:
    limit: value sent as the ``limit`` query parameter.
    type: value sent as the ``sort`` query parameter (the name shadows the
      builtin but is kept so existing keyword callers keep working).
    filter: value sent as the ``filter`` query parameter (same naming note).

  Returns:
    A list of dicts with the keys ``model_id``, ``url``, ``cotaniner``,
    ``license``, ``gated``, ``private``, ``likes``, ``likes30d`` and
    ``downloads``. Entries without one of the supported architecture tags are
    skipped, as is any entry whose processing raises (the error is printed and
    the loop continues).

  Raises:
    requests.HTTPError: if the listing request returns an error status.
  """
  # supported TEI architectures
  architectures = ["bert", "roberta", "xlm-roberta","nomic_bert"]
  # blocklist of model ids to skip; currently empty
  blocklist = set()

  # Let requests build and encode the query string instead of formatting it by hand.
  params = {"sort": type, "direction": -1, "filter": filter, "limit": limit}
  listing = r.get("https://huggingface.co/api/models", params=params, headers=headers, timeout=30)
  # Fail loudly on 4xx/5xx (e.g. an invalid token) instead of iterating over an error payload.
  listing.raise_for_status()
  response = listing.json()

  filtered_models=[]
  for model in response:
    # Resolved inside the try; kept separately so the error handler never has to
    # index `model` again (which could raise a second time and abort the loop).
    model_id = None
    try:
      model_id = model["id"]
      if model_id in blocklist:
        continue

      tags = model["tags"]
      # filter TEI supported architectures
      if not any(architecture in tags for architecture in architectures):
        continue

      # first "license:<value>" tag, or "N/A" when the model has none
      license_value = next((tag.split(':', 1)[1] for tag in tags if tag.startswith('license:')), "N/A")

      # check for gate (one extra HTTP request per model)
      gated = check_for_gate(model_id)

      filtered_models.append({
        "model_id": model_id,
        "url": f"https://huggingface.co/{model_id}",
        # NOTE(review): "cotaniner" is a typo for "container"; kept byte-for-byte
        # because the key is part of the returned records — rename together with consumers.
        "cotaniner": "PyTorch TEI CPU/GPU",
        "license": license_value,
        "gated": gated,
        "private": model["private"],
        "likes": model["likes"],
        "likes30d": model["likes30d"],
        "downloads": model["downloads"],
      })
    except Exception as e:
      # best effort: report the failing entry and keep going
      print(e)
      print(f"Error parsing model {model_id if model_id is not None else repr(model)}")
      continue
  return filtered_models
  

In [6]:
# Fetch and parse the TGI model listing with the function's defaults
# (limit=250, sorted by likes30d, filter "text-generation-inference").
# Each listed model can trigger further lookups (gate check, config loading),
# so this cell can take a while.
response = get_tgi_models_and_parse()


Loading pretrained config for `mistralai/Mixtral-8x7B-Instruct-v0.1` from `transformers`...
Loading pretrained config for `bigcode/starcoder2-15b` from `transformers`...
Loading pretrained config for `BioMistral/BioMistral-7B` from `transformers`...
Loading pretrained config for `mistralai/Mistral-7B-Instruct-v0.2` from `transformers`...
Loading pretrained config for `NousResearch/Genstruct-7B` from `transformers`...
Loading pretrained config for `CohereForAI/aya-101` from `transformers`...
Loading pretrained config for `microsoft/phi-2` from `transformers`...
Loading pretrained config for `mistralai/Mistral-7B-v0.1` from `transformers`...
Loading pretrained config for `01-ai/Yi-9B` from `transformers`...
Loading pretrained config for `NousResearch/Hermes-2-Pro-Mistral-7B` from `transformers`...
Loading pretrained config for `abacusai/Smaug-72B-v0.1` from `transformers`...
Loading pretrained config for `mistralai/Mixtral-8x7B-v0.1` from `transformers`...
Loading pretrained config for `

config.json: 100%|██████████| 730/730 [00:00<00:00, 177kB/s]


Loading pretrained config for `ibivibiv/alpaca-dragon-72b-v1` from `transformers`...


In [8]:
# NOTE(review): this import lives in the middle of the notebook; consider moving
# it to the first cell with the other imports.
from datasets import Dataset 

# Wrap the list of per-model dicts returned by get_tgi_models_and_parse() in a Dataset.
ds = Dataset.from_list(response)

# Export to tgi_models.csv (relative path, resolved against the kernel's working
# directory). As the last expression, the value returned by to_csv is shown as
# the cell output.
ds.to_csv("tgi_models.csv")


Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 253.25ba/s]


29353

In [16]:
# Fetch and parse the embedding model listing, asking the API for 50 entries
# instead of the function's default of 25.
embedding = get_emb_models_and_parse(limit=50)


In [19]:
# Number of records kept after filtering (the request above asked for 50 entries).
len(embedding)

40

In [20]:
# NOTE(review): Dataset is also imported in an earlier cell; the duplicate import
# is harmless but could be hoisted to the first cell with the other imports.
from datasets import Dataset 

# Wrap the list of per-model dicts returned by get_emb_models_and_parse() in a Dataset.
emb_ds = Dataset.from_list(embedding)

# Export to emb_models.csv (relative path, resolved against the kernel's working
# directory). As the last expression, the value returned by to_csv is shown as
# the cell output.
emb_ds.to_csv("emb_models.csv")


Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 522.78ba/s]


5575