In [None]:
!pip install transformers



# obtain a list of the top-20 "Text Classification" and top-20 "Text Generation" models on https://huggingface.co/, ranked based on popularity

In [None]:
import csv
from huggingface_hub import HfApi

# Function to get all models with a specific tag
def get_models_by_tag(tag):
    hf_api = HfApi(endpoint="https://huggingface.co")
    all_models = hf_api.list_models()
    tag_models = [model for model in all_models if tag.lower() in [model_tag.lower() for model_tag in model.tags]]
    return tag_models

# Get all "text classification" models
text_classification_models = get_models_by_tag("text-classification")

# Sort models by downloads in descending order
text_classification_models_sorted = sorted(text_classification_models, key=lambda x: x.downloads, reverse=True)

# Prepare data for CSV
csv_data = [["Rank", "Model ID", "Downloads"]]
for i, model in enumerate(text_classification_models_sorted[:20]):
    csv_data.append([i + 1, model.modelId, model.downloads])

# Save to CSV file
csv_file_path = "top_text_classification_models.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(csv_data)

print(f"CSV file saved at: {csv_file_path}")


CSV file saved at: top_text_classification_models.csv


 # obtain and compare the number of ML apps ("spaces") for each "Text Classification" and "Text Generation" model obtained in step 1

In [None]:
!pip install requests




In [None]:
!pip install requests beautifulsoup4




## obtain and compare the number of ML apps ("spaces") for each "Text Classification"

In [None]:
import requests
from bs4 import BeautifulSoup


# Function to get the number of articles for a model
def get_num_articles(classification_model_name):
    url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    response = requests.get(url)

    if response.status_code == 200 and 'text/html' in response.headers['content-type']:
        try:
            html_content = response.text
            soup = BeautifulSoup(html_content, 'html.parser')

            # Find the div with class "grid grid-cols-1 gap-x-4 gap-y-6 md:grid-cols-3 xl:grid-cols-4"
            class_div = soup.find('div', class_='grid grid-cols-1 gap-x-4 gap-y-6 md:grid-cols-3 xl:grid-cols-4')

            if class_div:
                # Find all articles under the div
                articles = class_div.find_all('article', class_='')

                # Return the number of articles
                return len(articles)
            else:
                return 0
        except Exception as e:
            print(f"Error: {e}")
            return 0
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return 0

# Function to get all models with a specific tag
def get_models_by_tag(tag):
    hf_api = HfApi(endpoint="https://huggingface.co")
    all_models = hf_api.list_models()
    tag_models = [model for model in all_models if tag.lower() in [model_tag.lower() for model_tag in model.tags]]
    return tag_models

# Get all "text classification" models
text_classification_models = get_models_by_tag("text-classification")

# Sort models by downloads in descending order
text_classification_models_sorted = sorted(text_classification_models, key=lambda x: x.downloads, reverse=True)

# Save model names in an array
classification_model_names = []

# Display information about "text classification" models and get the number of articles for each model
for i, model in enumerate(text_classification_models_sorted[:20]):
    classification_model_names.append(model.modelId)
    modified_url = f"https://huggingface.co/spaces?sort=likes&search={model.modelId}"

    # Fetch the number of articles for the current model
    num_articles = get_num_articles(model.modelId)

    print(f"{i+1}. {model.modelId} - Downloads: {model.downloads}")
    print(f"   Tags: {', '.join(model.tags)}")
    print(f"   Last Modified: {model.lastModified}")
    print(f"   Number of apps: {num_articles}\n")

# Now model_names contains the names of the first 10 text classification models
print("Model names array:", classification_model_names)


1. distilbert-base-uncased-finetuned-sst-2-english - Downloads: 8486425
   Tags: transformers, pytorch, tf, rust, onnx, safetensors, distilbert, text-classification, en, dataset:sst2, dataset:glue, arxiv:1910.01108, doi:10.57967/hf/0181, license:apache-2.0, model-index, autotrain_compatible, endpoints_compatible, has_space, region:us
   Last Modified: None
   Number of apps: 16

2. cardiffnlp/twitter-roberta-base-irony - Downloads: 7177328
   Tags: transformers, pytorch, tf, jax, roberta, text-classification, en, dataset:tweet_eval, arxiv:2010.12421, autotrain_compatible, endpoints_compatible, has_space, region:us
   Last Modified: None
   Number of apps: 2

3. lxyuan/distilbert-base-multilingual-cased-sentiments-student - Downloads: 7053643
   Tags: transformers, pytorch, safetensors, distilbert, text-classification, sentiment-analysis, zero-shot-distillation, distillation, zero-shot-classification, debarta-v3, en, ar, de, es, fr, ja, zh, id, hi, it, ms, pt, dataset:tyqiangz/multiling

## csv for classification model

In [None]:
print(classification_model_names)

['distilbert-base-uncased-finetuned-sst-2-english', 'cardiffnlp/twitter-roberta-base-irony', 'lxyuan/distilbert-base-multilingual-cased-sentiments-student', 'mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis', 'SamLowe/roberta-base-go_emotions', 'marieke93/MiniLM-evidence-types', 'Ashishkr/query_wellformedness_score', 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli', 'cardiffnlp/twitter-roberta-base-sentiment', 'facebook/bart-large-mnli', 'cardiffnlp/twitter-roberta-base-sentiment-latest', 'nlptown/bert-base-multilingual-uncased-sentiment', 'cardiffnlp/twitter-xlm-roberta-base-sentiment', 'papluca/xlm-roberta-base-language-detection', 'ProsusAI/finbert', 'cross-encoder/ms-marco-TinyBERT-L-2-v2', 'cross-encoder/ms-marco-MiniLM-L-4-v2', 'martin-ha/toxic-comment-model', 'alexandrainst/scandi-nli-large', 'laiyer/deberta-v3-base-prompt-injection']


### Classification model: mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis "]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps4.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps4.csv' has been created with model names, app numbers, and combined names.


### Classification model: SamLowe/roberta-base-go_emotions

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["SamLowe/roberta-base-go_emotions"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps5.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps5.csv' has been created with model names, app numbers, and combined names.


### classification model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps6.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps6.csv' has been created with model names, app numbers, and combined names.


### Classification model:cardiffnlp/twitter-roberta-base-sentiment

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["cardiffnlp/twitter-roberta-base-sentiment"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps7.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps7.csv' has been created with model names, app numbers, and combined names.


### classification model: facebook/bart-large-mnli

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["facebook/bart-large-mnli"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps8.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps8.csv' has been created with model names, app numbers, and combined names.


### classification model: cardiffnlp/twitter-roberta-base-sentiment-latest

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["cardiffnlp/twitter-roberta-base-sentiment-latest"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps9.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps9.csv' has been created with model names, app numbers, and combined names.


### classification model: nlptown/bert-base-multilingual-uncased-sentiment

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["nlptown/bert-base-multilingual-uncased-sentiment"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps10.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps10.csv' has been created with model names, app numbers, and combined names.


### classification model: cardiffnlp/twitter-xlm-roberta-base-sentiment

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["cardiffnlp/twitter-xlm-roberta-base-sentiment"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps11.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps11.csv' has been created with model names, app numbers, and combined names.


### classification model: ProsusAI/finbert

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to get the combined names for all apps associated with a model and their numbers
def get_combined_names_for_model(classification_model_name):
    base_url = f"https://huggingface.co/spaces?sort=likes&search={classification_model_name}"
    classification_combined_names = []

    def extract_names_from_page(url):
        response = requests.get(url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            try:
                html_content = response.text
                soup = BeautifulSoup(html_content, 'html.parser')

                # Find all h4 elements without specifying a class
                h4_elements = soup.find_all('h4')

                # Find all a elements with class "truncate font-mono text-sm text-black"
                a_elements = soup.find_all('a', class_='truncate font-mono text-sm text-black')

                # Iterate through each pair of h4 and a elements and combine the names
                for index, (h4_element, a_element) in enumerate(zip(h4_elements, a_elements), start=1):
                    app_name = h4_element.text.strip()
                    creator_name = a_element.text.strip()
                    classification_combined_name = f"{creator_name}/{app_name}"
                    classification_combined_names.append({'Model': classification_model_name, 'App Number': index, 'Combined Name': classification_combined_name})

            except Exception as e:
                print(f"Error: {e}")

    # Extract names from the first page
    extract_names_from_page(base_url)

    # Extract names from subsequent pages
    page_number = 1
    while True:
        next_page_url = f"{base_url}&p={page_number}"
        response = requests.get(next_page_url)
        if response.status_code == 200 and 'text/html' in response.headers['content-type']:
            extract_names_from_page(next_page_url)
            page_number += 1
        else:
            break

    return classification_combined_names

# Example list of model names
classification_model_names = ["ProsusAI/finbert"]

# Create a list to store model names and associated apps
classification_model_apps_list = []

# Iterate through each model and get combined names for all apps
for classification_model_name in classification_model_names:
    classification_combined_names = get_combined_names_for_model(classification_model_name)
    if classification_combined_names:
        classification_model_apps_list.extend(classification_combined_names)

# Save the data to a CSV file
csv_file_path = 'classification_model_apps12.csv'
fieldnames = ['Model', 'App Number', 'Combined Name']

with open(csv_file_path, mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(classification_model_apps_list)

print(f"CSV file '{csv_file_path}' has been created with model names, app numbers, and combined names.")


CSV file 'classification_model_apps12.csv' has been created with model names, app numbers, and combined names.


### combining all classification models apps.

In [None]:
import pandas as pd
import glob

# Specify the path where your CSV files are located
csv_files_path = '/content/*.csv'


# Use glob to get a list of all CSV files in the specified path
csv_files = glob.glob(csv_files_path)

# Create an empty DataFrame to store the combined data
combined_df = pd.DataFrame()

# Iterate through each CSV file and concatenate the data
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    combined_df = pd.concat([combined_df, df], ignore_index=True)

# Save the combined data to a new CSV file
combined_csv_file_path = '/content/all_classification_model_apps.csv'
combined_df.to_csv(combined_csv_file_path, index=False)

print(f"Combined CSV file '{combined_csv_file_path}' has been created.")


Combined CSV file '/content/all_classification_model_apps.csv' has been created.


# obtain and compare the source code size of the ML apps ("spaces") obtained in step 2 (HINT: check the "Files" tab at the top-right of a given space's page)

## classification model apps size

In [None]:
import csv
import requests
from bs4 import BeautifulSoup

# Function to extract size from HTML content
def get_size_from_html(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    byte_sum = 0
    for tag in soup.find_all(text=True):
        if 'byte' in tag.lower():
            try:
                byte_value = int(tag.split()[0])
                byte_sum += byte_value
            except ValueError:
                pass
    return byte_sum

# Function to fetch HTML content of a given URL
def get_html_content(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return None

# Read CSV file
csv_file_path = '/content/all_classification_model_apps.csv'  # Replace with the path to your CSV file
output_csv_file_path = 'classification_app_size.csv'  # New CSV file for app sizes

with open(csv_file_path, 'r') as file, open(output_csv_file_path, 'w', newline='') as output_file:
    # Create CSV writer
    fieldnames = ['classification_Model', 'App Name', 'App Size (bytes)']
    writer = csv.DictWriter(output_file, fieldnames=fieldnames)

    # Write header to the output file
    writer.writeheader()

    reader = csv.DictReader(file)

    # Iterate through each row in the CSV
    for row in reader:
        model_name = row['Model']
        combined_name = row['App Name']

        # Replace spaces with dashes
        formatted_combined_name = combined_name.replace(' ', '-')

        # Generate the URL
        url = f'https://huggingface.co/spaces/{formatted_combined_name}/tree/main'
        print(url)

        # Fetch HTML content
        html_content = get_html_content(url)

        if html_content is not None:
            # Extract size from HTML
            app_size = get_size_from_html(html_content)

            # Write to the output file
            writer.writerow({'classification_Model': model_name, 'App Name': combined_name, 'App Size (bytes)': app_size})

            # Print or store the result as needed
            print(f'classification_Model: {model_name}, App Name : {combined_name}, App Size: {app_size} bytes')
        else:
            print(f'Error fetching content for Model: {model_name}, Combined Name: {combined_name}')


https://huggingface.co/spaces/happyhaplu/MoritzLaurer-DeBERTa-V3-Base-Mnli-Fever-Anli/tree/main


  for tag in soup.find_all(text=True):


classification_Model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli, App Name : happyhaplu/MoritzLaurer DeBERTa V3 Base Mnli Fever Anli, App Size: 367 bytes
https://huggingface.co/spaces/Jofthomas/MoritzLaurer-DeBERTa-V3-Base-Mnli-Fever-Anli/tree/main
classification_Model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli, App Name : Jofthomas/MoritzLaurer DeBERTa V3 Base Mnli Fever Anli, App Size: 369 bytes
https://huggingface.co/spaces/Avatarofhemant/MoritzLaurer-DeBERTa-V3-Base-Mnli-Fever-Anli/tree/main
classification_Model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli, App Name : Avatarofhemant/MoritzLaurer DeBERTa V3 Base Mnli Fever Anli, App Size: 366 bytes
https://huggingface.co/spaces/awacke1/ZeroShotClassifiers-Facebook-Bart-Large-Mnli/tree/main
classification_Model: facebook/bart-large-mnli, App Name : awacke1/ZeroShotClassifiers Facebook Bart Large Mnli, App Size: 360 bytes
https://huggingface.co/spaces/awacke1/Zero-Shot-Classification-Facebook-Bart-Large-Mnli/tree/main
classifica

In [92]:
import pandas as pd

# Load data
df = pd.read_csv('/content/classification_app_size.csv')
app_size_stats = df['App Size (bytes)'].describe()
print(app_size_stats)

count     149.000000
mean      372.409396
std       122.584555
min         0.000000
25%       331.000000
50%       370.000000
75%       375.000000
max      1291.000000
Name: App Size (bytes), dtype: float64
