In [None]:
# Remove the # to install the dependencies on your system
!pip install pyalex semanticscholar habanero itables pandas

In [None]:
import requests
import pandas as pd
from itables import init_notebook_mode

import pyalex
from habanero import counts
from semanticscholar import SemanticScholar

# Raise pandas' display limits so wide/long frames are not truncated when shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# NOTE(review): personal address hardcoded in the notebook; consider reading
# it from an environment variable instead.
pyalex.config.email = "ly0@protonmail.com"

sch = SemanticScholar()

URL = "https://ost.ecosyste.ms/api/v1/projects?reviewed=true&per_page=2000"
FILE_TO_SAVE_AS = "ecosystems_repository_downloads.json" # the name you want to save file as
REQUEST_TIMEOUT_SECONDS = 300  # generous limit; without one a stalled server blocks the kernel forever

# Not read by this cell: everything is fetched with the single request below.
pages = 20

resp = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
# Stop here on an HTTP error instead of saving and parsing an error body as data.
resp.raise_for_status()

# Keep a raw copy of the response on disk.
with open(FILE_TO_SAVE_AS, "wb") as f:
    f.write(resp.content)

In [None]:
from io import StringIO

# Parse the downloaded JSON body into a DataFrame. The text is wrapped in
# StringIO because handing read_json a literal JSON string is deprecated in
# recent pandas releases; file-like input works on old and new versions.
df = pd.read_json(StringIO(resp.content.decode()))

In [None]:
# Number of rows that were loaded into df.
df.shape[0]

In [None]:
def _summarise_packages(packages):
    """Aggregate the download figures of one project's package entries.

    Parameters
    ----------
    packages : list of dict
        Entries read for their 'downloads', 'downloads_period' and
        'docker_downloads_count' keys. Anything that is not a list/tuple
        (e.g. a null field) is treated as "no packages".

    Returns
    -------
    tuple
        ``(monthly_downloads, docker_downloads_total)``: the sum of
        'downloads' over entries whose 'downloads_period' is "last-month",
        and the sum of 'docker_downloads_count' over all entries.
    """
    monthly_downloads = 0
    docker_downloads_total = 0
    if not isinstance(packages, (list, tuple)):
        return monthly_downloads, docker_downloads_total
    for package in packages:
        downloads = package.get('downloads')
        if downloads and package.get('downloads_period') == "last-month":
            monthly_downloads += downloads
        # Accumulate rather than overwrite: previously a later package without
        # a docker count reset the value found on an earlier package to 0.
        # NOTE(review): assumes per-package docker counts do not overlap - confirm.
        docker_count = package.get('docker_downloads_count')
        if docker_count:
            docker_downloads_total += docker_count
    return monthly_downloads, docker_downloads_total


def _first_citation(citation_counts):
    """Return ``(doi, count)`` for the first entry of a DOI -> count mapping.

    Returns ``(None, None)`` when the value is not a non-empty dict, which
    covers missing/null fields without hiding unrelated errors.
    """
    if isinstance(citation_counts, dict) and citation_counts:
        return next(iter(citation_counts.items()))
    return None, None


# One list per output column; each loop iteration appends exactly one value to
# every list, so they stay aligned row by row.
names = []
download_counts = []
url = []
description = []
category = []
sub_category = []
language = []
docker_downloads = []
doi = []
citations = []
total_citations = []

for _, row in df.iterrows():
    package_downloads, docker_download_count = _summarise_packages(row['packages'])
    # Only the first DOI found in 'citation_counts' is reported.
    first_doi, first_doi_citations = _first_citation(row.get('citation_counts'))

    names.append(row['name'])
    download_counts.append(package_downloads)
    docker_downloads.append(docker_download_count)
    url.append(row['url'])
    description.append(row['description'])
    category.append(row['category'])
    sub_category.append(row['sub_category'])
    language.append(row['language'])
    total_citations.append(row['total_citations'])
    citations.append(first_doi_citations)
    doi.append(first_doi)


In [None]:
# Switch on itables' interactive rendering for the frame displayed below.
init_notebook_mode(all_interactive=True)

# Assemble the extracted columns in one step; the key order is the column order.
df_extract = pd.DataFrame(
    {
        'project_names': names,
        'download_counts': download_counts,
        'citations': citations,
        'total_citations': total_citations,
        'doi': doi,
        'sub_category': sub_category,
        'git_url': url,
        'description': description,
        'category': category,
        'language': language,
        'docker_downloads': docker_downloads,
    }
)
df_extract


In [None]:
# Re-initialise itables with all_interactive=False - presumably so the frames
# shown by the following cells use the regular pandas output again.
init_notebook_mode(all_interactive=False)


In [None]:
import textwrap

def text_to_link(project_name, git_url):
    """Return an HTML anchor that shows ``project_name`` and points to ``git_url``.

    Both values are treated as untrusted text: ``&``, ``<`` and ``>`` in the
    label are HTML-escaped, and characters that are not legal in a URL (such
    as ``"``, ``<``, ``>`` or spaces) are percent-encoded so they cannot end
    the ``href`` attribute. Ordinary names and URLs come out unchanged.

    Parameters
    ----------
    project_name : object
        Label of the link; converted with ``str``.
    git_url : str or None
        Link target. When it is missing, empty or not a string, only the
        escaped label is returned instead of raising.

    Returns
    -------
    str
        The anchor markup, or the bare escaped label if there is no URL.
    """
    # Imported here so the helper does not depend on another cell's imports.
    from html import escape
    from urllib.parse import quote

    label = escape(str(project_name), quote=False)
    if not isinstance(git_url, str) or not git_url:
        return label
    # Every reserved/unreserved URL character stays as is ('%' included, so
    # existing percent-escapes are not encoded twice).
    safe_url = quote(git_url, safe=":/?#[]@!$&'()*+,;=%~-._")
    return '<a href="' + safe_url + '" target="_blank" style = "color: black">' + label + "</a>"

def text_to_bolt(topic):
    """Return the string ``topic`` enclosed in an HTML ``<b>`` element."""
    pieces = ("<b>", topic, "</b>")
    return "".join(pieces)

def line_break_text(text, width=64):
    """Wrap ``text`` to lines of at most ``width`` characters joined by ``<br>``.

    Parameters
    ----------
    text : str or None
        Text to wrap. ``None`` and NaN (a missing description) give an empty
        string instead of raising; other non-string values are converted with
        ``str``.
    width : int, optional
        Maximum line length passed to ``textwrap.wrap`` (default 64).

    Returns
    -------
    str
        The wrapped lines joined with ``<br>``; empty for empty/missing input.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    wrapped_text = '<br>'.join(textwrap.wrap(str(text), width))
    return wrapped_text
                               
# Insert <br> line breaks into the descriptions and turn every project name
# into a link to its repository.
# NOTE: this rewrites df_extract in place, so running the cell a second time
# would wrap the already generated links again - rebuild df_extract first.
df_extract["description"] = df_extract["description"].map(line_break_text)

df_extract["project_names"] = [
    text_to_link(project_name, git_url)
    for project_name, git_url in zip(df_extract["project_names"], df_extract["git_url"])
]
df_extract

In [None]:
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px

number_of_projects_to_show = 300
BAR_HEIGHT_PX = 20  # vertical space reserved per bar

# nlargest returns the rows sorted by download_counts (largest first) but keeps
# their original row labels. Renumbering makes index + 1 the position in the
# download ranking, which is what the hover text reports as "Ranking".
top_downloaders = (
    df_extract
    .nlargest(number_of_projects_to_show, "download_counts")
    .reset_index(drop=True)
)
top_downloaders.index.name = "ranking"
color_discrete_sequence = px.colors.qualitative.Vivid

fig = px.bar(
    top_downloaders,
    x="download_counts",
    y="project_names",
    # The position in this list is the customdata[...] index used by the hover template.
    custom_data=[
        "project_names",
        "download_counts",
        "docker_downloads",
        "git_url",
        "description",
        "category",
        "sub_category",
        "language",
        top_downloaders.index + 1,
    ],
    orientation="h",
    color='category',
    color_discrete_sequence=color_discrete_sequence,
)

# NOTE(review): this layout image has no `source`, so there is no picture to
# draw - confirm whether a logo was meant to be placed in the bottom-right corner.
fig.add_layout_image(
    dict(
        xref="paper",
        yref="paper",
        x=1,
        y=0,
        sizex=0.10,
        sizey=0.10,
        xanchor="right",
        yanchor="bottom",
    )
)

fig.update_layout(
    height=number_of_projects_to_show * BAR_HEIGHT_PX,
    title="Package downloads in the last month",
    xaxis_title="",
    yaxis_title=None,
    legend_title=None,
    dragmode=False,
    hovermode="y unified",
    hoverdistance=1000,
    plot_bgcolor='white',
    modebar=dict(bgcolor="rgba(0,0,0,0)", color="#009485", activecolor="#2563eb"),
    xaxis_type="log",
    xaxis_side="top",
    yaxis_categoryorder='total descending',
    # Reversed so the first category is drawn at the top of the chart.
    yaxis_autorange="reversed",
)

fig.update_traces(
    hovertemplate="<extra></extra>" + "<br>".join(
        [
            "Ranking: <b>%{customdata[8]}</b>",
            "Description: <b>%{customdata[4]}</b>",
            "Sub Category: <b>%{customdata[6]}</b>",
            "Language: <b>%{customdata[7]}</b>",
            "Downloads per month: <b>%{customdata[1]}</b>",
            "Category: <b>%{customdata[5]}</b>",
        ]
    )
)

fig.update_xaxes(showspikes=False)
fig.update_yaxes(showspikes=False)

config = {
    'toImageButtonOptions': {
        'format': 'png',  # one of png, svg, jpeg, webp
    },
    'responsive': True,  # boolean, not the string 'true'
}

fig.show(config=config)

# The config is not part of the figure itself, so it is passed to the HTML
# export explicitly; the JSON export contains the figure only.
pio.write_json(fig, "download_plot.json")
pio.write_html(fig, "download_plot.html", config=config)