# OSSR Statistics

In [None]:
from eossr.api.ossr import get_ossr_records
from eossr.api.zenodo import search_records
from IPython.display import Markdown as md
from datetime import date
import itertools
import matplotlib.pyplot as plt

In [None]:
# Select the 'seaborn-v0_8-colorblind' matplotlib style sheet for the plots drawn below.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require matplotlib >= 3.6 — confirm.
plt.style.use('seaborn-v0_8-colorblind')

In [None]:
import matplotlib.pyplot as plt
from collections import Counter
from wordcloud import WordCloud

def create_pie_chart(occurrences, ax=None):
    """Draw a pie chart of how often each distinct value appears.

    Parameters
    ----------
    occurrences : iterable of hashable
        Values to tally; each distinct value becomes one wedge, labelled
        with the value itself.
    ax : matplotlib axes, optional
        Axes to draw on. The current axes (``plt.gca()``) are used when
        omitted.

    Returns
    -------
    The axes the pie chart was drawn on.
    """
    tally = Counter(occurrences)

    if ax is None:
        ax = plt.gca()

    # Wedges follow first-seen order of the values; each one is annotated
    # with its share as a percentage with one decimal.
    labels, sizes = list(tally.keys()), list(tally.values())
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    return ax


def create_word_cloud(words):
    """Render a word cloud weighted by how often each word occurs.

    Words are upper-cased before counting, so entries that differ only in
    case are tallied together.

    Parameters
    ----------
    words : iterable of str
        Words to count; repeated entries increase that word's weight.

    Returns
    -------
    The newly created 10x6 inch matplotlib figure showing the cloud with
    the axes hidden.
    """
    # Upper-cased tally, kept as a plain dict in first-seen order.
    frequencies = dict(Counter(word.upper() for word in words))

    cloud = WordCloud(
        width=1000,
        height=600,
        background_color='white',
        stopwords=set(),
    ).generate_from_frequencies(frequencies)

    # Show the rendered cloud on a fresh figure, without axes or padding.
    fig = plt.figure(figsize=(10, 6), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    return fig


In [None]:
title = f"## OSSR statistics generated the {date.today()}\n"

# Fetch the OSSR records and total each usage statistic across all of them.
ossr_records = get_ossr_records()
stats_names = ['downloads', 'unique_downloads', 'views', 'unique_views']
sum_stats = {
    key: sum(int(rec.data['stats'][key]) for rec in ossr_records)
    for key in stats_names
}

# Build a one-row Markdown table: header, separator, then the totals.
header_row = "".join(f"| {name.replace('_', ' ')} " for name in sum_stats) + "| \n"
separator_row = "|----" * len(sum_stats) + "| \n"
totals_row = "".join(f"| {total} " for total in sum_stats.values()) + "| \n"

text = "".join([
    title,
    "\n",
    f"### There are {len(ossr_records)} records in the OSSR.\n",
    header_row,
    separator_row,
    totals_row,
])

In [None]:
# Render the statistics summary (title, record count and totals table) as Markdown.
md(text)

In [None]:
# Every record of the `escape2020` community, whatever its type.
escape2020_community_records = search_records(communities='escape2020')

url_escape2020_not_ossr = 'https://zenodo.org/communities/escape2020/search?page=1&size=20&q=&type=publication&type=lesson&type=poster'

# Community records beyond those returned as OSSR records.
# NOTE(review): assumes search_records returns all community records rather
# than a single page of results — confirm against the eossr API.
n_other_records = len(escape2020_community_records) - len(ossr_records)

# `text` is reused here: it now holds this note instead of the statistics table.
text = (
    f"**Note that there are also {n_other_records} records in the `escape2020` community that are not software or datasets.**\n\n"
    f"You may find them [directly on Zenodo]({url_escape2020_not_ossr})"
)


In [None]:
# Section heading for the license pie chart, rendered as Markdown.
md("## Distribution of licenses in the OSSR")

In [None]:
# One license identifier per record; records without license metadata are skipped.
licenses = [
    rec.metadata['license']['id']
    for rec in ossr_records
    if 'license' in rec.metadata
]

# Draw on a new 7x7 inch figure; the trailing ';' keeps the notebook from
# echoing the returned axes object.
plt.figure(figsize=(7, 7))
create_pie_chart(licenses);

In [None]:
# Section heading for the keyword word cloud, rendered as Markdown.
md("## Cloud of OSSR keywords")

In [None]:
# Flatten the per-record keyword lists into a single list, keeping duplicates
# so that repeated keywords carry more weight in the word cloud.
keyword_lists = (
    rec.metadata['keywords']
    for rec in ossr_records
    if 'keywords' in rec.metadata
)
keywords = list(itertools.chain.from_iterable(keyword_lists))

In [None]:
# Draw the keyword cloud; the trailing ';' keeps the notebook from echoing the returned figure.
create_word_cloud(keywords);

In [None]:
# `text` was reassigned to the escape2020 note built earlier (it no longer holds
# the statistics table), so this renders that note as the closing remark.
md(text)