In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install tqdm
!pip install ipywidgets

In [None]:
!pip install SPARQLWrapper

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Create the SPARQL client for the Australian Curriculum endpoint.
# The same `sparql` object is reused by the query cell below.
sparql = SPARQLWrapper("https://rdf.australiancurriculum.edu.au/api/sparql")

In [None]:
#########################################################################################
# WARNING: The following is not working because the Australian Curriculum endpoint expects
# the query parameter for the query to be called 'q', whereas most SPARQL implementations
# use `query` :-(
#########################################################################################
# Ask for the 10 most frequently used concept terms.
sparql.setQuery("""
    PREFIX asn: <http://purl.org/ASN/schema/core/>

    SELECT ?concept (COUNT(*) AS ?frequency)
    WHERE {
        ?subject asn:conceptTerm ?concept
    } 
    GROUP BY ?concept
    ORDER BY DESC(?frequency)
    LIMIT 10
""")

# Request JSON and decode the response. `query()` only returns a QueryResult
# wrapper around the HTTP response; `convert()` parses it into a dictionary,
# which is what the subscripting below requires.
sparql.setReturnFormat(JSON)
result = sparql.query().convert()

# The decoded data contains "bindings" (a list of dictionaries)
for hit in result["results"]["bindings"]:
    # We want the "value" attribute of the "concept" field
    print(hit["concept"]["value"])

## Read concepts from dataset
The dataset was created by running the query below against the SPARQL test page at
[https://rdf.australiancurriculum.edu.au/sparqltest](https://rdf.australiancurriculum.edu.au/sparqltest).

The following query was used:

```
PREFIX asn: <http://purl.org/ASN/schema/core/>

SELECT ?concept (COUNT(*) AS ?frequency)
WHERE {
    ?subject asn:conceptTerm ?concept
} 
GROUP BY ?concept
ORDER BY DESC(?frequency)
```

In [None]:
import json

# Read the saved SPARQL result set. JSON is UTF-8 encoded, so state the encoding
# explicitly instead of relying on the platform's default locale.
with open('/kaggle/input/lakathon21oer-predictionaus-curriculum-concepts/concept_results.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [None]:
# Pull out the list of result rows stored under results -> bindings in the parsed JSON.
bindings = data['results']['bindings']

In [None]:
# (concept URI, frequency) pairs, one per result row. Built as a list rather than a
# generator so the pairs are not used up the first time they are consumed; cells
# that read `concept_frequencies` can then be re-run and still see all rows.
concept_frequencies = [
    (element['concept']['value'], int(element['frequency']['value']))
    for element in bindings
]

In [None]:
# Build the working table: one row per concept with its URI and frequency.
# NOTE: this rebinds `data`, which until now held the parsed JSON file, so the
# earlier `bindings` cell cannot be re-run after this one without reloading the file.
data = pd.DataFrame(concept_frequencies, columns=['concept_uri', 'frequency'])

In [None]:
# Check the column types of the concept/frequency table.
data.dtypes

In [None]:
from matplotlib import rcParams

# Default matplotlib figure size as (width, height) in inches.
rcParams['figure.figsize'] = (11.7, 8.27)

In [None]:
# Distribution of concept frequencies. The plot size is set through
# `height` (inches) and `aspect` (width = height * aspect).
sns.displot(data=data, x="frequency", height=10, aspect=2)

In [None]:
!pip install rdflib

## Load preferred label for each concept

In [None]:
from rdflib import Graph, URIRef

def format_concept(row):
    """Fetch the English preferred label of the concept referenced by ``row``.

    The RDF/XML description of the concept is downloaded from
    ``<concept_uri>.rdf`` and searched for an English-tagged label, trying
    ``skos:prefLabel`` first and falling back to ``rdfs:label``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a
            ``'concept_uri'`` entry holding the concept's URI.

    Returns:
        The text of the first matching label, or ``''`` when the concept has
        no English label.

    Note:
        ``Graph.load`` and ``Graph.preferredLabel`` are not available in
        current rdflib releases, so ``Graph.parse`` and an explicit lookup
        are used instead.
    """
    # Local import keeps this cell self-contained.
    from rdflib.namespace import RDFS, SKOS

    uri = row['concept_uri']
    g = Graph()
    # `load` defaulted to RDF/XML; state the format explicitly for `parse`.
    g.parse(f'{uri}.rdf', format='xml')

    subject = URIRef(uri)
    for label_property in (SKOS.prefLabel, RDFS.label):
        for label in g.objects(subject, label_property):
            # Objects that are not literals have no language tag; skip them.
            if getattr(label, 'language', None) == 'en':
                return str(label)
    return ''

In [None]:
# from tqdm.notebook import tqdm, trange
from tqdm import tqdm, notebook
import time

# Register tqdm with pandas; the next cell relies on this to call
# `progress_apply` on the DataFrame.
tqdm.pandas()

In [None]:
# Look up a label for every concept, row by row, with a progress bar.
# `format_concept` downloads one RDF document per row, so this cell is slow.
data['label'] = data.progress_apply(format_concept, axis=1)

In [None]:
# Display the table, which now also carries the `label` column.
data

## Save concepts & labels to file

In [None]:
# Write the table to a CSV file in the current working directory.
# The DataFrame index is written as well (pandas default), as the first column.
data.to_csv('aus_curriculum_concept_labels.csv')

## Trying some RDFLib functions to get other labels

In [None]:
from rdflib.namespace import SKOS

# Load one example concept and print all of its alternative labels.
# `Graph.parse` is used because current rdflib releases no longer provide
# `Graph.load`; the format is explicit because `load` defaulted to RDF/XML.
g1 = Graph()
g1.parse('http://vocabulary.curriculum.edu.au/scot/15075.rdf', format='xml')

for o in g1.objects(subject=None, predicate=SKOS.altLabel):
    print(o)

In [None]:
# Show the Python type and the text of every skos:altLabel value in the graph.
for _subject, _predicate, alt_label in g1.triples((None, SKOS.altLabel, None)):
    print(type(alt_label))
    print(alt_label)

In [None]:
# For every skos:altLabel triple, show the object's type, its language tag,
# and the complete (subject, predicate, object) triple.
for triple in g1.triples((None, SKOS.altLabel, None)):
    alt_label = triple[2]
    print(type(alt_label))
    print(alt_label.language)
    print(triple)

In [None]:
# For every skos:scopeNote triple, show the object's type, its language tag,
# and the complete (subject, predicate, object) triple.
for triple in g1.triples((None, SKOS.scopeNote, None)):
    scope_note = triple[2]
    print(type(scope_note))
    print(scope_note.language)
    print(triple)