In [None]:
# Prerequisite: the MarkLogic Python client. Uncomment the next line to install it.
# %pip install marklogic_python_client

from marklogic import Client

# Build the client used by the rest of the notebook. It talks to the server on
# localhost:8000 (the out-of-the-box Documents database) using digest authentication.
base_url = "http://localhost:8000"
digest_credentials = ("python-user", "pyth0n")
client = Client(base_url, digest=digest_credentials)

In [None]:
# Define a TDE (Template Driven Extraction) view that projects rows from documents
# in the "employee" collection, then store the template in the Schemas database.

from marklogic.documents import Document

# One entry per projected column: (column name, scalar type, value expression).
column_specs = [
    ("lastName", "string", "Surname"),
    ("firstName", "string", "GivenName"),
    ("state", "string", "State"),
    ("department", "string", "Department"),
    ("salary", "int", "BaseSalary"),
]

employee_rows = {
    "schemaName": "example",
    "viewName": "employee",
    "columns": [
        {"name": name, "scalarType": scalar_type, "val": val}
        for name, scalar_type, val in column_specs
    ],
}

tde_view = {
    "template": {
        "context": "/",
        "collections": ["employee"],
        "rows": [employee_rows],
    }
}

# The template document is tagged with the TDE collection and written to the
# Schemas database via the "database" request parameter.
tde_document = Document(
    "/tde/employees.json",
    tde_view,
    permissions={"rest-reader": ["read", "update"]},
    collections=["http://marklogic.com/xdmp/tde"],
)
client.documents.write(tde_document, params={"database": "Schemas"})

In [None]:
# Load 500 JSON documents into the "employee" collection.

import json

import requests
from marklogic.documents import Document, DefaultMetadata

# Download the sample data. A timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces an HTTP error (404, 500, ...)
# directly instead of letting it show up later as a confusing JSON decode failure.
r = requests.get(
    'https://raw.githubusercontent.com/marklogic/marklogic-spark-connector/master/src/test/resources/500-employees.json',
    timeout=30,
)
r.raise_for_status()

# The metadata entry goes first in the list, ahead of the documents it is meant to
# cover (presumably it supplies the permissions/collection for the documents that
# follow -- see the MarkLogic Python client docs for DefaultMetadata).
docs = [
    DefaultMetadata(permissions={"rest-reader": ["read", "update"]}, collections=["employee"])
]

# Each downloaded entry carries the target URI and the document body under "value";
# the body is serialized back to a JSON string for writing.
docs.extend(
    Document(employee['uri'], json.dumps(employee['value']))
    for employee in json.loads(r.text)
)

client.documents.write(docs)

In [None]:
# The view can be queried with MarkLogic's Optic query language.
# Only the "rows" entry of the response is displayed.

optic_query = "op.fromView('example', 'employee', '').limit(3)"
optic_response = client.rows.query(optic_query)
optic_response["rows"]

In [None]:
# The same view can be queried with SQL.
# Only the "rows" entry of the response is displayed.

sql_query = "select * from example.employee order by lastName limit 3"
sql_response = client.rows.query(sql=sql_query)
sql_response["rows"]

In [None]:
# The same view can be queried with GraphQL; the full response is displayed.

graphql_query = "query myQuery { example_employee { lastName firstName } }"
client.rows.query(graphql=graphql_query)

In [None]:
# Rows can be requested as CSV text, which pandas can parse directly.

import io
import pandas

# Ask for the whole view in CSV format.
employee_view_query = "op.fromView('example', 'employee', '')"
csv_data = client.rows.query(employee_view_query, format="csv")

# Wrap the CSV text in a file-like buffer so read_csv can consume it.
csv_buffer = io.StringIO(csv_data)
df = pandas.read_csv(csv_buffer)
df


In [None]:
# Render matplotlib figures inline in the notebook output.
# Note: this magic only configures rendering; it does not install matplotlib.

%matplotlib inline

In [None]:
# Bar chart of how many rows fall into each department.

department_counts = df['department'].value_counts()
department_counts.plot(kind='bar')

In [None]:
# Can use MarkLogic Spark connector with Python.
# First create a Spark session that has access to the MarkLogic Spark connector jar file.

import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars "/Users/rudin/marklogic-spark-connector-2.2.0.jar" pyspark-shell'

%pip install pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[*]").appName('My Notebook').getOrCreate()
spark.sparkContext.setLogLevel("WARN")
spark

In [None]:
# Build a Spark DataFrame from the rows returned by a MarkLogic Optic query.

# Configure the "marklogic" data source: where to connect and which query to run.
marklogic_reader = (
    spark.read.format("marklogic")
    .option("spark.marklogic.client.uri", "python-user:pyth0n@localhost:8000")
    .option("spark.marklogic.read.opticQuery", "op.fromView('example', 'employee', '')")
)
df = marklogic_reader.load()

df.show()