In [3]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Sample records, stored column-wise (one list per column)
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
}

# Explicit Arrow schema: one field per column, with the Arrow type the
# column is stored as
schema = pa.schema([
    pa.field('Name', pa.string()),
    pa.field('Age', pa.int32()),
    pa.field('City', pa.string()),
])

# Build the Pandas DataFrame from the sample records
df = pd.DataFrame(data)

# DataFrame -> Arrow Table (typed by the schema above) -> Parquet file
parquet_file = 'example_with_schema.parquet'
table = pa.Table.from_pandas(df, schema=schema)
pq.write_table(table, parquet_file)

# Parquet file -> Arrow Table -> DataFrame
table_read = pq.read_table(parquet_file)
df_read = table_read.to_pandas()

# ===== NEW: Add CSV Export Functionality =====
# Write the DataFrame read back from Parquet to a CSV file, leaving out the
# row index, then report where it went.
csv_file = 'people_data.csv'
df_read.to_csv(path_or_buf=csv_file, index=False)
print(f"\nData successfully exported to CSV: {csv_file}")

# ===== NEW: Generate Ontop Mapping File =====
# R2RML mapping (Turtle syntax): each row becomes a :Person resource whose IRI
# is built from the Name column, with :name, :age and :livesIn properties
# taken from the Name, Age and City columns.
#
# Every prefix used in the body has to be declared or the file is not valid
# Turtle: `rr:` is the R2RML vocabulary and `mapping:` is the namespace the
# TriplesMap itself is named in.
#
# The doubled braces around Name are f-string escapes; the written file
# contains the R2RML column reference {Name}.
#
# NOTE(review): rr:tableName is set to the CSV file name -- confirm that the
# data source Ontop connects to really exposes a table under that name.
mapping_content = f"""@prefix : <http://example.org/ontology#> .
@prefix mapping: <http://example.org/mapping#> .
@prefix rr: <http://www.w3.org/ns/r2rml#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

mapping:Person a rr:TriplesMap;
    rr:logicalTable [
        rr:tableName "{csv_file}"
    ];

    rr:subjectMap [
        rr:template "http://example.org/person/{{Name}}";
        rr:class :Person
    ];

    rr:predicateObjectMap [
        rr:predicate :name;
        rr:objectMap [ rr:column "Name" ]
    ];

    rr:predicateObjectMap [
        rr:predicate :age;
        rr:objectMap [ rr:column "Age"; rr:datatype xsd:integer ]
    ];

    rr:predicateObjectMap [
        rr:predicate :livesIn;
        rr:objectMap [ rr:column "City" ]
    ].
"""

mapping_file = 'mapping.ttl'
# Explicit encoding so the file's bytes do not depend on the platform default.
with open(mapping_file, 'w', encoding='utf-8') as f:
    f.write(mapping_content)
print(f"Ontop mapping file created: {mapping_file}")

# ===== Example SPARQL Queries =====
# Plain (non-f) string: nothing is interpolated, so the SPARQL braces can be
# written literally instead of being doubled.
sparql_examples = """
# Example SPARQL Queries you can run with Ontop:

# 1. Get all people
SELECT ?person ?name ?age WHERE {
    ?person a :Person ;
            :name ?name ;
            :age ?age .
}

# 2. Filter by age
SELECT ?name WHERE {
    ?person a :Person ;
            :name ?name ;
            :age ?age .
    FILTER (?age > 30)
}

# 3. Count people by city
SELECT ?city (COUNT(?person) as ?count) WHERE {
    ?person a :Person ;
            :livesIn ?city .
}
GROUP BY ?city
"""

# Heading and query text on consecutive lines, as a single print call
print("\nSPARQL Query Examples:", sparql_examples, sep="\n")


Data successfully exported to CSV: people_data.csv
Ontop mapping file created: mapping.ttl

SPARQL Query Examples:

# Example SPARQL Queries you can run with Ontop:

# 1. Get all people
SELECT ?person ?name ?age WHERE {
    ?person a :Person ;
            :name ?name ;
            :age ?age .
}

# 2. Filter by age
SELECT ?name WHERE {
    ?person a :Person ;
            :name ?name ;
            :age ?age .
    FILTER (?age > 30)
}

# 3. Count people by city
SELECT ?city (COUNT(?person) as ?count) WHERE {
    ?person a :Person ;
            :livesIn ?city .
}
GROUP BY ?city

