# Data Mover Notebook 
## Prerequisites:
- Python 3.8
- PyCelonis 2.0 or later

In [14]:
#Please Note: If you see the following error in your console after each exercise, you may simply ignore it:

#Your PyCelonis Version 2.11.0 is outdated (Newest Version: 2.11.1). Please upgrade the package via: pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis pycelonis_core --upgrade


#Steps for this module

#1.run the following command inside of your terminal - !python3.8 -m pip install --upgrade pip 

!python3.8 -m pip install --upgrade pip 
!pip install --upgrade pycelonis

from pathlib import Path 
from pycelonis import get_celonis 
from pycelonis.pql import PQL, PQLColumn 
from pycelonis.ems import ExportType, JobType
import os, pycelonis
print("PyCelonis version:", pycelonis.__version__)


#If you followed this tutorial correctly, you should see something like the following printed in your console after running this module:

#PyCelonis version: 2.11.0

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m[31mERROR: Could not find a version that satisfies the requirement pycelonis (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pycelonis

## Step 1: Define Environment Variables
Replace the placeholders with your own details for the source and target environments.


In [10]:
import os, json
from pycelonis import get_celonis
from pathlib import Path

#Steps for this module:

#1. create a json file in the same directory and define the variables in the try block below (note that I have defined my file as config.json)
#2. put the file name in config_path

config_path = Path("config.json")
if config_path.is_file():
    with open(config_path) as f:
        config = json.load(f)
else:
    raise FileNotFoundError("Configuration file 'config.json' not found in the current directory.")

try:
    source_data_pool_id = config["source_data_pool_id"]
    source_data_model_id = config["source_data_model_id"]
    source_url = config["source_url"]
    source_api_token = config["source_api_token"]
    target_data_pool_id = config["target_data_pool_id"]
    target_data_model_id = config["target_data_model_id"]
    target_url = config["target_url"]
    target_api_token = config["target_api_token"]
    data_dir = Path(config.get("data_dir", Path().resolve() / "data"))
except KeyError as e:
    raise KeyError(f"Missing required configuration key: {e}")

print(f"Data directory: {data_dir}")

data_dir.mkdir(parents=True, exist_ok=True)

#If you followed this tutorial correctly, you should see something like the following printed in your console after running this module:

#Data directory: /Users/o.wu/owenfeature/pycelonis-examples/datamoverproject/data


Data directory: /Users/o.wu/owenfeature/pycelonis-examples/datamoverproject/data


## Step 2: Download data from source environment


In [15]:
#this module has no steps - run the module

celonis_from = get_celonis(base_url = source_url, api_token = source_api_token, permissions=False, key_type='APP_KEY') 
datapool_from = celonis_from.data_integration.get_data_pool(source_data_pool_id)
datamodel_from = datapool_from.get_data_model(source_data_model_id)
datamodel_from_tables = datamodel_from.get_tables()

print("Tables in this datamodel:")

for table in datamodel_from_tables:
    print(table.name) 

#if you followed this tutorial correctly, you should see something like the following printed in your console after running this module:

#Tables in this datamodel:
#newairqualityparquet


Your PyCelonis Version 2.11.0 is outdated (Newest Version: 2.11.1). Please upgrade the package via: pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis pycelonis_core --upgrade


Tables in this datamodel:
newairqualityparquet


## Step 3: Insert the tables you want to download

In [12]:
import json
from pathlib import Path


#Steps for this module:

#1. create a file.json in the same directory with a different name from step above (note that I have defined my json file as tables_to_download.json)
#2. input the json file name into the open method
#3. specify the directory under data_dir

with open("tables_to_download.json", "r") as file:
    config = json.load(file)
    tables_from = config["table_names"]

tables_from_detail = []

for i, Table in enumerate(tables_from):
    table = datamodel_from_tables.find(Table)
    if table:
        tables_from_detail.append(table)
        print("Added table: ", table.name)
    else:
        print(f"Table '{Table}' not found in datamodel.")

data_dir = Path("./data")  
data_dir.mkdir(parents=True, exist_ok=True)

def download_table_data(table):
    table_name = table.name
    query = PQL(distinct=False, limit=None)
    for column in table.get_columns():
        print(column)
        col_name = column.name
        if col_name != '_CELONIS_CHANGE_DATE':
            query += PQLColumn(name=col_name, query=f'"{table_name}"."{col_name}"')

    data_export = datamodel_from.create_data_export(query=query, export_type=ExportType.PARQUET)
    data_export.wait_for_execution()

    for i, chunk in enumerate(data_export.get_chunks()):  
        file_path = data_dir / f"{table_name}_{i}.parquet"
        with open(file_path, "wb") as file:
            file.write(chunk.read())
        print(f"Downloaded table: {table_name}, chunk: {i}")

for table in tables_from_detail:
    download_table_data(table) 

#if you followed this tutorial correctly, you should see something like the following printed in your console after running this module:

#Added table: newairqualityparquet



Added table:  newairqualityparquet
name='datetime' length=19 type_='STRING' client=<pycelonis_core.client.client.Client object at 0x137c55b80> data_pool_id='7873300c-ba33-4c61-ac8f-4d912f7c0312' data_model_id='cd425ef4-6815-4360-b155-388d54eea294' table_name='newairqualityparquet' table_alias=None
name='station_antwerp' length=15 type_='FLOAT' client=<pycelonis_core.client.client.Client object at 0x137c55b80> data_pool_id='7873300c-ba33-4c61-ac8f-4d912f7c0312' data_model_id='cd425ef4-6815-4360-b155-388d54eea294' table_name='newairqualityparquet' table_alias=None
name='station_paris' length=15 type_='FLOAT' client=<pycelonis_core.client.client.Client object at 0x137c55b80> data_pool_id='7873300c-ba33-4c61-ac8f-4d912f7c0312' data_model_id='cd425ef4-6815-4360-b155-388d54eea294' table_name='newairqualityparquet' table_alias=None
name='station_london' length=15 type_='FLOAT' client=<pycelonis_core.client.client.Client object at 0x137c55b80> data_pool_id='7873300c-ba33-4c61-ac8f-4d912f7c0312

## Step 6: Upload Data to Target Environment


In [16]:
import os

#Steps for this module:

#1. define the local path where your parquet file inside of parquet_file_path

parquet_file_path = "/Users/o.wu/Downloads/newairqualityparquet.parquet"

celonis_to = get_celonis(base_url=target_url, api_token=target_api_token, permissions=False, key_type='APP_KEY')
datapool_to = celonis_to.data_integration.get_data_pool(target_data_pool_id)
print(datapool_to.id)

#2. define your table names inside of table_names (note that the table I will upload is called "newairqualityparquet")

table_names = ["newairqualityparquet"]  
for table in table_names:
    data_push_job = datapool_to.create_data_push_job(target_name=table, type_=JobType.REPLACE)
    
    with open(parquet_file_path, "rb") as file:
        data_push_job.add_file_chunk(file)
        data_push_job.execute(wait=True)  
    
    print(f"Uploaded table: {table}")


#if you followed this tutorial correctly, you should see something like the following printed in your console after running this module:


#TableID

#Uploaded table: newairqualityparquet



Your PyCelonis Version 2.11.0 is outdated (Newest Version: 2.11.1). Please upgrade the package via: pip install --extra-index-url=https://pypi.celonis.cloud/ pycelonis pycelonis_core --upgrade


28050e78-1a20-417e-9f21-c6185c1505ba
Uploaded table: newairqualityparquet


## Notes
- **User Instructions**: Update variables in each section as per your environment’s requirements.
- **Modularity**: Functions for downloading and uploading tables improve reusability.
- **Logs and Feedback**: Each major step includes feedback, so the user knows the status of each operation.
