# Old way to use the APIs directly

In [1]:
from google.oauth2 import service_account

In [2]:
# Location of the service-account key file (JSON) used for authentication.
service_account_file = "manav-jit-test-2f1ad5006d9b.json"

# Load the key and request the broad cloud-platform OAuth scope.
credentials = service_account.Credentials.from_service_account_file(
    service_account_file,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

In [3]:
from googleapiclient.discovery import build
# Build a discovery-based client for the Data Pipelines v1 API using the
# service-account credentials; the discovery-document cache is disabled.
dp_service = build(
    "datapipelines",
    "v1",
    credentials=credentials,
    cache_discovery=False,
)

In [4]:
# Ask the API for the pipelines under one project/location and run the
# request; the response is kept in `res` for inspection in the next cells.
res = (
    dp_service.projects()
    .locations()
    .listPipelines(parent="projects/manav-jit-test/locations/us-central1")
    .execute()
)

In [5]:
# Check what kind of object the raw client hands back for the list call.
type(res)

dict

In [6]:
# Look at the first entry under the "pipelines" key of the raw response.
res["pipelines"][0]

{'name': 'projects/manav-jit-test/locations/us-central1/pipelines/test-xqhu-word-count-1',
 'displayName': 'test-xqhu-word-count-1',
 'type': 'PIPELINE_TYPE_BATCH',
 'state': 'STATE_ACTIVE',
 'createTime': '2022-11-13T02:48:52.607924Z',
 'lastUpdateTime': '2022-11-13T02:48:52.607924Z',
 'workload': {'dataflowLaunchTemplateRequest': {'projectId': 'manav-jit-test',
   'gcsPath': 'gs://dataflow-templates/latest/Word_Count',
   'launchParameters': {'jobName': 'test-xqhu-word-count-1',
    'parameters': {'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt',
     'output': 'gs://pydf_test_bucket/results/output'},
    'environment': {'tempLocation': 'gs://pydf_test_bucket/results'}},
   'location': 'us-central1'}}}

In [7]:
# With the raw API, individual fields are reached by string keys.
res["pipelines"][0]["displayName"]

'test-xqhu-word-count-1'

# New way to manage data pipelines

In [8]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the local `df` package are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [9]:
from df import models as dm
from df import dataflow as flow

In [10]:
# Wrapper object for the library, configured with one project and location.
dw = flow.Dataflow(
    project_id="manav-jit-test",
    location_id="us-central1",
)

In [12]:
# List all data pipelines through the wrapper; the result is kept in `dps`
# and displayed in the next cell.
dps = dw.list_data_pipelines()

In [13]:
# Display the pipelines returned by the wrapper.
dps

[DataPipeline(short_name='test-word-count', name='projects/manav-jit-test/locations/us-central1/pipelines/test-word-count', display_name='test-word-count', type='PIPELINE_TYPE_BATCH', state='STATE_ACTIVE'),
 DataPipeline(short_name='test-xqhu', name='projects/manav-jit-test/locations/us-central1/pipelines/test-xqhu', display_name='test-xqhu', type='PIPELINE_TYPE_BATCH', state='STATE_ARCHIVED'),
 DataPipeline(short_name='syndeo-streaming-test', name='projects/manav-jit-test/locations/us-central1/pipelines/syndeo-streaming-test', display_name=None, type='PIPELINE_TYPE_STREAMING', state='STATE_ACTIVE'),
 DataPipeline(short_name='abc8', name='projects/manav-jit-test/locations/us-central1/pipelines/abc8', display_name='abc8', type='PIPELINE_TYPE_BATCH', state='STATE_ACTIVE'),
 DataPipeline(short_name='abc7', name='projects/manav-jit-test/locations/us-central1/pipelines/abc7', display_name='abc7', type='PIPELINE_TYPE_BATCH', state='STATE_ACTIVE'),
 DataPipeline(short_name='abc4', name='proje

In [14]:
# Inspect the raw API payload behind the first pipeline through the public
# `raw_results` accessor instead of a private attribute.
# NOTE(review): assumes listed pipelines expose `raw_results` the same way the
# object returned by `create_data_pipeline` does - confirm against `df.models`.
dps[0].raw_results

{'name': 'projects/manav-jit-test/locations/us-central1/pipelines/test-word-count',
 'displayName': 'test-word-count',
 'type': 'PIPELINE_TYPE_BATCH',
 'state': 'STATE_ACTIVE',
 'createTime': '2022-11-13T01:38:19.929693Z',
 'lastUpdateTime': '2022-11-13T01:38:58.249208Z',
 'workload': {'dataflowLaunchTemplateRequest': {'projectId': 'manav-jit-test',
   'gcsPath': 'gs://dataflow-templates-us-central1/latest/Word_Count',
   'launchParameters': {'jobName': 'test-word-count',
    'parameters': {'output': 'gs://tmp_xqhu',
     'inputFile': 'gs://tmp_xqhu/kinglear.txt'},
    'environment': {'tempLocation': 'gs://tmp_xqhu/temp/'}},
   'location': 'us-central1'}},
 'scheduleInfo': {'schedule': '40 20 * * *',
  'timeZone': 'America/New_York',
  'nextJobTime': '2023-02-12T01:40:00.098290Z'},
 'jobCount': 91}

In [15]:
# Describe a new word-count data pipeline with the builder class from
# `df.options`; it is submitted later via `dw.create_data_pipeline`.
from df import options as op

word_count_dp = op.WordCountDataPipeline(
    short_name="test-xqhu-word-count-2",
    input_file="gs://dataflow-samples/shakespeare/kinglear.txt",
    output_file="gs://pydf_test_bucket/results/output",
    temp_location="gs://pydf_test_bucket/results",
)


In [16]:
# Show the builder's repr, including fields that were not passed explicitly.
word_count_dp

WordCountDataPipeline(short_name='test-xqhu-word-count-2', scheduler='15 * * * *', time_zone='America/New_York', name=None, display_name='test-xqhu-word-count-2', type=None, state=None, gcs_path='gs://dataflow-templates/latest/Word_Count', input_file='gs://dataflow-samples/shakespeare/kinglear.txt', output_file='gs://pydf_test_bucket/results/output', temp_location='gs://pydf_test_bucket/results')

In [17]:
# Preview the payload body for the create API, built from this pipeline
# definition together with the `dw` wrapper object.
word_count_dp.body(dw)

{'name': 'projects/manav-jit-test/locations/us-central1/pipelines/test-xqhu-word-count-2',
 'displayName': 'test-xqhu-word-count-2',
 'type': 'PIPELINE_TYPE_BATCH',
 'workload': {'dataflowLaunchTemplateRequest': {'projectId': 'manav-jit-test',
   'gcsPath': 'gs://dataflow-templates/latest/Word_Count',
   'launchParameters': {'jobName': 'test-xqhu-word-count-2',
    'parameters': {'output': 'gs://pydf_test_bucket/results/output',
     'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt'},
    'environment': {'tempLocation': 'gs://pydf_test_bucket/results'}},
   'location': 'us-central1'}},
 'scheduleInfo': {'schedule': '15 * * * *', 'timeZone': 'America/New_York'}}

In [18]:
# Submit the pipeline definition through the wrapper and keep the returned
# object so it can be inspected and cleaned up below.
new_dp = dw.create_data_pipeline(word_count_dp)

In [19]:
# Inspect the raw results attached to the newly created pipeline object.
new_dp.raw_results

{'name': 'projects/manav-jit-test/locations/us-central1/pipelines/test-xqhu-word-count-2',
 'displayName': 'test-xqhu-word-count-2',
 'type': 'PIPELINE_TYPE_BATCH',
 'state': 'STATE_ACTIVE',
 'createTime': '2023-02-11T15:13:11.940683Z',
 'lastUpdateTime': '2023-02-11T15:13:11.940683Z',
 'workload': {'dataflowLaunchTemplateRequest': {'projectId': 'manav-jit-test',
   'gcsPath': 'gs://dataflow-templates/latest/Word_Count',
   'launchParameters': {'jobName': 'test-xqhu-word-count-2',
    'parameters': {'output': 'gs://pydf_test_bucket/results/output',
     'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt'},
    'environment': {'tempLocation': 'gs://pydf_test_bucket/results'}},
   'location': 'us-central1'}},
 'scheduleInfo': {'schedule': '15 * * * *',
  'timeZone': 'America/New_York',
  'nextJobTime': '1970-01-01T00:00:00Z'}}

In [20]:
# Clean up: remove the data pipeline that was created in this demo.
new_dp.delete()