### 1. Load packages and set global variables

In [1]:
import os
import pandas as pd
from datetime import datetime as dt
import requests
import json
#import csv
import database as db # our script for easier db integration

# set data directory (a "Data" folder under the current working directory)
data_path = os.getcwd() + "/Data/"

# list all available features; cells below pick an endpoint by position in this list
endpoints = ['deals','organizations','persons','products']

# Pipedrive API token -- eventually can update to oauth 2.0.
# Prefer the PIPEDRIVE_API_TOKEN environment variable so the secret does not have to live in the notebook.
# SECURITY: the literal fallback below is a credential committed to source; it is kept only so
# existing runs keep working and should be rotated and removed.
api_token = os.environ.get('PIPEDRIVE_API_TOKEN', 'cd42385eac29c54911d1dc82eb2f96eff29b8d9d')

# connection to Postgres database (opened through the project's `database` helper module)
conn = db.connect()

### 2. Extract data from Pipedrive

In [3]:
def get_data(api_token, endpoint):
    """Fetch one Pipedrive endpoint and return the value of its 'data' key.

    Parameters
    ----------
    api_token : str
        Token sent as the `api_token` query parameter.
    endpoint : str
        Path segment appended to the company API base URL (e.g. 'products').

    Returns
    -------
    The object stored under 'data' in the JSON response body.

    Raises
    ------
    requests.HTTPError
        If the response status code is anything other than 200. Previously this
        case fell through to `return json_data` with the name unbound, which
        surfaced as an unrelated UnboundLocalError.
    """
    # define url params
    company_domain = 'https://actdental-c8c401.pipedrive.com/v1/'
    parameter = {'api_token': api_token}

    response = requests.get(company_domain + endpoint, params=parameter)

    # Fail loudly with the status code instead of returning an undefined value.
    if response.status_code != 200:
        raise requests.HTTPError(
            'Pipedrive request for {} failed with HTTP status {}'.format(endpoint, response.status_code),
            response=response,
        )

    json_data = response.json()['data']
    print('Retrieved {} data from Pipedrive'.format(endpoint))
    return json_data

In [4]:
def products_to_df(json_data, dtm_to_dte=True):
    """Turn Pipedrive product records into one flat DataFrame with price columns.

    Parameters
    ----------
    json_data : list of dict
        Product records; each must carry the product fields selected below and
        a 'prices' list whose entries have 'id', 'price' and 'product_id'.
    dtm_to_dte : bool, default True
        When True the parsed 'insert_dte' / 'update_dte' timestamps are reduced
        to plain dates; when False they stay as timestamps.

    Returns
    -------
    pandas.DataFrame
        The renamed product columns followed by 'price_id' and 'price_amt'.
    """
    # source field -> output column; the key order also fixes the column order
    product_columns = {
        'id': 'product_id',
        'name': 'product_nme',
        'code': 'product_cde',
        'description': 'product_dsc',
        'active_flag': 'active_ind',
        'owner_id.id': 'owner_id',
        'owner_id.name': 'owner_nme',
        'add_time': 'insert_dte',
        'update_time': 'update_dte',
    }
    price_columns = {'id': 'price_id', 'price': 'price_amt'}

    # one row per product, nested owner fields flattened to dotted names
    product_frame = pd.json_normalize(json_data)[list(product_columns)].rename(columns=product_columns)

    # one row per entry of each product's 'prices' list, keyed by product
    price_frame = (
        pd.json_normalize(json_data, 'prices')[['id', 'price', 'product_id']]
        .rename(columns=price_columns)
        .set_index('product_id')
    )

    # attach price columns to their product -- unnecessary to keep both frames
    combined = product_frame.join(price_frame, on='product_id')

    # parse the two time columns, optionally keeping only the date part
    for stamp_col in ('insert_dte', 'update_dte'):
        parsed = pd.to_datetime(combined[stamp_col])
        combined[stamp_col] = parsed.dt.date if dtm_to_dte else parsed

    # NOTE: the optional CSV export to data_path is disabled; the frame is only returned.
    return combined

In [5]:
# Pull the raw product records from Pipedrive; endpoints[3] is 'products' in the list defined in section 1.
products_json = get_data(api_token, endpoints[3])

Retrieved products data from Pipedrive


In [6]:
# Flatten the product JSON into a DataFrame; with the default dtm_to_dte=True the time columns hold dates only.
product_data = products_to_df(products_json)

In [8]:
# drop and recreate table
# `if exists` keeps this cell re-runnable when the table has not been created yet
# (fresh database, or a previous run that stopped before the create statement).
db.execute_query(conn, "drop table if exists products")
# column names match the DataFrame columns produced by products_to_df
db.execute_query(conn, "create table products(product_id int primary key, product_nme varchar(50), product_cde varchar(10), product_dsc varchar(100), active_ind boolean, owner_id int, owner_nme varchar(50), insert_dte date, update_dte date, price_id int, price_amt numeric(9,2))")

0

In [10]:
# insert data from dataframe
# Hands product_data and the target table name 'products' to the project's db helper.
# NOTE(review): presumably the DataFrame column names must match the table columns -- confirm in database.py.
db.execute_values(conn, product_data, 'products')

execute_values() done


In [11]:
# validate that data is inserted
# The call is the cell's last expression, so whatever the helper returns is shown as the cell output.
db.execute_query(conn, 'select product_id from products')

[(1,), (3,), (4,), (5,), (6,), (8,), (9,)]

### 3. Paginated extraction from Pipedrive (work in progress)

In [None]:
def paginate(endpoint, my_data=None, limit=1, start=0, api_token='cd42385eac29c54911d1dc82eb2f96eff29b8d9d', domain='actdental-c8c401'):
    """Fetch every page of a Pipedrive endpoint and collect each page's 'data'.

    Parameters
    ----------
    endpoint : str
        Path segment of the API resource (e.g. 'products').
    my_data : list, optional
        List to append pages to. A new list is created when omitted, so
        separate calls no longer share (and keep growing) one default list.
    limit : int, default 1
        Page size sent as the `limit` query parameter on every request
        (previously only the first request honoured a caller-supplied value).
    start : int, default 0
        Offset of the first item to request.
    api_token : str
        Token sent as the `api_token` query parameter on every request.
        SECURITY: the default is a credential committed to source; it is kept
        for interface compatibility and should be rotated and passed explicitly.
    domain : str, default 'actdental-c8c401'
        Company subdomain used to build the URL; replaces the earlier reliance
        on a global `domain` that had to be defined before calling.

    Returns
    -------
    list
        `my_data` with one appended element per page, each element being that
        page's 'data' value (i.e. a list of pages, not a flat list of records).

    Raises
    ------
    requests.HTTPError
        If any page responds with a status code other than 200.
    """
    if my_data is None:
        my_data = []

    url = 'https://{domain}.pipedrive.com/v1/{endpoint}'.format(domain=domain, endpoint=endpoint)

    # Iterate instead of recursing so the page count is not bounded by the recursion limit.
    while True:
        parameters = {
            'start': start,
            'limit': limit,
            'api_token': api_token
        }

        response = requests.get(url, params=parameters)

        # Stop with the status code rather than continuing with an undefined payload.
        if response.status_code != 200:
            raise requests.HTTPError(
                'Pipedrive request for {} failed with HTTP status {}'.format(endpoint, response.status_code),
                response=response,
            )

        json_data = response.json()
        my_data.append(json_data['data'])

        # Treat absent pagination info as "no more pages".
        pagination = (json_data.get('additional_data') or {}).get('pagination') or {}
        if not pagination.get('more_items_in_collection'):
            break
        start = pagination['next_start']

    return my_data

In [None]:
# Exploratory call: page through endpoints[3] with paginate's default arguments and display the returned list.
paginate(endpoints[3])

In [None]:
# Scratch request used to inspect the full response envelope (not just 'data'),
# including the pagination block that paginate() relies on.
endpoint = endpoints[3]

# define url params
domain = 'actdental-c8c401'
parameter = {'start':0, 'limit':10, 'api_token':api_token}

response = requests.get('https://{domain}.pipedrive.com/v1/{endpoint}'.format(domain=domain, endpoint=endpoint), params=parameter)

# Raise on failure so `json_data` is never left undefined (or stale from an earlier
# run of this cell) for the cell below that inspects it.
if response.status_code != 200:
    raise requests.HTTPError(
        'Pipedrive request for {} failed with HTTP status {}'.format(endpoint, response.status_code),
        response=response,
    )

json_data = response.json()  # whole payload; records live under json_data['data']
print('Retrieved {} data from Pipedrive'.format(endpoint))

In [None]:
# Display the first record under 'data' in the response payload held in json_data.
json_data['data'][0]