# Get prospects in the recycle bin


## Configuration
We need some login details. We can either pull them in from a config file or set the variables below.
https://towardsdatascience.com/keeping-credentials-safe-in-jupyter-notebooks-fbd215a8e311

Provide a file name in "config_file" or provide values in the individual variables. The config file will override the listed variables.

If we already have a list of export IDs we wish to reuse, we can provide them in "exportids"; leave it empty to have new exports created automatically.

In [None]:
# Dictionary of configuration details. It is created empty here and filled in
# either by the config file or by the individual assignments below.
cfg = {'pardot_api_creds': {}}

# Provide a value for config_file (a notebook that is %run to supply the
# configuration); leave it empty to rely on the values entered below.

config_file = "pardotlogin.ipynb"

# OR all of the below

cfg['pardot_api_creds']['PardotOath_Client_id'] = ""
cfg['pardot_api_creds']['PardotOath_client_secret'] = ""
cfg['pardot_api_creds']['PardotOath_username'] = ""
cfg['pardot_api_creds']['PardotOath_password'] = ""
cfg['pardot_api_creds']['PardotOath_token'] = ""

cfg['pardot_api_creds']['PardotURL'] = ""
cfg['pardot_api_creds']['apiversion'] = ""
# Sent as the 'Pardot-Business-Unit-Id' header when the exports are created,
# so it must be present when configuring manually. The key spelling matches
# the lookup used by the export code.
cfg['pardot_api_creds']['PardotBusinesUnitID'] = ""



exportids = []  # list of export ids to use, leave empty to autocreate, e.g. [1264,1266,1268]


## Code Setup
Making sure we have login details, getting a Pardot API library, logging in, and also setting some number formatting so our IDs read well.

In [None]:
# get the login details

if config_file:
    """Ovrride any manually entered config above"""
    %run $config_file


# Pull in a Pardot API common library
%run ./PardotAPI.ipynb


#Log into Pardot
access_token = PardotLogin()

pd.options.display.float_format = '{:.0f}'.format # Get rid of adding trailing zeros to our numbers

## Create an export of all the Prospects in the recycle bin
Because each export is limited to one year of data, we read the Pardot account creation date and create one export for every year the account has existed. It is easy to forget to include the latest year when a new year begins.

Note that the procedure argument '"deleted": True' in the filter is what allows us to get data from the recycle bin.

In [None]:
from datetime import date

# Account details; the creation date decides the first year to export and the
# account data is reused later in the notebook.
PardotAccount = PardotAccountRead()

# Only create new exports when no export IDs were supplied up front.
if not exportids:
    # The first four characters of created_at are taken as the creation year.
    first_year = int(PardotAccount['account']['created_at'][0:4])
    # range() stops before its end value, so add 1 to include the current
    # year. Using today's year (rather than a fixed one) keeps new years
    # from being silently skipped.
    years = range(first_year, date.today().year + 1)

    # The endpoint and headers are the same for every year, so build them once.
    url = cfg['pardot_api_creds']['PardotURL'] +"/api/export/version/"+ str(cfg['pardot_api_creds']['apiversion']) +"/do/create?format=json"
    headers = {
      'Content-Type': 'application/json',
      'Authorization': 'Bearer ' + access_token,
      'Pardot-Business-Unit-Id': cfg['pardot_api_creds']['PardotBusinesUnitID'],
    }

    for year in years:

        # One export request per calendar year.
        payload = json.dumps({
          "object": "Prospect",
          "fields": [
            "id",
            "crm_lead_fid",
            "crm_contact_fid",
            "is_email_hard_bounced",
            "email_bounced_at",
            "last_activity_at",
            "score",
            "first_assigned_at",
            "crm_owner_fid",
            "crm_last_sync",
            "is_archived",
            "updated_at",
          ],
          "procedure": {
            "name": "filter_by_updated_at",
            "arguments": {
              # "deleted": True requests records from the recycle bin
              "deleted": True,
              "updated_after": str(year)+"-01-01 00:00:00",
              "updated_before": str(year)+"-12-31 23:59:59"
            }
          }
        })

        response = requests.request("POST", url, headers=headers, data=payload)
        response_dict = json.loads(response.text)
        # Remember the id of the created export so its files can be read later.
        exportids.append(response_dict['export']['id'])

        #print(response.text)

### Read the export
Loop the export results and get data.
Note that there are a number of commented-out options. We often need to get different things, and it's easy to uncomment the option we want and comment out those we don't, so we don't need to look everything up again.

In [None]:
# Download every file of every export and combine them into one DataFrame, df.
frames = []  # one DataFrame per downloaded file; concatenated once at the end

#object = "email_template_id" # "form_id" "email_template_id"
filelimit = None #1 # None or an int - This is used to limit impact while extending code so we aren't sucking in ALL the data everytime
filecount = 0

for exportid in exportids:
    print (exportid)
    # NOTE(review): called with no arguments; presumably it reads the current
    # global exportid - confirm against the PardotAPI notebook.
    FileURLs = PardotGetExportFilesURLList()
    #print(FileURLs)
    for fileURL in FileURLs:
        print(fileURL)
        temp = PardotDownloadFileURL(fileURL)
        #temp = temp[temp.email_template_id.notnull()]
        #temp = temp[temp.email_id.notnull()]

        #temp = temp[temp[object].notnull()]
        #temp = temp.filter(['details', 'campaign_id', object, 'email_id'])
        #temp.drop_duplicates(subset =['details', object, "campaign_id"], inplace = True)

        frames.append(temp)

        filecount += 1
        # Stop reading files once the optional limit is reached
        if filelimit and filelimit <= filecount:
            break
    # ...and stop looping over the remaining exports as well
    if filelimit and filelimit <= filecount:
        break

# Concatenate once instead of growing df inside the loop, which would re-copy
# all previously loaded rows for every file. With nothing downloaded, fall
# back to an empty DataFrame.
df = pd.concat(frames, axis=0) if frames else pd.DataFrame()
#df.drop_duplicates(subset =['details', object, "campaign_id"], inplace = True)

df.shape
#temp
#temp

## Reports
Some pretty graphs because we can.

In [None]:
import matplotlib.pyplot as plt
# if using a Jupyter notebook, include:
%matplotlib inline

# Pie chart, where the slices will be ordered and plotted counter-clockwise:
labels = [ str(df.count()['id'] - df.count()['crm_lead_fid'] - df.count()['crm_contact_fid'])+' Prospects', str(df.count()['crm_lead_fid'])+' Leads', str(df.count()['crm_contact_fid'])+' Contacts']
sizes = [df.count()['id'] - df.count()['crm_lead_fid'] - df.count()['crm_contact_fid'] , df.count()['crm_lead_fid'] , df.count()['crm_contact_fid'] ]


fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.axis('equal')  # Equal aspect ratio ensures the pie chart is circular.
ax.set_title('Deleted Record Types')


plt.show()


# Pie chart, where the slices will be ordered and plotted counter-clockwise:
labels = [ str(df.count()['id'] - df.count()['is_email_hard_bounced'] )+' No Bounces', str(df.count()['is_email_hard_bounced'])+' Bounced']
sizes = [df.count()['id'] - df.count()['is_email_hard_bounced']  , df.count()['is_email_hard_bounced']  ]


fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.axis('equal')  # Equal aspect ratio ensures the pie chart is circular.
ax.set_title('Deleted Record Bounced')


plt.show()

## Safe file folder creation
Automatically create any folders we need if they do not already exist.

In [None]:
# Adapted from https://stackoverflow.com/a/600612/119527
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error. An OSError is still
    raised if ``path`` exists but is not a directory, or if the directory
    cannot be created.
    """
    os.makedirs(path, exist_ok=True)

def safe_open_w(path):
    """Open ``path`` for writing, creating any parent directories as needed.

    Returns a text-mode file object opened with UTF-8 encoding; the caller
    is responsible for closing it (e.g. by using it in a ``with`` block).
    """
    parent = os.path.dirname(path)
    # A bare file name has no directory part; os.makedirs('') would raise,
    # so only create directories when there is something to create.
    if parent:
        mkdir_p(parent)
    return open(path, 'w', encoding='utf-8')

## Write out data

In [None]:
# Write the combined recycle-bin data to a dated CSV file under output/.
dt = date.today().strftime('%Y-%m-%d')
# Strip spaces from the company name so it can be used in the file name
AccountName = PardotAccount['account']['company'].replace(" ", "")


with safe_open_w(f'output/DeletedOwnership_{AccountName}_{dt}.csv') as csvfile:
    # Let pandas write the DataFrame's own column names as the header row so
    # the header always matches the data columns. A fixed three-column header
    # does not describe the exported prospect fields.
    df.to_csv(csvfile, header=True, index=False)


print("Complete - Records: ",len(df))