<a href="https://colab.research.google.com/github/rwong-current/colab/blob/main/Export_Zendesk_Chat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# reloaded before each cell runs and helper edits apply without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime, timedelta, date
import requests
import json

# Colab-only: mount Google Drive so the notebook can read/write under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Locations on the mounted Drive used by the rest of the notebook.
basedir = '/content/drive/My Drive/Colab Notebooks/'  # root; also holds utils/ and fonts/
wkdir = '/content/drive/My Drive/Colab Notebooks/'  # output directory (currently identical to basedir)
datadir = '/content/drive/My Drive/Colab Notebooks/data/'

# Display every column, and format floats with thousands separators and two decimals.
pd.options.display.max_columns = None
pd.options.display.float_format = '{:,.2f}'.format

Mounted at /content/drive


In [3]:
# load helper functions
utils = basedir.replace(' ','\ ') + 'utils/'
# %run -i {utils}zendesk.ipynb
# %run -i {utils}pdf.ipynb
%run -i {utils}colab.ipynb

get_ipython().events.register('pre_run_cell', resize_colab_cell)

In [4]:
import json
import requests
import math

class zendesk:
  '''
  Small client for the Zendesk REST API that returns a dataframe of ticket
  details and messages given a list of ticket IDs.

  Run with method 'execute_order_66'.

  Credentials are taken from the constructor arguments or, when omitted, from
  the ZENDESK_USERNAME / ZENDESK_TOKEN environment variables. The API token is
  deliberately NOT stored in the notebook; any token previously committed to
  this file should be treated as leaked and rotated.
  '''

  # Fixed column layouts, so that empty results still expose the columns that
  # execute_order_66 merges on.
  _TICKET_BASE_COLUMNS = ['Ticket ID', 'Submitted', 'Description', 'Requester ID', 'Priority', 'Status']
  _MESSAGE_COLUMNS = ['Ticket ID', 'Author ID', 'Message', 'Attachment', 'Message At']
  _USER_COLUMNS = ['User ID', 'Name', 'Email', 'Phone']

  def __init__(self, username=None, token=None):
    '''
    Parameters
    ----------
    username : str, optional
        Zendesk account e-mail. Defaults to $ZENDESK_USERNAME, then to the
        account this notebook was originally written for.
    token : str, optional
        Zendesk API token. Defaults to $ZENDESK_TOKEN.
    '''
    import os  # local import: keeps this cell self-contained

    print("V1")

    self.base_get_request = 'https://current.zendesk.com/api/v2/'
    self.username = username or os.environ.get('ZENDESK_USERNAME', 'ryan.wong@current.com')
    self.token = token or os.environ.get('ZENDESK_TOKEN')

    # Populated by execute_order_66 so callers can split the merged frame
    # back into its ticket-level and message-level columns.
    self.ticket_details_columns = None
    self.ticket_messages_columns = None

  @staticmethod
  def _as_id_strings(ids):
    '''Normalises a single ID or a list/tuple/set of IDs to a list of strings.'''
    if isinstance(ids, (list, tuple, set)):
      return [str(x) for x in ids]
    return [str(ids)]

  @staticmethod
  def _title_or_none(value):
    '''Title-cases strings; passes None (e.g. an unset priority) through unchanged.'''
    return value.title() if isinstance(value, str) else value

  @staticmethod
  def _clean_field_value(field_name, value):
    '''
    Tidies a custom-field value for display.

    Strings have zero-width spaces removed, newlines turned into '<br>',
    underscores turned into spaces, are title-cased, and have a leading
    "<Field Name>  " prefix stripped. Note that title-casing also turns
    '<br>' into '<Br>'. Non-string values (None, numbers, booleans, lists)
    are returned unchanged instead of raising AttributeError.
    '''
    if not isinstance(value, str):
      return value
    return (value.replace('\u200b', '')
                 .replace('\n', '<br>')
                 .replace('_', ' ')
                 .title()
                 .replace(field_name.title() + '  ', ''))

  def chunk(self, list_to_chunk, max_chunk_size):
    '''Yields consecutive slices of list_to_chunk holding at most max_chunk_size items.'''
    assert isinstance(list_to_chunk, list)

    for i in range(0, len(list_to_chunk), max_chunk_size):
      yield list_to_chunk[i:i+max_chunk_size]

  def get_request(self, url):
    '''
    Performs an authenticated GET and returns the decoded JSON body.

    Raises RuntimeError when no API token is configured, and
    requests.HTTPError on a non-2xx response (rather than failing later with
    a confusing KeyError on the error payload).
    '''
    if not self.token:
      raise RuntimeError('Zendesk API token is not configured; pass token=... or set ZENDESK_TOKEN')

    response = requests.request(
      "GET",
      url,
      auth=(self.username+'/token', self.token),
      headers={ "Content-Type": "application/json" },
      timeout=30  # do not hang the notebook forever on a stalled connection
    )
    response.raise_for_status()

    return response.json()

  def get_ticket_categories(self):
    '''Returns {option value: option display name} for the "Category" ticket field.'''
    url = self.base_get_request + "ticket_fields/360009607114"
    categories = self.get_request(url)['ticket_field']['custom_field_options']

    category_dict = { category['value']: category['name'] for category in categories }
    return category_dict

  def get_ticket_details(self, ticket_id_list):
    '''
    Returns one row per ticket with its core attributes and the custom fields
    listed in id_to_field. Tickets are requested in batches of 100 IDs.
    '''
    category_dict = self.get_ticket_categories()

    id_to_field = {
      360012410073: "CUID",
      360012410133: "Escalation Notes (Be Specific)",
      360044806533: "Total time spent (sec)",
      360044806553: "Time spent last update (sec)",
      4417248920731: "Received via",
      4417296967067: "Dispute Escalation Category",
      24234731253531: "Intent confidence",
      24234700067611: "Sentiment",
      24234700077083: "Sentiment confidence",
      24234731238811: "Intent",
      360009607114: "Category"
    }
    ids = self._as_id_strings(ticket_id_list)

    # Collect plain dicts and build the frame once; concatenating a frame per
    # row is quadratic in the number of tickets.
    records = []
    for chunk in self.chunk(ids, 100):
      url = self.base_get_request + f'tickets/show_many?ids={"%2C".join(chunk)}'

      for ticket in self.get_request(url)['tickets']:
        record = {
          'Ticket ID': ticket['id'],
          'Submitted': ticket['created_at'],
          'Description': ticket['description'],
          'Requester ID': ticket['requester_id'],
          'Priority': self._title_or_none(ticket['priority']),
          'Status': self._title_or_none(ticket['status'])
        }

        for field in ticket['custom_fields']:
          field_name = id_to_field.get(field['id'])
          if not field_name:
            continue

          value = field['value']
          if field_name == 'Category':
            # Fall back to the raw value when the option is unset or unknown.
            record[field_name] = category_dict.get(value, value)
          else:
            record[field_name] = self._clean_field_value(field_name, value)

        records.append(record)

    if not records:
      return pd.DataFrame(columns=self._TICKET_BASE_COLUMNS)
    return pd.DataFrame(records)

  def get_messages(self, ticket_id_list):
    '''
    Returns one row per comment on the given tickets, read from the ticket
    audits. Audits authored by ID -1, audits without events, and audits whose
    first event is not a Comment are skipped. All result pages are followed
    via the response's 'next_page' link.
    '''
    records = []

    for ticket_id in self._as_id_strings(ticket_id_list):
      url = self.base_get_request + f'tickets/{ticket_id}/audits'

      while url:
        page = self.get_request(url)

        for audit in page['audits']:
          events = audit.get('events') or []
          if audit['author_id'] == -1 or not events or events[0]['type'] != 'Comment':
            continue

          comment = events[0]
          records.append({
            'Ticket ID': audit['ticket_id'],
            'Author ID': audit['author_id'],
            'Message': comment['html_body'].replace('&nbsp;','').replace('\ufeff','').replace('\n',''),
            'Attachment': [attachment['content_url'] for attachment in (comment.get('attachments') or [])],
            'Message At': audit['created_at']
          })

        url = page.get('next_page')  # None/absent on the last page

    return pd.DataFrame(records, columns=self._MESSAGE_COLUMNS)

  def get_users(self, user_id_list):
    '''Returns one row per user (ID, name, e-mail, phone), requested in batches of 100 IDs.'''
    ids = self._as_id_strings(user_id_list)

    records = []
    for chunk in self.chunk(ids, 100):
      url = self.base_get_request + f'users/show_many?ids={"%2C".join(chunk)}'

      for user in self.get_request(url)['users']:
        records.append({
          'User ID': user['id'],
          'Name': user['name'],
          'Email': user['email'],
          'Phone': user['phone']
        })

    return pd.DataFrame(records, columns=self._USER_COLUMNS)

  def execute_order_66(self, ticket_id_list):
    '''
    Fetches ticket details, messages and the users involved, and returns them
    merged into one frame with one row per (ticket, message).

    Side effect: stores the ticket-level and message-level column indexes on
    self.ticket_details_columns / self.ticket_messages_columns.
    '''
    ticket_details = self.get_ticket_details(ticket_id_list)
    messages = self.get_messages(ticket_id_list)

    # Look up every requester and author in one pass.
    users = self.get_users(list(set(ticket_details['Requester ID'].to_list() + messages['Author ID'].to_list())))

    ticket_details = (ticket_details.merge(users, how='left', left_on='Requester ID', right_on='User ID')
                                  .drop(columns=['User ID'])
                                  .rename(columns = {col: 'Requester '+col for col in users.columns}))
    messages = (messages.merge(users, how='left', left_on='Author ID', right_on='User ID')
                                  .drop(columns=['User ID'])
                                  .rename(columns = {col: 'Author '+col for col in users.columns}))

    self.ticket_details_columns = ticket_details.columns
    self.ticket_messages_columns = messages.columns

    return ticket_details.merge(messages, how='left', on='Ticket ID').reset_index(drop=True)

<IPython.core.display.Javascript object>

In [5]:
# Fetch details and messages for a single ticket; the merged frame has one
# row per message with the ticket-level columns repeated on each row.
test = zendesk()
df = test.execute_order_66([11230128])
df

<IPython.core.display.Javascript object>

V1


Unnamed: 0,Ticket ID,Submitted,Description,Requester ID,Priority,Status,Category,CUID,Escalation Notes (Be Specific),Total time spent (sec),Time spent last update (sec),Received via,Dispute Escalation Category,Intent confidence,Sentiment,Sentiment confidence,Intent,Requester Name,Requester Email,Requester Phone,Author ID,Message,Attachment,Message At,Author Name,Author Email,Author Phone
0,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto"">I gathered ...",[https://current.zendesk.com/attachments/token...,2024-06-09T22:30:54Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
1,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Caller,<...",[],2024-06-10T11:49:21Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
2,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-10T11:49:40Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
3,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[https://current.zendesk.com/attachments/token...,2024-06-10T12:03:32Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
4,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[],2024-06-10T12:04:07Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
5,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1267003494309,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-12T12:46:50Z,MJ,mark.delacruz@team.shoreoutsourcing.com,
6,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1267003494309,"<div class=""zd-comment"" dir=""auto"">CUID: FRHar...",[https://current.zendesk.com/attachments/token...,2024-06-12T12:47:41Z,MJ,mark.delacruz@team.shoreoutsourcing.com,


In [6]:
# Ticket-level column names recorded by execute_order_66 (used later to split df).
test.ticket_details_columns

<IPython.core.display.Javascript object>

Index(['Ticket ID', 'Submitted', 'Description', 'Requester ID', 'Priority',
       'Status', 'Category', 'CUID', 'Escalation Notes (Be Specific)',
       'Total time spent (sec)', 'Time spent last update (sec)',
       'Received via', 'Dispute Escalation Category', 'Intent confidence',
       'Sentiment', 'Sentiment confidence', 'Intent', 'Requester Name',
       'Requester Email', 'Requester Phone'],
      dtype='object')

In [75]:
# !pip install reportlab
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem, Table, TableStyle
from reportlab.graphics.shapes import Drawing, Line
from reportlab.lib.units import inch
from bs4 import BeautifulSoup, NavigableString, Tag
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics

class generate_ticket_transcript_pdf:
  '''
  Builds a PDF transcript of a ticket with ReportLab.

  The document starts with the ticket details rendered as one or more tables
  (columns are wrapped into several tables so each fits an 8in width), followed
  by every message: author, timestamp, the HTML body converted to flowables,
  and a one-column table of attachment URLs when there are any.

  Usage: instantiate, then call build_pdf().
  '''

  def __init__(self, chat_details, chat_messages, pdf_path):
    '''
    chat_details  -- pandas dataframe, one column for each field required
    chat_messages -- pandas dataframe, each row is a message; build_pdf reads
                     the 'Author Name', 'Message At', 'Message' and
                     'Attachment' columns
    pdf_path      -- destination path of the generated PDF
    '''
    self.chat_details = chat_details
    self.chat_messages = chat_messages
    self.doc = SimpleDocTemplate(pdf_path, pagesize=letter)
    self.styles = getSampleStyleSheet()
    self.pdf_elements = [] # elements to build pdf

    self.custom_style = ParagraphStyle(
        'Custom',
        parent=self.styles['Normal'],
        fontSize=12,
        leading=14,
        spaceAfter=12,
        spaceBefore=12,
    )

    # Relies on the notebook-level `basedir` and a font file stored on Drive.
    pdfmetrics.registerFont(TTFont('Helvetica-Oblique', basedir+'fonts/Helvetica-Oblique.ttf'))

  def create_dotted_line(self, width, dash_length=1, space_length=1):
    '''Returns a 1pt-high Drawing holding a horizontal dashed line of the given width.'''
    drawing = Drawing(width, 1)
    line = Line(0, 0, width, 0)
    line.strokeDashArray = [dash_length, space_length]
    drawing.add(line)
    return drawing

  def process_html_in_dataframe(self, df):
    '''
    Converts every cell of df into a Paragraph and returns them as a list of
    rows. '<Br>' (produced upstream by title-casing '<br>') is rewritten to
    '<br/>' so Paragraph renders it as a line break.

    The earlier implementation also parsed each cell with BeautifulSoup and
    pushed the result through process_tag, but never used what it produced and
    removed only one flowable per top-level node, so nested HTML could leave
    stray flowables in self.pdf_elements. That dead pass has been removed; the
    returned paragraphs are unchanged.
    '''
    processed_data = []
    for _, row in df.iterrows():
      processed_row = []
      for item in row:
        item_clean = str(item).replace('<Br>', '<br/>')
        processed_row.append(Paragraph(item_clean, self.custom_style))
      processed_data.append(processed_row)

    return processed_data

  # Calculate the width of the content in each cell
  def calculate_cell_width(self, data):
    '''
    Max cell width: 4in
    Min cell width: 1in
    Table width: 8in
    8 characters = 1in
    Returns 2d array with cell widths, in multiples of inches (72pixels).
    Each inner list is one table's worth of columns; columns are taken in
    order and a new table is started when the 8in budget is used up.
    Returns [] when data has no rows.
    '''
    if not data:
      return []

    # get width of cell with most data for each column
    num_cols = len(data[0])
    widest = [0] * num_cols

    for row in data:
      for index, cell in enumerate(row):
        widest[index] = max(widest[index], len(str(cell.text)))

    # Assign column width based on widest cell for each column
    cell_widths_all = []
    cell_widths = []

    for width in widest:
      # Clamp to [1in, 4in]; without the lower bound an all-empty column
      # would get a zero-width cell.
      cell_width = max(1, min(4, math.ceil(width/8)))
      if sum(cell_widths) + cell_width <= 8:
        cell_widths.append(cell_width)
      elif sum(cell_widths) + cell_width <= 10 and sum(cell_widths) != 8:
        # Overshoots by at most 2in: squeeze the column into the remaining
        # space and close this table.
        cell_widths.append(8-sum(cell_widths))
        cell_widths_all.append(cell_widths)
        cell_widths = []
      else:
        # Does not fit: close this table and start the next with this column.
        cell_widths_all.append(cell_widths)
        cell_widths = [cell_width]

    if len(cell_widths) > 0: cell_widths_all.append(cell_widths)

    return [[j*inch for j in i] for i in cell_widths_all]

  # Function to convert DataFrame to a list of ReportLab Table objects
  def df_to_tables(self, df):
    '''
    Renders df as one or more borderless tables (each followed by a spacer)
    and appends them to self.pdf_elements. Returns None.
    '''
    processed_data = self.process_html_in_dataframe(df)
    cell_widths = self.calculate_cell_width(processed_data)
    tables = []

    header_style = ParagraphStyle(
        'Header',
        parent=self.styles['Normal'],
        fontName='Helvetica-Bold',
        fontSize=12,
        leading=14,
        spaceAfter=12,
        spaceBefore=12,
    )

    # Split the DataFrame into sub-DataFrames based on the number of columns that fit
    current_idx = 0
    for grouping in cell_widths:
      data = [row[current_idx:current_idx + len(grouping)] for row in processed_data]
      header = [Paragraph(col, header_style) for col in df.columns[current_idx:current_idx + len(grouping)]]
      table_data = [header] + data
      table = Table(table_data, colWidths=grouping)
      table.setStyle(TableStyle([
          ('BACKGROUND', (0, 0), (-1, -1), colors.white),
          ('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
          ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
          ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
          ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
          ('GRID', (0, 0), (-1, -1), 0, colors.white),  # Invisible grid
      ]))
      tables.append(table)
      tables.append(Spacer(1, 0.2 * inch))  # Add space between tables
      current_idx += len(grouping)

    self.pdf_elements.extend(tables)

  # Handle html tags
  def process_tag(self, tag, parent_style):
    '''
    Converts one BeautifulSoup node into flowables appended to
    self.pdf_elements.

    Text nodes become a Paragraph in parent_style. <p>, <h1>, <h2>, <i>, <b>
    and <ul> get dedicated handling; any other tag is descended into
    recursively, so a childless tag such as <br> adds nothing.
    '''
    if isinstance(tag, NavigableString):
      self.pdf_elements.append(Paragraph(str(tag), parent_style))
    elif isinstance(tag, Tag):
      if tag.name == 'p':
        # Rebuild the paragraph's inner markup, normalising <br> to the
        # self-closing form; other child tags are passed through as-is.
        content = ''
        for child in tag.children:
          if isinstance(child, NavigableString):
            content += str(child)
          elif child.name == 'br':
            content += '<br />'
          else:
            content += str(child)
        self.pdf_elements.append(Paragraph(content, self.custom_style))
      elif tag.name == 'h1':
        h1_style = ParagraphStyle(
            'Heading1',
            parent=self.styles['Heading1'],
            fontSize=24,
            spaceAfter=18,
            spaceBefore=12,
        )
        self.pdf_elements.append(Paragraph(tag.text, h1_style))
      elif tag.name == 'h2':
        h2_style = ParagraphStyle(
            'Heading2',
            parent=self.styles['Heading2'],
            fontSize=18,
            spaceAfter=12,
            spaceBefore=8,
        )
        self.pdf_elements.append(Paragraph(tag.text, h2_style))
      elif tag.name == 'i':  # Handle italicized text
        italicized_style = ParagraphStyle(
            'Italicized',
            fontSize=12,
            spaceAfter=12,
            spaceBefore=12,
            fontName = 'Helvetica-Oblique'
        )
        self.pdf_elements.append(Paragraph(tag.text, italicized_style))
      elif tag.name == 'b':  # Handle bold text
        bold_style = ParagraphStyle(
            'Bold',
            fontSize=12,
            spaceAfter=12,
            spaceBefore=12,
            fontName = 'Helvetica-Bold'
        )
        self.pdf_elements.append(Paragraph(tag.text, bold_style))
      elif tag.name == 'ul':
        # Only direct <li> children become bullet items.
        list_items = []
        for child in tag.children:
          if child.name == 'li':
            list_items.append(ListItem(Paragraph(child.text, self.custom_style)))
        ul = ListFlowable(list_items, bulletType='bullet', start='circle')
        self.pdf_elements.append(ul)
      else:
        for child in tag.children:
          self.process_tag(child, parent_style)

  def build_pdf(self):
    '''
    Assembles the ticket-details tables and every message into
    self.pdf_elements, then writes the PDF to the path given at construction.
    '''
    # The author/timestamp styles are identical for every message, so they
    # are built once instead of per row.
    author_style = ParagraphStyle(
      'Bold',
      fontSize=12,
      spaceAfter=12,
      spaceBefore=12,
      fontName = 'Helvetica-Bold'
    )
    timestamp_style = ParagraphStyle(
      'Italicize',
      fontSize=12,
      spaceAfter=12,
      spaceBefore=12,
      fontName = 'Helvetica-Oblique'
    )

    # Chat Details
    self.df_to_tables(self.chat_details)
    self.pdf_elements.append(self.create_dotted_line(self.doc.width, dash_length=1, space_length=0))

    for index, row in self.chat_messages.iterrows():

      # Author name
      self.pdf_elements.append(Paragraph(str(row['Author Name']), author_style))

      # Message timestamp
      self.pdf_elements.append(Paragraph(row['Message At'], timestamp_style))

      # Message Body
      soup = BeautifulSoup(row['Message'], 'html.parser')
      for div in soup.find_all('div'): # remove div tags
        div.unwrap()
      for a in soup.find_all('a'): # remove a tags
        a.unwrap()
      for tag in soup:
        self.process_tag(tag, self.custom_style)

      # Attachments
      if len(row['Attachment']) > 0:
        self.df_to_tables(pd.DataFrame({'Attachments':row['Attachment']}))

      # Dotted line
      self.pdf_elements.append(Spacer(1, 0.2 * inch))  # Add some space before the line
      self.pdf_elements.append(self.create_dotted_line(self.doc.width, 0))
      self.pdf_elements.append(Spacer(1, 0.2 * inch))  # Add some space after the line

    self.doc.build(self.pdf_elements)


<IPython.core.display.Javascript object>

In [73]:
# NOTE(review): `cd` and `cm` are assigned in cells that appear further down,
# so this cell fails in a top-to-bottom run on a fresh kernel.
cmon = generate_ticket_transcript_pdf(cd, cm, wkdir+"yer.pdf")

<IPython.core.display.Javascript object>

In [74]:
# Render the transcript and write it to the path passed to the constructor.
cmon.build_pdf()

<IPython.core.display.Javascript object>

In [70]:
# Debug check: column widths per wrapped table for the ticket-details frame
# (each inner list is one table).
cmon.calculate_cell_width(cmon.process_html_in_dataframe(cd))

<IPython.core.display.Javascript object>

[[72.0, 216.0, 288.0],
 [144.0, 72.0, 72.0, 288.0],
 [288.0, 288.0],
 [72.0, 72.0, 144.0, 288.0],
 [72.0, 72.0, 72.0, 288.0, 72.0],
 [216.0, 288.0, 72.0],
 [144.0]]

In [16]:
# Ticket-level columns only; the merged frame repeats them on every message
# row, so duplicates are dropped.
cd = df[test.ticket_details_columns].drop_duplicates()
cd

<IPython.core.display.Javascript object>

Unnamed: 0,Ticket ID,Submitted,Description,Requester ID,Priority,Status,Category,CUID,Escalation Notes (Be Specific),Total time spent (sec),Time spent last update (sec),Received via,Dispute Escalation Category,Intent confidence,Sentiment,Sentiment confidence,Intent,Requester Name,Requester Email,Requester Phone
0,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757


In [17]:
# Message-level columns only (one row per message).
cm = df[test.ticket_messages_columns]
cm

<IPython.core.display.Javascript object>

Unnamed: 0,Ticket ID,Author ID,Message,Attachment,Message At,Author Name,Author Email,Author Phone
0,11230128,24640724660123,"<div class=""zd-comment"" dir=""auto"">I gathered ...",[https://current.zendesk.com/attachments/token...,2024-06-09T22:30:54Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
1,11230128,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Caller,<...",[],2024-06-10T11:49:21Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
2,11230128,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-10T11:49:40Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
3,11230128,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[https://current.zendesk.com/attachments/token...,2024-06-10T12:03:32Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
4,11230128,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[],2024-06-10T12:04:07Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
5,11230128,1267003494309,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-12T12:46:50Z,MJ,mark.delacruz@team.shoreoutsourcing.com,
6,11230128,1267003494309,"<div class=""zd-comment"" dir=""auto"">CUID: FRHar...",[https://current.zendesk.com/attachments/token...,2024-06-12T12:47:41Z,MJ,mark.delacruz@team.shoreoutsourcing.com,


In [14]:
# Display the full merged ticket/message frame.
df

<IPython.core.display.Javascript object>

Unnamed: 0,Ticket ID,Submitted,Description,Requester ID,Priority,Status,Category,CUID,Escalation Notes (Be Specific),Total time spent (sec),Time spent last update (sec),Received via,Dispute Escalation Category,Intent confidence,Sentiment,Sentiment confidence,Intent,Requester Name,Requester Email,Requester Phone,Author ID,Message,Attachment,Message At,Author Name,Author Email,Author Phone
0,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto"">I gathered ...",[https://current.zendesk.com/attachments/token...,2024-06-09T22:30:54Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
1,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Caller,<...",[],2024-06-10T11:49:21Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
2,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1264993065390,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-10T11:49:40Z,John D.,john.dalumpines@team.shoreoutsourcing.com,
3,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[https://current.zendesk.com/attachments/token...,2024-06-10T12:03:32Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
4,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,24640724660123,"<div class=""zd-comment"" dir=""auto""><p dir=""aut...",[],2024-06-10T12:04:07Z,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757.0
5,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1267003494309,"<div class=""zd-comment"" dir=""auto"">Hi Carlos,<...",[],2024-06-12T12:46:50Z,MJ,mark.delacruz@team.shoreoutsourcing.com,
6,11230128,2024-06-09T22:30:54Z,I gathered police report from my previous and ...,24640724660123,Normal,Open,Question about a transaction/purchase,Frharhoogswnbzjczelqxqcinoigwuhr,Cuid: Frharhoogswnbzjczelqxqcinoigwuhr<Br>Did:...,403,51,Es Web Form,Dispute Appeal Evidence Provided,Medium,Neutral,High,Software Data Lost Data Is Missing,Caller +1 (305) 525-5757,carlos.on.matrix@icloud.com,13055255757,1267003494309,"<div class=""zd-comment"" dir=""auto"">CUID: FRHar...",[https://current.zendesk.com/attachments/token...,2024-06-12T12:47:41Z,MJ,mark.delacruz@team.shoreoutsourcing.com,


In [222]:
# html_strings = [
#   """
#   <div class="zd-comment" dir="auto"><p>Hi Caller,<br><br>Thank you for contacting us about this.<br><br>We looked into this for you and we confirmed that your recent dispute claim resulted in no error found.<br><br>If you haveevidence to support your claim that you did not previously provide, you may submit it for consideration. If we determine that it may support your claim, we will open an appeal. Please make sure as well that the document that you'll be providing is related to the claim that you have.<br><br>Examples of documents or written communication that can help support your claim are:<br><br></p><ul dir="auto"><li>Police reports <i>(full report is highly preferred however case numbers WITH precinct contact information is acceptable)</i></li><li>Incarceration Records <i>(full release discharge paperwork with dates of incarceration and release provided)</i></li><li>Hospitalization Records <i>(full release discharge paperwork with dates of intake and release provided)</i></li><li>Evidence of returned merchandise or Non-receipt of goods</li><li>Email communication/confirmation with merchant</li><li>Receipts</li></ul><br><p>We look forward to your response.<br><br>Best,<br>John D.<br></p></div>
#   """
# ]

# NOTE(review): `all_text` is not defined in any cell visible in this notebook — confirm where it comes from.
html_strings = all_text

# html_strings = ['''
# <div class="zd-comment" dir="auto"><p dir="auto">That’s exactly what I sent </p><p dir="auto">Two police reports more one identifies they stole my personal information completely they broke into my house <br>Very Respectfully <br>Carlos Manuel Otero</p><p dir="auto">On Jun 10, 2024, at 07:49, John D. (Current Support)  wrote:</p><p dir="auto"></p></div>
# ''']

# NOTE(review): `convert_html_to_pdf` is not defined in any cell visible in
# this notebook; this looks like a leftover from an earlier approach — confirm.
pdf_path = wkdir+"ambitious.pdf"
convert_html_to_pdf(html_strings, pdf_path)

In [9]:
# Scratch check: slicing with [:4] keeps the first four items.
[1,2,3,4,5][:4]

<IPython.core.display.Javascript object>

[1, 2, 3, 4]

In [12]:
# Scratch check: a list of attachment URLs becomes a one-column frame, one URL per row.
pd.DataFrame({'Attachments':df['Attachment'].iloc[0]})

<IPython.core.display.Javascript object>

Unnamed: 0,Attachments
0,https://current.zendesk.com/attachments/token/...
