### test date parsing

In [3]:
import re

def extract_dates_with_context(text):
  """Extracts dates from the given text.

  Args:
    text: The input text.

  Returns:
    A list of tuples, where each tuple contains a date and its corresponding data.
  """

  dates = []
  date_pattern = r"\w+ \d{4}-\w+ \d{4}|\w+\s+\d{2}-|\w+\s+\d{4}-.*"
  matches = re.findall(date_pattern, text)

  for match in matches:
    start_index = text.index(match)
    end_index = start_index + len(match)
    lines = text.splitlines()
    date_line_index = None
    for i, line in enumerate(lines):
      if match in line:
        date_line_index = i
        break

    if date_line_index is not None:
      line_before = lines[date_line_index - 1] if date_line_index > 0 else None
      line =  lines[date_line_index]
      line_after = lines[date_line_index + 1] if date_line_index < len(lines) - 1 else None
      dates.append((match, line_before, line, line_after))

  return dates

# Example usage:
text = """
Environment: Oracle12c, TOAD, SQLDeveloper, MSExcel, Github, Jenkins, bamboo, UNIX.
Mphasis Corporation, NY                                                                                              Dec 2016-June 2017      
Role: Oracle PL/SQL Developer 
Project: Connect Risk Engine Genesis.
...
Mastercard, O’Fallon, Missouri                                                                                        July 17-Till date      
Role: Oracle PL/SQL Developer 

 

Project: Smartdata. 
The General Data Protection Regulation (EU) ("GDPR") is a regulation in EU law on data protection and 
privacy for all individuals within the European Union (EU) and the European Economic Area 
(EEA).According to the compliance the data has to be purged in accordance with the regulation. 
"""

dates = extract_dates_with_context(text)
for date, line_before, line, line_after in dates:
  print(f"Date: {date}")
  if line_before:
    print(f"Summary: {line_before}")
  if line:
    start_index = line.find(date)
    string_before = line[:start_index].strip()
    print(f"Company: {string_before}")
  if line_after:
    print(f"Position: {line_after}")
  print()

Date: Dec 2016-June 2017
Summary: Environment: Oracle12c, TOAD, SQLDeveloper, MSExcel, Github, Jenkins, bamboo, UNIX.
Company: Mphasis Corporation, NY
Position: Role: Oracle PL/SQL Developer 

Date: July 17-
Summary: ...
Company: Mastercard, O’Fallon, Missouri
Position: Role: Oracle PL/SQL Developer 



In [4]:
text = """
Jewellery Sales Associate October 2022- Present
AC Custom Diamonds
 Engage positively with customers, providing professional support for sales and service
needs
 Provide excellent customer service through active engagement, direct eye contact, and 
active listening skills.
 Asking open-ended questions to ascertain what type of item the customer desire, price 
range limits, and recipient of the item.
 Manage and process customer payments through a POS made by credit and debit cards 
as well as cash
Cart Attendant January 2021- August 2022
Sobeys
 Engaged customers in friendly conversation, providing customer service to ensure a 
pleasant shopping experience
"""
dates = extract_dates_with_context(text)
for date, line_before, line, line_after in dates:
  print(f"Date: {date}")
  if line_before:
    print(f"Summary: {line_before}")
  if line:
    start_index = line.find(date)
    string_before = line[:start_index].strip()
    print(f"Company: {string_before}")
  if line_after:
    print(f"Position: {line_after}")
  print()

len(dates)

Date: October 2022- Present
Company: Jewellery Sales Associate
Position: AC Custom Diamonds

Date: January 2021- August 2022
Summary: as well as cash
Company: Cart Attendant
Position: Sobeys



2

In [6]:
l = ['Jewellery Sales Associate', 'October 2022- Present', 'AC Custom Diamonds', 'Engage positively with customers, providing professional support for sales and service', 'needs', 'Provide excellent customer service through active engagement, direct eye contact, and', 'active listening skills.', 'Asking open-ended questions to ascertain what type of item the customer desire, price', 'range limits, and recipient of the item.', 'Manage and process customer payments through a POS made by credit and debit cards', 'as well as cash', 'Cart Attendant', 'January 2021- August 2022', 'Sobeys', 'Engaged customers in friendly conversation, providing customer service to ensure a', 'pleasant shopping experience', 'Click to buy NOW!', 'P', 'D', 'F', '-', 'X', 'C', 'h', 'a', 'n', 'g', 'e', 'P', 'r', 'o', 'd', 'u', 'c', 't', 'w', 'w', 'w', '.', 't', 'r', 'a', 'c', 'k', 'e', 'r', '-', 's', 'o', 'f', 't', 'w', 'a', 'r', 'e', '.', 'c', 'o', 'm', 'Click to buy NOW!', 'P', 'D', 'F', '-', 'X', 'C', 'h', 'a', 'n', 'g', 'e', 'P', 'r', 'o', 'd', 'u', 'c', 't', 'w', 'w', 'w', '.', 't', 'r', 'a', 'c', 'k', 'e', 'r', '-', 's', 'o', 'f', 't', 'w', 'a', 'r', 'e', '.', 'c', 'o', 'm', 'AC Co-op & Career Centre', 'October 2023', '4', 'Gathered and ferried carts back to the store, and helped guests carry out and load larger', 'items or large grocery orders', 'Kept entrances clean of debris or clutter, cleaned up spills throughout the store, stocked', 'shelves, and cleaned restrooms and other surfaces', 'Helped customers find stock, manned register as backup, bagged groceries, and', 'retrieved and replaced damaged items noticed at checkout']

mystring = ''.join(map(str,l))

print(mystring)

Jewellery Sales AssociateOctober 2022- PresentAC Custom DiamondsEngage positively with customers, providing professional support for sales and serviceneedsProvide excellent customer service through active engagement, direct eye contact, andactive listening skills.Asking open-ended questions to ascertain what type of item the customer desire, pricerange limits, and recipient of the item.Manage and process customer payments through a POS made by credit and debit cardsas well as cashCart AttendantJanuary 2021- August 2022SobeysEngaged customers in friendly conversation, providing customer service to ensure apleasant shopping experienceClick to buy NOW!PDF-XChangeProductwww.tracker-software.comClick to buy NOW!PDF-XChangeProductwww.tracker-software.comAC Co-op & Career CentreOctober 20234Gathered and ferried carts back to the store, and helped guests carry out and load largeritems or large grocery ordersKept entrances clean of debris or clutter, cleaned up spills throughout the store, stoc

In [7]:
dates = extract_dates_with_context(mystring)
for date, line_before, line, line_after in dates:
  print(f"Date: {date}")
  if line_before:
    print(f"Summary: {line_before}")
  if line:
    start_index = line.find(date)
    string_before = line[:start_index].strip()
    print(f"Company: {string_before}")
  if line_after:
    print(f"Position: {line_after}")
  print()

Date: AssociateOctober 2022- PresentAC Custom DiamondsEngage positively with customers, providing professional support for sales and serviceneedsProvide excellent customer service through active engagement, direct eye contact, andactive listening skills.Asking open-ended questions to ascertain what type of item the customer desire, pricerange limits, and recipient of the item.Manage and process customer payments through a POS made by credit and debit cardsas well as cashCart AttendantJanuary 2021- August 2022SobeysEngaged customers in friendly conversation, providing customer service to ensure apleasant shopping experienceClick to buy NOW!PDF-XChangeProductwww.tracker-software.comClick to buy NOW!PDF-XChangeProductwww.tracker-software.comAC Co-op & Career CentreOctober 20234Gathered and ferried carts back to the store, and helped guests carry out and load largeritems or large grocery ordersKept entrances clean of debris or clutter, cleaned up spills throughout the store, stockedshelves

# Input is a list segment


Try to parse using list, not string


In [10]:
jh_segment = ['Jewellery Sales Associate', 'October 2022- Present', 'AC Custom Diamonds', 'Engage positively with customers, providing professional support for sales and service', 'needs', 'Provide excellent customer service through active engagement, direct eye contact, and', 'active listening skills.', 'Asking open-ended questions to ascertain what type of item the customer desire, price', 'range limits, and recipient of the item.', 'Manage and process customer payments through a POS made by credit and debit cards', 'as well as cash', 'Cart Attendant', 'January 2021- August 2022', 'Sobeys', 'Engaged customers in friendly conversation, providing customer service to ensure a', 'pleasant shopping experience', 'Click to buy NOW!', 'P', 'D', 'F', '-', 'X', 'C', 'h', 'a', 'n', 'g', 'e', 'P', 'r', 'o', 'd', 'u', 'c', 't', 'w', 'w', 'w', '.', 't', 'r', 'a', 'c', 'k', 'e', 'r', '-', 's', 'o', 'f', 't', 'w', 'a', 'r', 'e', '.', 'c', 'o', 'm', 'Click to buy NOW!', 'P', 'D', 'F', '-', 'X', 'C', 'h', 'a', 'n', 'g', 'e', 'P', 'r', 'o', 'd', 'u', 'c', 't', 'w', 'w', 'w', '.', 't', 'r', 'a', 'c', 'k', 'e', 'r', '-', 's', 'o', 'f', 't', 'w', 'a', 'r', 'e', '.', 'c', 'o', 'm', 'AC Co-op & Career Centre', 'October 2023', '4', 'Gathered and ferried carts back to the store, and helped guests carry out and load larger', 'items or large grocery orders', 'Kept entrances clean of debris or clutter, cleaned up spills throughout the store, stocked', 'shelves, and cleaned restrooms and other surfaces', 'Helped customers find stock, manned register as backup, bagged groceries, and', 'retrieved and replaced damaged items noticed at checkout']

def extract_dates_with_context(jh_segment):
  """Extracts date, company, position, summary from the given segment.

  Args:
    jh_segment: list contains text in items for fragment

  Returns:
    A list of tuples, where each tuple contains a date, company, position, summary.
  """

  string_content = ' '.join(map(str,jh_segment))
  dates = []
  date_pattern = r"\w+ \d{4}-\w+ \d{4}|\w+\s+\d{2}-|\w+\s+\d{4}-.*"
  matches = re.findall(date_pattern, text)

  for match in matches:
    start_index = text.index(match)
    end_index = start_index + len(match)
    lines = text.splitlines()
    date_line_index = None
    for i, line in enumerate(lines):
      if match in line:
        date_line_index = i
        break

    if date_line_index is not None:
      line_before = lines[date_line_index - 1] if date_line_index > 0 else None
      line =  lines[date_line_index]
      line_after = lines[date_line_index + 1] if date_line_index < len(lines) - 1 else None
      dates.append((match, line_before, line, line_after))

  return dates


In [11]:
dates = extract_dates_with_context(jh_segment)
for date, line_before, line, line_after in dates:
  print(f"Date: {date}")
  if line_before:
    print(f"Summary: {line_before}")
  if line:
    start_index = line.find(date)
    string_before = line[:start_index].strip()
    print(f"Company: {string_before}")
  if line_after:
    print(f"Position: {line_after}")
  print()

Date: October 2022- Present
Company: Jewellery Sales Associate
Position: AC Custom Diamonds

Date: January 2021- August 2022
Summary: as well as cash
Company: Cart Attendant
Position: Sobeys



In [12]:
jh_segment2 = ['Kroger Stores', 'Cashier', 'Decatur, AL | 2018 - current', 'Assisted 54+ daily customers with checkout, answering', 'questions', 'Collaborated with 8 other cashiers, offering assistance to other', 'teammates during peak hours', 'Managed 26+ items in checkout line inventory', 'Recognized as an employee of the year in 2019 by greeting with', 'a cheery and upbeat attitude', 'Piccadilly', 'Cashier/Customer Service', 'Mobile, AL | 2014 - 2018', 'Handled cashier procedures, ensuring 100% accuracy and 0', 'discrepancies between receipts and register totals', 'Conducted cash, debit, and credit card purchases from 155+', 'customers per shift', 'Directed scheduling of 6 delivery drivers, ensuring 98% of orders', 'were delivered on time', 'Increased average customer order size by 32% by upselling and', 'cross-selling side dishes and drinks', 'Chuck E Cheese', 'Cashier', 'Birmingham, AL | 2011 - 2013', 'Promoted an upbeat and family-friendly attitude, receiving 100%', 'positive feedback on surveys', 'Handled prize disbursement, ensuring customer satisfaction and', 'resolving 6+ customer disputes per shift', 'Conducted cash and credit checkout procedures with 99.5%', 'accuracy', 'Greeted 210+ customers per shift, suggesting food and beverage', 'services that increased overall purchase prices by 27%']

olivia = extract_dates_with_context(jh_segment2)
for date, line_before, line, line_after in dates:
  print(f"Date: {date}")
  if line_before:
    print(f"Summary: {line_before}")
  if line:
    start_index = line.find(date)
    string_before = line[:start_index].strip()
    print(f"Company: {string_before}")
  if line_after:
    print(f"Position: {line_after}")
  print()

Date: October 2022- Present
Company: Jewellery Sales Associate
Position: AC Custom Diamonds

Date: January 2021- August 2022
Summary: as well as cash
Company: Cart Attendant
Position: Sobeys



In [103]:
def extract_jobhistory_bysegment(segment):
    """Extracts dates from the given text.

    Args:
      text: The input text.

    Returns:
      A list of tuples, where each tuple contains a date and its corresponding data.
    """
    jobhistory = []
    matches = []
    if (len(segment) == 0):
        return None
    else:
        #date_pattern = r"\w+ \d{4}-\w+ \d{4}|\w+\s+\d{2}-|\w+\s+\d{4}-.*"
        date_p1 = r".*?\w?\s*(19|20)\d{2}\s*-\s*((19|20)\d{2}|current|present)"
        for index, element in enumerate(test):
            if isinstance(element, str):
                match = re.match(date_p1, element)
                #print(f"\"{element}\"")
                if match:
                    #print(f"\"{element}\" matches the regex")
                    matches.append((index, match.group()))
        
        #matches = re.findall(date_pattern, string_segment)
    
        for match in enumerate(matches):
            jh_idx = match[1][0]
            jh_date = match[1][1]
            if jh_idx > 0 and jh_idx - 2 < len(segment):
                jh_company = segment[jh_idx - 2]
            else:
                jh_company = None  # or any default value you prefer
            if jh_idx > 0 and jh_idx - 1 < len(segment):
                jh_position = segment[jh_idx - 1]
            else:
                jh_position = None  # or any default value you prefer
            jobhistory.append((jh_date, jh_company, jh_position))
    
        return jobhistory

In [87]:
test = [' 2005 - 2023 ','2005 - 2023 ',' Mobile, AL | 2014 - 2018' , 'Decatur, AL | 2018 - current']
jh_segment2 = ['Kroger Stores', 'Cashier', 'Decatur, AL | 2018 - current', 'Assisted 54+ daily customers with checkout, answering', 'questions', 'Collaborated with 8 other cashiers, offering assistance to other', 'teammates during peak hours', 'Managed 26+ items in checkout line inventory', 'Recognized as an employee of the year in 2019 by greeting with', 'a cheery and upbeat attitude', 'Piccadilly', 'Cashier/Customer Service', 'Mobile, AL | 2014 - 2018', 'Handled cashier procedures, ensuring 100% accuracy and 0', 'discrepancies between receipts and register totals', 'Conducted cash, debit, and credit card purchases from 155+', 'customers per shift', 'Directed scheduling of 6 delivery drivers, ensuring 98% of orders', 'were delivered on time', 'Increased average customer order size by 32% by upselling and', 'cross-selling side dishes and drinks', 'Chuck E Cheese', 'Cashier', 'Birmingham, AL | 2011 - 2013', 'Promoted an upbeat and family-friendly attitude, receiving 100%', 'positive feedback on surveys', 'Handled prize disbursement, ensuring customer satisfaction and', 'resolving 6+ customer disputes per shift', 'Conducted cash and credit checkout procedures with 99.5%', 'accuracy', 'Greeted 210+ customers per shift, suggesting food and beverage', 'services that increased overall purchase prices by 27%']
matches = []
date_pattern = r"\w+ \d{4}-\w+ \d{4}|\w+\s+\d{2}-|\w+\s+\d{4}-.*"
date_p1 = r".*?\w?\s*(19|20)\d{2}\s*-\s*((19|20)\d{2}|current|present)"
for index, element in enumerate(jh_segment2):
    if isinstance(element, str):
        match = re.match(date_p1, element)
        #print(f"\"{element}\"")
        if match:
            #print(f"\"{element}\" matches the regex")
            matches.append((index, match.group()))

for match in enumerate(matches):
    jh_idx = match[1][0]
    print(f"index : {jh_idx} value: '{match[1][1]}'")
    print(f'Company: {jh_segment2[jh_idx - 2]}')
    print(f'Position: {jh_segment2[jh_idx - 1]}')

index : 2 value: 'Decatur, AL | 2018 - current'
Company: Kroger Stores
Position: Cashier
index : 12 value: 'Mobile, AL | 2014 - 2018'
Company: Piccadilly
Position: Cashier/Customer Service
index : 23 value: 'Birmingham, AL | 2011 - 2013'
Company: Chuck E Cheese
Position: Cashier


In [75]:
idx_job_title = extract_jobhistory_bysegment(jh_segment2)
idx_job_title


[('Decatur, AL | 2018 - current', 'Kroger Stores', 'Cashier'),
 ('Mobile, AL | 2014 - 2018', 'Piccadilly', 'Cashier/Customer Service'),
 ('Birmingham, AL | 2011 - 2013', 'Chuck E Cheese', 'Cashier'),
 (' 2005 - 2023',
  'Greeted 210+ customers per shift, suggesting food and beverage',
  'services that increased overall purchase prices by 27%'),
 ('2005 - 2023',
  'services that increased overall purchase prices by 27%',
  'Kroger Stores'),
 (' Mobile, AL | 2014 - 2018', 'Kroger Stores', 'Cashier'),
 ('Decatur, AL | 2018 - current', 'Cashier', 'Decatur, AL | 2018 - current')]

In [96]:
def print_job_details(job):
    for date, line, line_after in job:
      print(f"Date: {date}")
      #if line_before:
      #  print(f"Summary: {line_before}")
      if line:
        start_index = line.find(date)
        string_before = line[:start_index].strip()
        print(f"Company: {string_before}")
      if line_after:
        print(f"Position: {line_after}")
      print()

In [113]:
jh_segment3 =['Chicago State University, Chicago, IL.', 'Aug 2015-May 2017', 'Master of Science: Computer Science.', 'JNTU, Hyderabad, Telangana. Aug 2009-May 2013', 'Bachelors of Technology: Electrical and Electronics Engineering.']
jh_test = []

job_3 = []
job_3 = extract_jobhistory_bysegment(jh_segment3)
if(job_3 is not None and len(job_3)>0) : print_job_details(job_3)

Date: Decatur, AL | 2018 - current
Company: Chicago State University, Chicago, IL
Position: Aug 2015-May 2017

Date: Mobile, AL | 2014 - 2018

Date: Birmingham, AL | 2011 - 2013

Date:  2005 - 2023

Date: 2005 - 2023
Company: Bachelors of Technology: Electrical and Electronics Engineering
Position: Chicago State University, Chicago, IL.

Date:  Mobile, AL | 2014 - 2018
Company: Chicago State University, Chicago, IL
Position: Aug 2015-May 2017

Date: Decatur, AL | 2018 - current
Company: Aug 2015-May 201
Position: Master of Science: Computer Science.

Date:  2005 - 2023

Date: 2005 - 2023
Company: Bachelors of Technology: Electrical and Electronics Engineering
Position: Chicago State University, Chicago, IL.

Date:  Mobile, AL | 2014 - 2018
Company: Chicago State University, Chicago, IL
Position: Aug 2015-May 2017

Date: Decatur, AL | 2018 - current
Company: Aug 2015-May 201
Position: Master of Science: Computer Science.

Date:  2005 - 2023

Date: 2005 - 2023
Company: Bachelors of Techn