In [2]:
import csv

# Function to open a CSV file and return fields and rows
def open_csv_file(file_name):
    print("Opened a new CSV file: " + str(file_name))
    
    # Initializing the titles and rows list
    fields = []
    rows = []
    
    # Reading CSV file
    with open(file_name, 'r') as csvfile:
        # Creating a CSV reader object
        csvreader = csv.reader(csvfile)
        
        # Extracting field names from the first row
        fields = next(csvreader)
    
        # Extracting each data row one by one
        for row in csvreader:
            rows.append(row)
    
        # Get the total number of rows
        print("Total no. of rows: %d" % (csvreader.line_num))
    
    # Printing the field names
    print('Field names are: ' + ', '.join(field for field in fields))
    print('\n')
    return [fields, rows]

# Function to get the length of the first column value
def get_length_of_first_column(row):
    return len(row[0])

# Function to perform Job Sequencing Greedy algorithm
def job_sequencing(rows, max_deadline):
    # Sort the rows based on the length of the first column in descending order
    sorted_rows = sorted(rows, key=get_length_of_first_column, reverse=True)

    # Initialize variables for the schedule and set to store selected jobs
    schedule = [-1] * len(sorted_rows)
    job_set = set()
    rejected_jobs = []

    # Fill the schedule
    for row in sorted_rows:
        deadline = int(row[1])  # Assuming the deadline is in the second column
        if deadline <= max_deadline:
            for i in range(min(len(schedule) - 1, deadline - 1), -1, -1):
                if schedule[i] == -1:
                    schedule[i] = tuple(row)  # Convert the list to a tuple
                    job_set.add(tuple(row))   # Convert the list to a tuple
                    break
        else:
            rejected_jobs.append(tuple(row))

    return list(job_set), rejected_jobs

# Main program
if __name__ == "__main__":
    # Open the main file (Before merge)
    res = open_csv_file('notsorted_excel_legal.csv')
    fields = res[0]
    rows = res[1]

    # Apply merge sort on rows based on length of the first column (Report length)
    sorted_rows = sorted(rows, key=get_length_of_first_column)

    # Writing the sorted rows to a new CSV file
    with open("LegalDocs_sorted_by_len.csv", 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(sorted_rows)

    # Open the new file
    res = open_csv_file('LegalDocs_sorted_by_len.csv')
    fields = res[0]
    rows = res[1]

    # Perform Job Sequencing
    max_deadline_chosen = 3
    selected_jobs, rejected_jobs = job_sequencing(rows, max_deadline_chosen)

    # Display selected jobs
    print("Selected Jobs which have a deadline of " + str(max_deadline_chosen) + " days:")
    for job in selected_jobs:
        print(job)

    print("Rejected Jobs which have a deadline of " + str(max_deadline_chosen) + " days:")
    for job in rejected_jobs:
        print(job)

    # Now, classify from array how many times each word has appeared
    to_find_word_arr = ['biotech', 'real-estate', 'house']

    for job in selected_jobs:
        first_column_words = job[0].split()  # We assume words are separated by space
        word_count = {word: 0 for word in to_find_word_arr}  # Initialize word count dictionary
    
        # Lower() is used to convert to lower cases to avoid any mismatches
        for word in first_column_words:
            if word.lower() in to_find_word_arr:
                word_count[word.lower()] += 1  # Increment if found to be matching
    
        # Display the first 128 chars in print
        print("\nWord count for job:", job[0:128])
        for word, count in word_count.items():
            print(f"{word}: {count} instances")


Opened a new CSV file: notsorted_excel_legal.csv
Total no. of rows: 5
Field names are: Reports, Deadline(Days), Payment($)


Opened a new CSV file: LegalDocs_sorted_by_len.csv
Total no. of rows: 5
Field names are: Reports, Deadline(Days), Payment($)


Selected Jobs which have a deadline of 3 days:
('exp exp exp biotech biotech biotech biotech', '3', '80')
('apple apple orange real-estate', '1', '20')
('RESIDENTIAL LEASE AGREEMENT This House Residential Lease Agreement ("Agreement") is entered into on [Date] by and between: Landlord of House: [Landlord\'s Full Name] Address: [Landlord\'s Address] Tenant: [Tenant\'s Full Name] Address: [Tenant\'s Address] 1. PROPERTY Landlord agrees to lease to Tenant and Tenant agrees to lease from Landlord the real property located at [Property Address] (the "Property"). 2. TERM The lease term will commence on [Start Date] and will continue for [Number of Months/Years] ending on [End Date]. 3. RENT Tenant agrees to pay a monthly rent of [Monthly Rent A