### Please install the required Python modules/SDKs

In [1]:
# Shell escape: asks the shell to activate the `ai-azure-c1` conda environment.
# NOTE(review): `!` commands run as a separate shell command, so this presumably does not
# change the environment of the already-running kernel -- confirm.
! activate ai-azure-c1

import sys

# Make the packages installed in the `ai-azure-c1` environment importable from this kernel.
# NOTE(review): absolute, Python-3.8-specific path; adjust it if the environment lives elsewhere.
sys.path.append("/opt/conda/envs/ai-azure-c1/lib/python3.8/site-packages")

## Importing Azure Form Recognizer Python modules

In [2]:
import os
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential

In [3]:
# Form Recognizer connection settings.
# Read from the environment so the endpoint and key never have to be saved inside the notebook;
# the empty-string fallback keeps the original "fill in your own value" placeholder behaviour.
AZURE_FORM_RECOGNIZER_ENDPOINT = os.environ.get("AZURE_FORM_RECOGNIZER_ENDPOINT", "")
AZURE_FORM_RECOGNIZER_KEY = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "")

In [4]:
endpoint = AZURE_FORM_RECOGNIZER_ENDPOINT
key = AZURE_FORM_RECOGNIZER_KEY

In [5]:
form_training_client = FormTrainingClient(endpoint=endpoint, credential=AzureKeyCredential(key))

In [6]:
saved_model_list = form_training_client.list_custom_models()

## Training Source Data URL

To generate the training data URL:
1. Download the Cognito Corporation training documents to your local system: https://github.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/tree/main/resources/cognito-corp-docs.
2. Upload the training documents to a blob container at Azure Blob Storage. Training documents are named `Cognito-corporation-u*.pdf`. 
3. Generate a SAS URL of the training data container. 
4. Once the model is trained, you will use the `Cognito-corporation-test01.png` file located in this GitHub directory to perform prediction: https://raw.githubusercontent.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/main/resources/Cognito-corporation-test01.png

In [7]:
# SAS URL of the blob container that holds the training documents (no subdirectory).
# A SAS URL embeds a signed access token, so it is read from the environment instead of being
# saved in the notebook; the URLs previously pasted here all expired in October 2023.
# Expected shape: https://<account>.blob.core.windows.net/<container>?sv=...&se=...&sig=...
trainingDataUrl = os.environ.get("TRAINING_DATA_SAS_URL", "")

## Performing Unlabeled Training


In [8]:
# Start training a custom model on the documents behind `trainingDataUrl`,
# without label files (use_training_labels=False).
training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=False)
# NOTE(review): `.result()` presumably blocks until the service has finished training -- confirm.
custom_model = training_process.result()

## Getting Model Info

In [9]:
custom_model

CustomFormModel(model_id=056a8614-7b1e-4b6c-823b-457a2aa53bd4, status=ready, training_started_on=2023-10-25 09:42:26+00:00, training_completed_on=2023-10-25 09:42:49+00:00, submodels=[CustomFormSubmodel(accuracy=None, model_id=056a8614-7b1e-4b6c-823b-457a2aa53bd4, fields={'field-0': CustomFormModelField(label=Baggage, name=field-0, accuracy=None), 'field-1': CustomFormModelField(label=Boarding Time, name=field-1, accuracy=None), 'field-2': CustomFormModelField(label=Carrier, name=field-2, accuracy=None), 'field-3': CustomFormModelField(label=Chicago, name=field-3, accuracy=None), 'field-4': CustomFormModelField(label=Class, name=field-4, accuracy=None), 'field-5': CustomFormModelField(label=Date, name=field-5, accuracy=None), 'field-6': CustomFormModelField(label=Flight No., name=field-6, accuracy=None), 'field-7': CustomFormModelField(label=From, name=field-7, accuracy=None), 'field-8': CustomFormModelField(label=From:, name=field-8, accuracy=None), 'field-9': CustomFormModelField(lab

In [10]:
custom_model.model_id

'056a8614-7b1e-4b6c-823b-457a2aa53bd4'

In [11]:
custom_model.status

'ready'

In [12]:
custom_model.training_started_on

datetime.datetime(2023, 10, 25, 9, 42, 26, tzinfo=<isodate.tzinfo.Utc object at 0x00000177F9826E90>)

In [13]:
custom_model.training_completed_on

datetime.datetime(2023, 10, 25, 9, 42, 49, tzinfo=<isodate.tzinfo.Utc object at 0x00000177F9826E90>)

In [14]:
custom_model.training_documents

[TrainingDocumentInfo(name=boarding-james-webb.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-libby.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass1.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass10.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass2.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass3.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass4.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass5.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass6.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass

In [15]:
# Fetch the trained model from the service by its ID and report its key properties.
custom_model_info = form_training_client.get_custom_model(model_id=custom_model.model_id)

for template, value in (
    ("Model ID: {}", custom_model_info.model_id),
    ("Status: {}", custom_model_info.status),
    ("Training started on: {}", custom_model_info.training_started_on),
    ("Training completed on: {}", custom_model_info.training_completed_on),
):
    print(template.format(value))

Model ID: 056a8614-7b1e-4b6c-823b-457a2aa53bd4
Status: ready
Training started on: 2023-10-25 09:42:26+00:00
Training completed on: 2023-10-25 09:42:49+00:00


## Using an image document as test document URL (Not using PDF here)

* Here, you will use the `Cognito-corporation-test01.png` file located in this GitHub directory to perform prediction: https://raw.githubusercontent.com/udacity/cd0461-building-computer-vision-solutions-with-azure-exercises/main/resources/Cognito-corporation-test01.png
* Note: If you want to use a PDF document for the test, please save and upload PDF to Azure Blob Storage and use the SAS URL of this PDF document as the target URL.
* Using a PDF document from the GitHub URL will give you an error.
* You will see a screenshot of how to do this on the exercise solution page later in this lesson. 

In [16]:
new_test_url = "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass1.png"

In [17]:
form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=AzureKeyCredential(key))

In [18]:
custom_test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(model_id=custom_model_info.model_id, form_url=new_test_url)

In [19]:
custom_test_action_result = custom_test_action.result()

In [20]:
# Train a second custom model from the same container, this time with use_training_labels=True.
# NOTE(review): presumably this needs label files stored next to the documents -- confirm.
labeled_training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
labeled_custom_model = labeled_training_process.result()

In [21]:
labeled_custom_model.training_documents

[TrainingDocumentInfo(name=boarding_pass1.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass10.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass2.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass3.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass4.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass5.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass6.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass7.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass8.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass9.pdf

In [22]:
# Run the labeled model against the test document and fetch the recognition result.
labeled_custom_test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(
    model_id=labeled_custom_model.model_id,
    form_url=new_test_url,
)
labeled_custom_test_action_result = labeled_custom_test_action.result()

# Report every recognised form and each field it contains.
for recognized_content in labeled_custom_test_action_result:
    print("Form type: {}".format(recognized_content.form_type))
    for name, field in recognized_content.fields.items():
        # Fall back to the field name when no label text came back for the field.
        if field.label_data:
            label = field.label_data.text
        else:
            label = name
        print(
            "Field '{}' has label '{}' with value '{}' and a confidence score of {}".format(
                name, label, field.value, field.confidence
            )
        )

Form type: custom:a5a403ae-2c19-4671-8e63-4d3fc0881d84
Field 'Seat' has label 'Seat' with value '20A' and a confidence score of 0.988
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.689
Field 'Name' has label 'Name' with value 'Avkash Chauhan' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'Bag' has label 'Bag' with value 'NO' and a confidence score of 0.598
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.422
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.653
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99


## Listing Models

In [23]:
# NOTE(review): the returned listing is only stored here; it is never iterated or displayed in
# the visible cells, so no model is actually shown under this heading -- confirm that is intended.
saved_model_list = form_training_client.list_custom_models()

In [24]:
## Creating another model with labeled training
labeled_2_training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
labeled_2_custom_model = labeled_2_training_process.result()

In [25]:
cognito_corporation_model_list = [labeled_custom_model.model_id, labeled_2_custom_model.model_id]

In [26]:
# Combine the model IDs in `cognito_corporation_model_list` into a single composed model
# named "Cognito Corporation Model".
composed_process = form_training_client.begin_create_composed_model(
            cognito_corporation_model_list, model_name="Cognito Corporation Model")
composed_process_model = composed_process.result()

In [27]:
# Fetch the composed model from the service by its ID and report its key properties.
composed_model_info = form_training_client.get_custom_model(model_id=composed_process_model.model_id)

for template, value in (
    ("Model ID: {}", composed_model_info.model_id),
    ("Status: {}", composed_model_info.status),
    ("Training started on: {}", composed_model_info.training_started_on),
    ("Training completed on: {}", composed_model_info.training_completed_on),
):
    print(template.format(value))

Model ID: 6c4620b8-434a-4808-a9f6-b9995982f628
Status: ready
Training started on: 2023-10-25 09:43:31+00:00
Training completed on: 2023-10-25 09:43:32+00:00


## Using the composed model to extract data from the boarding_pass

In [28]:
# Module-level accumulators filled in by process_training_data_urls()
flight_info_list = []  # one dict of flight details per recognised boarding pass
names_array = []       # every passenger name read from a boarding pass


def process_training_data_urls(form_recognizer_client, composed_process_model, training_data_urls):
    """
    Run the composed model over each document URL, print what it recognises and
    record the results in the module-level ``flight_info_list`` and ``names_array``.

    :param form_recognizer_client: The Form Recognizer client.
    :param composed_process_model: The composed model object (its ``model_id`` is used).
    :param training_data_urls: List of document URLs to be processed.
    """
    wanted_fields = ('Flight', 'Name', 'From', 'Carrier', 'Seat', 'Time', 'Date', 'Class', 'To')
    field_template = "Field '{}' has label '{}' with value '{}' and a confidence score of {}"

    for document_url in training_data_urls:
        poller = form_recognizer_client.begin_recognize_custom_forms_from_url(
            model_id=composed_process_model.model_id,
            form_url=document_url,
        )
        for form in poller.result():
            print("Form type: {}".format(form.form_type))

            # Collect the fields of interest for this one boarding pass.
            boarding_pass = {}
            for field_name, field in form.fields.items():
                if field_name == "Name":
                    names_array.append(field.value)
                if field_name in wanted_fields:
                    boarding_pass[field_name] = field.value

                # Fall back to the field name when no label text came back for the field.
                label_text = field.label_data.text if field.label_data else field_name
                print(field_template.format(field_name, label_text, field.value, field.confidence))

            flight_info_list.append(boarding_pass)
            print("-----------------------------------------------------------------")

# Example usage:
training_data_urls = [
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass1.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass2.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass3.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass4.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/boardingpass/pass5.png",
]
process_training_data_urls(form_recognizer_client, composed_process_model, training_data_urls)

# To view the collected flight information
for flight_info in flight_info_list:
    print(flight_info)
print("-----------------------------------------------------------------")

# To view the collected names
print(names_array)


Form type: Cognito Corporation Model:a5a403ae-2c19-4671-8e63-4d3fc0881d84
Field 'Flight' has label 'Flight' with value '234' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.99
Field 'Name' has label 'Name' with value 'Avkash Chauhan' and a confidence score of 0.99
Field 'Bag' has label 'Bag' with value 'NO' and a confidence score of 0.598
Field 'Class' has label 'Class' with value 'E' and a confidence score of 0.689
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Date' has label 'Date' with value 'April 20, 2022' and a confidence score of 0.653
Field 'Seat' has label 'Seat' with value '20A' and a confidence score of 0.988
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.99
Field 'Time' has label 'Time' with value '10:00 AM PST' and a confidence score of 0.422
-----------------

## Validate ID Cards

In [29]:
content_url_list =[
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-avkash-chauhan.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-jackson.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-webb.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-libby-herold.png",
    "https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-radha-s-kumar.png"
]

In [30]:
# Maps "<first name> <last name>" (as read from the ID card) to the extracted date of birth
dob_dict = {}


def print_id_card_details(identity_card):
    """
    Print the holder's full name and date of birth from one recognised ID document
    and record the date of birth in the module-level ``dob_dict``.

    Nothing is recorded when the first or the last name is missing.

    :param identity_card: Recognised document whose ``fields`` mapping is read.
    """
    fields = identity_card.fields
    first_name = fields.get("FirstName")
    last_name = fields.get("LastName")

    # Guard clause: without both name parts there is no key to store the date of birth under.
    if not (first_name and last_name):
        print("Name not found!")
        return

    full_name = f"{first_name.value} {last_name.value}"
    print(f"Full Name: {full_name}")

    dob = fields.get("DateOfBirth")
    if dob:
        print("Date of Birth: {} has confidence: {}".format(dob.value, dob.confidence))
        dob_dict[full_name] = dob.value

def extract_data_from_ids(form_recognizer_client, url_list):
    """
    Recognise the identity document behind each URL and print its details.

    A failure on one URL is reported with a printed message and the remaining URLs
    are still processed. After the loop the module-level ``dob_dict`` is printed.

    :param form_recognizer_client: The Form Recognizer client.
    :param url_list: URLs of the ID documents to process.
    """
    for url in url_list:
        print(f"Processing URL: {url}")
        try:
            poller = form_recognizer_client.begin_recognize_identity_documents_from_url(url)
            id_cards = poller.result()
            if not id_cards:
                print(f"No data found for URL: {url}")
                continue
            # Only the first recognised document of each URL is used.
            print_id_card_details(id_cards[0])
        except Exception as e:
            print(f"Error processing URL {url}: {str(e)}")

    # Summary of every date of birth captured so far, keyed by full name
    print("Captured Dates of Birth:", dob_dict)

# Use the function
extract_data_from_ids(form_recognizer_client, content_url_list)


Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-avkash-chauhan.png
Full Name: AVKASH CHAUHAN CHAUHAN
Date of Birth: 1990-01-01 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-jackson.png
Full Name: James Jackson
Date of Birth: 1956-10-12 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-james-webb.png
Full Name: James Webb
Date of Birth: 1970-12-15 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-libby-herold.png
Full Name: Libby Herold
Date of Birth: 1996-02-10 has confidence: 0.995
Processing URL: https://mymlworkspace6225174622.blob.core.windows.net/digitalid/ca-dl-radha-s-kumar.png
Full Name: Radha SKumar
Date of Birth: 1994-03-05 has confidence: 0.995
Captured Dates of Birth: {'AVKASH CHAUHAN CHAUHAN': datetime.date(1990, 1, 1), 'James Jackson': datetime.date(1956

## Set the flightmanifestWrite Validation fields to False

## The first and last name extracted from the boarding pass and ID card must match the name in the flight manifest table

In [31]:
import pandas as pd
import io
from azure.storage.blob import BlobServiceClient

EXPECTED_COLUMNS = [
    "Carrier", "Flight No.", "Class", "From", "To", "Date", "Baggage", 
    "Seat", "Gate", "Boarding Time", "Ticket No", "First Name", "Last Name", 
    "Sex", "DateofBirth", "DoBValidation", "PersonValidation", "LuggageValidation", 
    "NameValidation", "BoardingPassValidation"
]

def normalize_name(first_name, last_name):
    """
    Build the canonical form of a passenger name used for matching.

    :param first_name: First name(s) as a string.
    :param last_name: Last name as a string.
    :return: Both parts joined by one space, whitespace runs collapsed, lower-cased.
    """
    return " ".join((first_name + " " + last_name).split()).lower()


def _canonical_name(name):
    """Return ``name`` in the same canonical form as normalize_name(), or None if it is unusable."""
    if not isinstance(name, str) or not name.strip():
        return None
    return " ".join(name.split()).lower()


def update_name_validation(df, names_array):
    """
    Set 'NameValidation' to True for every manifest row whose name was read from a boarding pass.

    :param df: Manifest DataFrame with 'First Name', 'Last Name' and 'NameValidation' columns.
        It is modified in place.
    :param names_array: Names extracted from the boarding passes. Entries that are not
        non-empty strings (e.g. None for an unreadable field) are ignored.
    :return: ``(df, updated_rows_name)`` where the second item counts the rows set to True.
    """
    # Extracted names get the same whitespace/case normalisation as the manifest names,
    # otherwise a stray double space in the recognised text would prevent a match.
    valid_names_set = {canonical for canonical in map(_canonical_name, names_array) if canonical}
    updated_rows_name = 0

    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        if full_name in valid_names_set:
            df.at[index, 'NameValidation'] = True
            updated_rows_name += 1
        else:
            print(f"Failed NameValidation for: {full_name} (Row: {index + 1})")

    return df, updated_rows_name


def update_boarding_pass_validation(df, flight_info_list):
    """
    Set 'BoardingPassValidation' to True for rows whose name and flight number match a boarding pass.

    :param df: Manifest DataFrame with 'First Name', 'Last Name', 'Flight No.' and
        'BoardingPassValidation' columns. It is modified in place.
    :param flight_info_list: One dict per boarding pass; the 'Name' and 'Flight' keys are used.
        Entries without a usable 'Name' are skipped instead of raising.
    :return: ``(df, updated_rows_person)`` where the second item counts the rows set to True.
    """
    person_info_dict = {}
    for info in flight_info_list:
        canonical = _canonical_name(info.get('Name'))
        if canonical is None:
            # A boarding pass without a readable name cannot be matched to any row.
            continue
        person_info_dict[canonical] = info
    updated_rows_person = 0

    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        if full_name in person_info_dict:
            # Compare as strings: the manifest holds a number, the recognised field holds text.
            # A boarding pass with no 'Flight' field simply counts as a mismatch.
            if str(row['Flight No.']) == str(person_info_dict[full_name].get('Flight')):
                df.at[index, 'BoardingPassValidation'] = True
                updated_rows_person += 1
            else:
                print(f"Failed BoardingPassValidation (flight mismatch) for: {full_name} (Row: {index + 1})")
        else:
            print(f"Failed BoardingPassValidation (name not found) for: {full_name} (Row: {index + 1})")

    return df, updated_rows_person

def update_flight_manifest(names_array, flight_info_list):
    """
    Download FlightManifest.csv, apply name and boarding-pass validation, and upload it again.

    The blob is read from and written back to the 'flightmanifestwrite' container.
    Any exception is caught and reported with a printed message instead of being raised.

    NOTE(review): ``account_name`` and ``account_key`` are read from notebook-level variables
    that are not defined in the visible cells -- define them before calling this function.

    :param names_array: Passenger names extracted from the boarding passes.
    :param flight_info_list: One dict of extracted fields per boarding pass.
    """
    try:
        # Indentation fixed: this line was one space deeper than the rest of the `try` body,
        # which made the whole cell fail with an IndentationError.
        blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)

        read_container_name = 'flightmanifestwrite'
        read_blob_name = 'FlightManifest.csv'
        blob_client = blob_service_client.get_blob_client(container=read_container_name, blob=read_blob_name)
        blob_data = blob_client.download_blob()
        blob_content = blob_data.readall()
        df = pd.read_csv(io.BytesIO(blob_content))

        # Normalize column names to remove leading and trailing spaces
        df.columns = [col.strip() for col in df.columns]

        # Debugging Step: Print the dataframe columns
        print(f"Normalized columns in the data: {df.columns.tolist()}")

        # Refuse to continue when the manifest schema differs from EXPECTED_COLUMNS in either direction.
        unexpected_columns = set(df.columns) - set(EXPECTED_COLUMNS)
        missing_columns = set(EXPECTED_COLUMNS) - set(df.columns)

        if unexpected_columns:
            print(f"Unexpected columns: {list(unexpected_columns)}")
        if missing_columns:
            print(f"Missing columns: {list(missing_columns)}")
        if unexpected_columns or missing_columns:
            raise ValueError("Column mismatch detected in the data!")

        df, updated_names = update_name_validation(df, names_array)
        df, updated_boarding_passes = update_boarding_pass_validation(df, flight_info_list)

        print(f"Total Rows Updated for NameValidation: {updated_names}")
        print(f"Total Rows Updated for BoardingPassValidation: {updated_boarding_passes}")

        # Serialise the updated manifest and overwrite the blob it was read from.
        output_stream = io.StringIO()
        df.to_csv(output_stream, index=False)
        output_stream.seek(0)
        blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

        # Print updated dataframe
        print("Updated FlightManifest.csv:")
        print(df.head())
        print("Data successfully uploaded to flightmanifestwrite")

    except Exception as e:
        print(f"An error occurred: {e}")
# Sample call
update_flight_manifest(names_array, flight_info_list)


Normalized columns in the data: ['Carrier', 'Flight No.', 'Class', 'From', 'To', 'Date', 'Baggage', 'Seat', 'Gate', 'Boarding Time', 'Ticket No', 'First Name', 'Last Name', 'Sex', 'DateofBirth', 'DoBValidation', 'PersonValidation', 'LuggageValidation', 'NameValidation', 'BoardingPassValidation']
Total Rows Updated for NameValidation: 5
Total Rows Updated for BoardingPassValidation: 5
Updated FlightManifest.csv:
  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Cha

In [32]:
import pandas as pd
import io
from azure.storage.blob import BlobServiceClient
import datetime


def normalize_name(first_name, last_name):
    """
    Build the canonical form of a passenger name used for matching.

    :param first_name: First name(s) as a string.
    :param last_name: Last name as a string.
    :return: Both parts joined by one space, whitespace runs collapsed, lower-cased.
    """
    return " ".join((first_name + " " + last_name).split()).lower()


def update_dob_validation(df, dob_dict):
    """
    Set 'DoBValidation' to True for manifest rows whose date of birth matches ``dob_dict``.

    :param df: Manifest DataFrame with 'First Name', 'Last Name', 'DateofBirth' and
        'DoBValidation' columns. 'DateofBirth' is parsed with the format '%d %B %Y'
        (e.g. "1 January 1990"). The frame is modified in place.
    :param dob_dict: Maps a full name to the ``datetime.date`` to compare against.
    :return: ``(df, updated_rows_dob)`` where the second item counts the rows set to True.
    """
    print(df.head())  # Print a sample of the dataframe before the update

    updated_rows_dob = 0
    # Bring the keys into the same canonical form normalize_name() produces for manifest rows.
    # (Joining the whitespace-split key also copes with an empty key, which used to raise.)
    normalized_dob_dict = {" ".join(key.split()).lower(): value for key, value in dob_dict.items()}

    for index, row in df.iterrows():
        full_name = normalize_name(row['First Name'], row['Last Name'])
        if full_name not in normalized_dob_dict:
            continue

        dob_str = row['DateofBirth']
        try:
            dob_from_csv = datetime.datetime.strptime(dob_str, '%d %B %Y').date()
        except (TypeError, ValueError):
            # Missing (NaN) or malformed date: leave this row unvalidated instead of aborting the run.
            print(f"Failed DoBValidation (unreadable date of birth) for: {full_name} (Row: {index + 1})")
            continue

        if dob_from_csv == normalized_dob_dict[full_name]:
            # Boolean True, consistent with the NameValidation / BoardingPassValidation updates,
            # rather than the string "TRUE" in a boolean column.
            df.at[index, 'DoBValidation'] = True
            updated_rows_dob += 1

    return df, updated_rows_dob

def update_flight_manifest():
    """
    Download FlightManifest.csv, apply date-of-birth validation, and upload it again.

    The blob is read from and written back to the 'flightmanifestwrite' container.

    NOTE(review): this definition rebinds ``update_flight_manifest``, which an earlier cell
    defines with a different signature (names_array, flight_info_list).
    NOTE(review): the ``dob_dict`` below is hard-coded and local; it is used instead of the
    module-level ``dob_dict`` filled from the ID cards -- confirm that is intended.
    NOTE(review): ``account_name`` and ``account_key`` are read from notebook-level variables
    that are not defined in the visible cells -- define them before calling this function.
    """
    # Indentation fixed: this line was one space deeper than the rest of the body,
    # which made the whole cell fail with an IndentationError.
    blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key)

    read_container_name = 'flightmanifestwrite'
    read_blob_name = 'FlightManifest.csv'
    blob_client = blob_service_client.get_blob_client(container=read_container_name, blob=read_blob_name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall()

    df = pd.read_csv(io.BytesIO(blob_content))

    # Reference dates of birth, keyed by lower-cased full name
    dob_dict = {
    'avkash chauhan': datetime.date(1990, 1, 1),
    'james jackson': datetime.date(1956, 10, 12),
    'james webb': datetime.date(1970, 12, 15),
    'libby herold': datetime.date(1996, 2, 10),
    'radha s kumar': datetime.date(1994, 3, 5)
    }


    df, updated_dobs = update_dob_validation(df, dob_dict)  # DoB validation
    print(df.head())  # Print a sample of the dataframe after the update

    print(f"Total Rows Updated for DoBValidation: {updated_dobs}")

    # Serialise the updated manifest and overwrite the blob it was read from.
    write_container_name = 'flightmanifestwrite'
    write_blob_name = 'FlightManifest.csv'
    output_stream = io.StringIO()
    df.to_csv(output_stream, index=False)
    output_stream.seek(0)
    blob_client = blob_service_client.get_blob_client(container=write_container_name, blob=write_blob_name)
    blob_client.upload_blob(output_stream.getvalue(), overwrite=True)

    print("Data successfully uploaded to flightmanifestwrite")

update_flight_manifest()


  Carrier  Flight No.     Class           From       To            Date  \
0      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
1      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
2      UA         234   Economy  San Francisco  Chicago  April 20, 2022   
3      UA         234  Business  San Francisco  Chicago  April 20, 2022   
4      UA         234   Economy  San Francisco  Chicago  April 20, 2022   

  Baggage Seat Gate Boarding Time  Ticket No First Name Last Name Sex  \
0     YES  20A   G1  10:00 AM PST   34236746     Avkash   Chauhan   M   
1     YES   1A   G1  10:00 AM PST   34236747      James      Webb   M   
2      No  25B   G1  10:00 AM PST   34236748      James   Jackson   M   
3      No   3D   G1  10:00 AM PST   34236749      Libby    Herold   F   
4     YES  34B   G1  10:00 AM PST   34236750    Radha S     Kumar   F   

        DateofBirth  DoBValidation  PersonValidation  LuggageValidation  \
0    1 January 1990           True 

In [33]:
import pandas as pd
import io
from azure.storage.blob import BlobServiceClient


container_name = 'flightmanifestwrite'
blob_name = 'FlightManifest.csv'

def get_updated_manifest_from_blob(account_name, account_key, container_name, blob_name):
    """
    Download a CSV blob and return it as a DataFrame with boolean validation columns.

    Column names are stripped of surrounding whitespace. Each of 'DoBValidation',
    'NameValidation' and 'BoardingPassValidation' that is present is replaced by a boolean
    column that is True where the stripped, upper-cased text equals "TRUE"; a missing
    column only produces a printed warning.

    :param account_name: Storage account name used in the connection string.
    :param account_key: Storage account key used in the connection string.
    :param container_name: Container that holds the blob.
    :param blob_name: Name of the CSV blob.
    :return: The converted DataFrame.
    """
    from azure.storage.blob import BlobServiceClient
    from io import StringIO

    # Constructing the connection string
    conn_str = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
    service = BlobServiceClient.from_connection_string(conn_str)
    manifest_blob = service.get_blob_client(container=container_name, blob=blob_name)

    # Pull the blob down as text and parse it as CSV
    csv_text = manifest_blob.download_blob().content_as_text()
    manifest = pd.read_csv(StringIO(csv_text))
    manifest.columns = manifest.columns.str.strip()

    # Turn the textual TRUE/FALSE flags into real booleans for easier comparison
    for col in ('DoBValidation', 'NameValidation', 'BoardingPassValidation'):
        print(f"Processing column: {col}")

        if col not in manifest.columns:
            print(f"Warning: Column {col} not found in DataFrame.")
            continue

        as_upper_text = manifest[col].astype(str).str.strip().str.upper()
        print(f"Values in column {col} before conversion: {as_upper_text.unique()}")
        manifest[col] = as_upper_text == "TRUE"
        print(f"Values in column {col} after conversion: {manifest[col].unique()}")

    return manifest

df = get_updated_manifest_from_blob(account_name, account_key, container_name, blob_name)

# Print one boarding message per manifest row, chosen by the three validation flags.

# Courtesy title per value of the manifest's 'Sex' column; any other value gets no title.
COURTESY_TITLES = {"M": "Mr.", "F": "Ms."}

for index, row in df.iterrows():
    print("------------------------------------------------------------")
    print("\n")

    # A missing validation column counts as "not validated" instead of raising a KeyError.
    dob_ok = bool(row.get('DoBValidation', False))
    name_ok = bool(row.get('NameValidation', False))
    boarding_pass_ok = bool(row.get('BoardingPassValidation', False))

    flight_details = f"flight # {row['Flight No.']} leaving at {row['Boarding Time']} from {row['From']} to {row['To']}"

    # All validations are TRUE
    if dob_ok and name_ok and boarding_pass_ok:
        # Address the passenger by the title matching the manifest instead of always "Mr."
        title = COURTESY_TITLES.get(str(row.get('Sex', '')).strip().upper())
        passenger = f"{row['First Name']} {row['Last Name']}"
        print(f"Dear {title} {passenger}," if title else f"Dear {passenger},")
        print(f"You are welcome to {flight_details}.")
        print(f"Your seat number is {row['Seat']}, and it is confirmed.")
        print("\n")

    # BoardingPassValidation is FALSE but DoBValidation and NameValidation are TRUE
    elif dob_ok and name_ok:
        print("Dear Sir/Madam,")
        print(f"Your boarding pass for {flight_details} has not been validated.")
        print("Please visit our customer service desk for further assistance.")
        print("\n")

    # DoBValidation or NameValidation is FALSE
    else:
        print("Dear Sir/Madam,")
        print(f"There seems to be an issue with your credentials for {flight_details}.")
        print("Please visit our customer service desk for further assistance.")
        print("\n")


Processing column: DoBValidation
Values in column DoBValidation before conversion: ['TRUE']
Values in column DoBValidation after conversion: [ True]
Processing column: NameValidation
Values in column NameValidation before conversion: ['TRUE']
Values in column NameValidation after conversion: [ True]
Processing column: BoardingPassValidation
Values in column BoardingPassValidation before conversion: ['TRUE']
Values in column BoardingPassValidation after conversion: [ True]
------------------------------------------------------------


Dear Mr. Avkash Chauhan,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 20A, and it is confirmed.


------------------------------------------------------------


Dear Mr. James Webb,
You are welcome to flight # 234 leaving at 10:00 AM PST from San Francisco to Chicago.
Your seat number is 1A, and it is confirmed.


------------------------------------------------------------


Dear Mr. James Jack

## Resources 
- https://docs.microsoft.com/en-us/samples/azure/azure-sdk-for-python/formrecognizer-samples/