In [1]:
import azure.cognitiveservices.speech as speechsdk
import os
from dotenv import load_dotenv
load_dotenv()


True

Set up the subscription info for the Speech Service:

In [2]:
speech_key, service_region = os.getenv('SPEECH_SERVICE_KEY'), "EastUS"

Create an instance of a speech config with specified subscription key and service region.
Replace with your own subscription key and service region (e.g., "westus").

In [3]:
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

Create a recognizer with the given settings. Since no explicit audio config is specified, the default microphone will be used (make sure the audio settings are correct).

### Transcribe file to text

In [11]:
# Replace with the path to your audio file
audio_file = "./TedTalk_Bill_Gates.wav"

# Creates an audio configuration that points to an audio file
audio_config = speechsdk.audio.AudioConfig(filename=audio_file)

# Creates a recognizer with the given settings
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

print("Recognizing speech from file...")
result = speech_recognizer.recognize_once()

# Prints the recognized text
print(result.text)


Recognizing speech from file...
I'm going to talk today about energy and climate, and that might seem a bit surprising because my full time work at the Foundation is mostly about vaccines and seeds, about the things that we need to invent and deliver to help the poorest 2 billion live better lives.


In [12]:
print(result)

SpeechRecognitionResult(result_id=d17f63f0408c46fd90f3aafe0a5c8ef9, text="I'm going to talk today about energy and climate, and that might seem a bit surprising because my full time work at the Foundation is mostly about vaccines and seeds, about the things that we need to invent and deliver to help the poorest 2 billion live better lives.", reason=ResultReason.RecognizedSpeech)


In [17]:


def sample_extractive_summarization(result):
    # [START extract_summary]
    import os
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient

    endpoint = os.environ["AZURE_LANGUAGE_ENDPOINT"]
    key = os.environ["AZURE_LANGUAGE_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )
    
    document = [result.text]
    poller = text_analytics_client.begin_extract_summary(document)
    extract_summary_results = poller.result()
    for result in extract_summary_results:
        if result.kind == "ExtractiveSummarization":
            print("Summary extracted: \n{}".format(
                " ".join([sentence.text for sentence in result.sentences]))
            )
        elif result.is_error is True:
            print("...Is an error with code '{}' and message '{}'".format(
                result.error.code, result.error.message
            ))
    # [END extract_summary]


if __name__ == "__main__":
    sample_extractive_summarization(result)


Summary extracted: 
I'm going to talk today about energy and climate, and that might seem a bit surprising because my full time work at the Foundation is mostly about vaccines and seeds, about the things that we need to invent and deliver to help the poorest 2 billion live better lives.


In [8]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

In [16]:
form_endpoint = os.getenv('form_endpoint')
form_key = os.getenv('form_key')

In [22]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
document_analysis_client = DocumentAnalysisClient(
    endpoint=form_endpoint, credential=AzureKeyCredential(form_key)
)


In [34]:
model_id = "resumetest"
formUrl = "https://testlanguage1.blob.core.windows.net/testjo12345/New-York-Resume-Template-Creative.pdf?sp=r&st=2024-05-22T16:32:48Z&se=2024-05-23T00:32:48Z&spr=https&sv=2022-11-02&sr=b&sig=ovEfZiq4ewScGL2dewUmDs5DT1O2iuEDlUtVQmsP6g8%3D"

In [35]:
poller = document_analysis_client.begin_analyze_document_from_url(model_id, formUrl)
result = poller.result()

In [36]:
for idx, document in enumerate(result.documents):
    print("--------Analyzing document #{}--------".format(idx + 1))
    print("Document has type {}".format(document.doc_type))
    print("Document has confidence {}".format(document.confidence))
    print("Document was analyzed by model with ID {}".format(result.model_id))
    for name, field in document.fields.items():
        field_value = field.value if field.value else field.content
        print("......found field of type '{}' with value '{}' and with confidence {}".format(field.value_type, field_value, field.confidence))


--------Analyzing document #1--------
Document has type resumetest
Document has confidence 0.021
Document was analyzed by model with ID resumetest
......found field of type 'string' with value 'ROBERT COOPER' and with confidence 0.826
......found field of type 'date' with value '2020-01-01' and with confidence 0.651
......found field of type 'string' with value '1515 Pacific Ave Los Angeles, CA 90291 United States' and with confidence 0.247
......found field of type 'string' with value 'Security Guard' and with confidence 0.623
......found field of type 'string' with value 'S.A.F.E. Approach Level II Training, Hawaii Western College' and with confidence 0.031
......found field of type 'string' with value 'email@email.com' and with confidence 0.883
......found field of type 'string' with value '3868683442' and with confidence 0.216


In [37]:
for page in result.pages:
    print("\nLines found on page {}".format(page.page_number))
    for line in page.lines:
        print("...Line '{}'".format(line.content.encode('utf-8')))
    for word in page.words:
        print(
            "...Word '{}' has a confidence of {}".format(
                word.content.encode('utf-8'), word.confidence
            )
        )
    for selection_mark in page.selection_marks:
        print(
            "...Selection mark is '{}' and has a confidence of {}".format(
                selection_mark.state, selection_mark.confidence
            )
        )


Lines found on page 1
...Line 'b'ROBERT COOPER''
...Line 'b'SECURITY GUARD''
...Line 'b'\xc2\xb7 LOS ANGELES, CA 90291, UNITED STATES \xc2\xa2 3868683442''
...Line 'b'. DETAILS \xc2\xba''
...Line 'b'1515 Pacific Ave''
...Line 'b'Los Angeles, CA 90291''
...Line 'b'United States''
...Line 'b'3868683442''
...Line 'b'email@email.com''
...Line 'b'Place of birth''
...Line 'b'San Antonio''
...Line 'b'Driving license''
...Line 'b'Full''
...Line 'b'O LINKS \xc2\xba''
...Line 'b'LinkedIn''
...Line 'b'Pinterest''
...Line 'b'Resume Templates''
...Line 'b'Build this template''
...Line 'b'. SKILLS \xc2\xba''
...Line 'b'Investigation skills''
...Line 'b'Criminal justice knowledge''
...Line 'b'Safety compliance''
...Line 'b'Restraining devices''
...Line 'b'Martial arts/Physical combat''
...Line 'b'training''
...Line 'b'\xc2\xb7 HOBBIES \xc2\xba''
...Line 'b'Running, Mtb, Enduro''
...Line 'b'\xc2\xb7 LANGUAGES \xc2\xba''
...Line 'b'English''
...Line 'b'Spanish''
...Line 'b'Italian''
...Line 'b'PROFILE

In [38]:
for i, table in enumerate(result.tables):
    print("\nTable {} can be found on page:".format(i + 1))
    for region in table.bounding_regions:
        print("...{}".format(i + 1, region.page_number))
    for cell in table.cells:
        print(
            "...Cell[{}][{}] has content '{}'".format(
                cell.row_index, cell.column_index, cell.content.encode('utf-8')
            )
        )
print("-----------------------------------")


-----------------------------------
