# Azure AI Search Tutorial

## Setup

In [None]:
import asyncio
import datetime

from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.indexes.models import (
    AzureOpenAIEmbeddingSkill,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    CognitiveServicesAccountKey,
    EntityRecognitionSkill,
    HnswAlgorithmConfiguration,
    HnswParameters,
    IndexingSchedule,
    IndexProjectionMode,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    ScoringProfile,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SearchIndexer,
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection,
    SearchIndexerIndexProjection,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    SearchIndexerSkillset,
    SemanticConfiguration,
    SemanticField,
    SemanticPrioritizedFields,
    SemanticSearch,
    SplitSkill,
    TagScoringFunction,
    TagScoringParameters,
    TextWeights,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizedQuery
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding

In [None]:
# API endpoints and keys
# NOTE: every value below is a placeholder; substitute your own and keep real
# keys and connection strings out of version control.
#
# Azure OpenAI via Azure AI Studio
# setup:
# 1. create Azure AI Studio resource
# 2. model catalog -> deploy a language model
# 3. model catalog -> deploy a text embedding model
# 4. assign "AI Developer" role for the AI Search and AI Services managed identities (portal -> access control, IAM)
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = "gpt-4o"
AZURE_OPENAI_RESOURCE = "https://XXXXXXXXXXXXXXXXXXXX.openai.azure.com"
AZURE_OPENAI_ENDPOINT = "https://XXXXXXXXXXXXXXXXXXXX.openai.azure.com/XXXXXXXXXXXXXXXXXXXX"
AZURE_OPENAI_API_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
AZURE_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-3-small"
AZURE_EMBEDDING_ENDPOINT = "https://XXXXXXXXXXXXXXXXXXXX.openai.azure.com/XXXXXXXXXXXXXXXXXXXX"
# Azure AI Search
# setup:
# 1. create AI Search resource
# 2. turn on system managed identity (portal -> resource management -> identity)
AZURE_SEARCH_ENDPOINT = "https://XXXXXXXXXXXXXXXXXXXX.search.windows.net"
AZURE_SEARCH_API_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
# Azure Storage
# setup:
# 1. create storage account
# 2. create storage container
# 3. assign "Storage Blob Data Contributor" role for AI Search and AI Services managed identities (portal -> access control, IAM)
AZURE_STORAGE_CONNECTION_STRING = "DefaultEndpointsProtocol=https;AccountName=XXXXXXXXXXXXXXXXXXXX;AccountKey=XXXXXXXXXXXXXXXXXXXX;EndpointSuffix=core.windows.net"
# Alias for the original, misspelled name (doubled "R"), kept so that any
# existing reference to it still resolves.
AZURE_STORRAGE_CONNECTION_STRING = AZURE_STORAGE_CONNECTION_STRING
AZURE_STORAGE_CONTAINER_NAME = "XXXXXXXXXXXXXXXXXXXX"
# AI Services
# setup:
# 1. create Azure AI Services multi-service account (must be same region)
# 2. turn on system managed identity (portal -> resource management -> identity)
AZURE_AI_SERVICES_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

In [None]:
# semantic kernel setup: one kernel hosting a chat-completion service and a
# text-embedding service, each registered under its deployment name
kernel = Kernel()
chat_service_id = AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
embedding_service_id = AZURE_EMBEDDING_DEPLOYMENT_NAME

# chat completion service (registered first)
azure_openai_chat_gpt35 = AzureChatCompletion(
    api_key=AZURE_OPENAI_API_KEY,
    endpoint=AZURE_OPENAI_ENDPOINT,
    deployment_name=AZURE_OPENAI_CHAT_DEPLOYMENT_NAME,
    service_id=chat_service_id,
)
kernel.add_service(azure_openai_chat_gpt35)

# text embedding service; uses the same API key as the chat service but its
# own endpoint and deployment
embedding_gen = AzureTextEmbedding(
    api_key=AZURE_OPENAI_API_KEY,
    endpoint=AZURE_EMBEDDING_ENDPOINT,
    deployment_name=AZURE_EMBEDDING_DEPLOYMENT_NAME,
    service_id=embedding_service_id,
)
kernel.add_service(embedding_gen)

# show what ended up registered on the kernel
print(kernel.services)

In [None]:
async def get_text_embedding(text: str) -> list[float]:
    """Return the embedding vector for a single piece of text.

    The request is made through the module-level ``embedding_gen`` service.

    Args:
        text: The text to embed.

    Returns:
        The first entry of the generated embeddings, converted to a list.
    """
    # generate_embeddings is declared to take a list of texts, so wrap the
    # single string instead of relying on a bare str being accepted.
    embeddings = await embedding_gen.generate_embeddings([text])
    return list(embeddings[0])

## Index Schema

In [None]:
# schema: four plain string fields, then the chunk key, the chunk text and
# the chunk's embedding vector
fields = [
    *(
        SearchField(name=plain_name, type=SearchFieldDataType.String)
        for plain_name in ("id", "title", "source", "url")
    ),
    # document key for the index
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        analyzer_name="keyword",
        key=True,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    # chunk text: explicitly not sortable, filterable or facetable
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        facetable=False,
        filterable=False,
        sortable=False,
    ),
    # vector field, tied to the "embedding_profile" vector search profile
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_profile_name="embedding_profile",
        vector_search_dimensions=1536,
        searchable=True,
    ),
]

# vector search: a single profile pointing at a single HNSW configuration
vector_search = VectorSearch(
    profiles=[
        VectorSearchProfile(
            algorithm_configuration_name="hnsw_config",
            name="embedding_profile",
        )
    ],
    algorithms=[HnswAlgorithmConfiguration(name="hnsw_config")],
)

# semantic search: title as the title field, chunk text as content,
# and title/source/id as keyword fields
semantic_config = SemanticConfiguration(
    name="semantic_config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        content_fields=[SemanticField(field_name="chunk")],
        keywords_fields=[
            SemanticField(field_name=keyword_name)
            for keyword_name in ("title", "source", "id")
        ],
    ),
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# create (or update) the index on the search service
index_name = "testindex"
index_client = SearchIndexClient(
    credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
    endpoint=AZURE_SEARCH_ENDPOINT,
)
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
index_client.create_or_update_index(index)

## Document Chunking and Embedding

In [None]:
documents = [
    {
        "id": "0",
        "source": "Wikipedia",
        "title": "Notre Dame Fighting Irish football",
        "url": "https://en.wikipedia.org/wiki/Notre_Dame_Fighting_Irish_football",
        "content": """The Notre Dame Fighting Irish football team is the college football team representing the University of Notre Dame in Notre Dame, Indiana, north of the city of South Bend, Indiana. The team plays its home games at the campus's Notre Dame Stadium, which has a capacity of 77,622. Notre Dame is one of three schools that competes as an Independent at the National Collegiate Athletic Association (NCAA) Football Bowl Subdivision (FBS) level; however, they play five games a year against opponents from the Atlantic Coast Conference (ACC), of which Notre Dame is a member in all other sports except ice hockey.[5] The Fighting Irish are among the most prestigious college football teams of all time. Since their inaugural season in 1887, Notre Dame has claimed 11 national championships, including 8 from the major wire-service: AP Poll and/or Coaches' Poll.[6] Seven Notre Dame players have won the Heisman Trophy. Notre Dame has 948 official victories, with an additional 21 having been vacated by the NCAA in 2016 for self-reported academic misconduct.[7] Notre Dame has had 22 undefeated seasons including 12 perfect seasons.[8] Notre Dame home games have been televised by NBC since 1991.[9][10]""",
    },
    {
        "id": "1",
        "source": "Wikipedia",
        "title": "Northwestern Wildcats football",
        "url": "https://en.wikipedia.org/wiki/Northwestern_Wildcats_football",
        "content": """The Northwestern Wildcats football team represents Northwestern University as an NCAA Division I college football team and member of the Big Ten Conference based near Chicago in Evanston, Illinois. Founded in 1851, Northwestern began playing football in 1882. Its football mascot is the Wildcat, a term coined by a Chicago Tribune reporter in 1924, after reporting on a football game where the players appeared as "a wall of purple wildcats".[2] Northwestern Football is also marketed as "Chicago's Big Ten Team" with its proximity and ties to Chicago.[3] The Wildcats have won three Big Ten championships or co-championships since 1995, and have been "bowl eligible" five times between 2015 and 2020. Northwestern consistently ranks among the national leaders in graduation rate among football teams, having received the AFCA Academic Achievement Award four times since 2002.[4] The Wildcats first played their home games at Northwestern Field, which was replaced by Ryan Field (formerly Dyche Stadium) in 1926.""",
    },
    {
        "id": "2",
        "source": "Wikipedia",
        "title": "Notre-Dame de Paris",
        "url": "https://en.wikipedia.org/wiki/Notre-Dame_de_Paris",
        "content": """Notre-Dame de Paris (French: [nɔtʁ(ə) dam də paʁi] ⓘ; meaning "Our Lady of Paris"), often referred to simply as Notre-Dame,[a] is a medieval Catholic cathedral on the Île de la Cité (an island in the River Seine), in the 4th arrondissement of Paris, France. The cathedral, dedicated to the Virgin Mary, is considered one of the finest examples of French Gothic architecture. Several attributes set it apart from the earlier Romanesque style, particularly its pioneering use of the rib vault and flying buttress, its enormous and colourful rose windows, and the naturalism and abundance of its sculptural decoration.[6] Notre-Dame also stands out for its three pipe organs (one historic) and its immense church bells.[7] Built during the medieval era, construction of the cathedral began in 1163 under Bishop Maurice de Sully and was largely completed by 1260, though it was modified in succeeding centuries. In the 1790s, during the French Revolution, Notre-Dame suffered extensive desecration; much of its religious imagery was damaged or destroyed. In the 19th century, the coronation of Napoleon and the funerals of many of the French Republic's presidents took place at the cathedral. The 1831 publication of Victor Hugo's novel Notre-Dame de Paris (in English: The Hunchback of Notre-Dame) inspired interest which led to restoration between 1844 and 1864, supervised by Eugène Viollet-le-Duc. On 26 August 1944, the Liberation of Paris from German occupation was celebrated in Notre-Dame with the singing of the Magnificat. Beginning in 1963, the cathedral's façade was cleaned of soot and grime. Another cleaning and restoration project was carried out between 1991 and 2000.[8] A fire in April 2019 caused serious damage and forced the cathedral to close for five years; it is planned to reopen on 8 December 2024. The cathedral is a widely recognized symbol of the city of Paris and the French nation. In 1805, it was awarded honorary status as a minor basilica. 
As the cathedral of the archdiocese of Paris, Notre-Dame contains the cathedra of the archbishop of Paris (currently Laurent Ulrich). In the early 21st century, approximately 12 million people visited Notre-Dame annually, making it the most visited monument in Paris.[9] The cathedral is renowned for its Lent sermons, a tradition founded in the 1830s by the Dominican Jean-Baptiste Henri Lacordaire. These sermons have increasingly been given by leading public figures or government-employed academics. Over time, the cathedral has gradually been stripped of many decorations and artworks. However, the cathedral still contains Gothic, Baroque, and 19th-century sculptures, 17th- and early 18th-century altarpieces, and some of the most important relics in Christendom – including the Crown of Thorns, and a sliver and nail from the True Cross.""",
    },
    {
        "id": "3",
        "source": "Wikipedia",
        "title": "Van Halen",
        "url": "https://en.wikipedia.org/wiki/Van_Halen",
        "content": """Van Halen (/væn ˈheɪlən/ van HAY-len) was an American rock band formed in Pasadena, California, in 1972. Credited with restoring hard rock to the forefront of the music scene,[1] Van Halen was known for their energetic live performances[2] and for the virtuosity of its guitarist, Eddie Van Halen.[3][4] The band was inducted into the Rock and Roll Hall of Fame in 2007. From 1974 to 1985, Van Halen consisted of Eddie Van Halen, his brother, drummer Alex Van Halen, lead vocalist David Lee Roth, and bassist Michael Anthony.[5] Upon its release in 1978, the band's self-titled debut album reached No. 19 on the Billboard 200 and would sell over 10 million copies in the United States, achieving a Diamond certification by the Recording Industry Association of America (RIAA). By 1982, the band released four more albums: Van Halen II (1979), Women and Children First (1980), Fair Warning (1981), and Diver Down (1982), all of which have since been certified multi-platinum. By the early 1980s, Van Halen was among the most commercially successful rock acts.[6] The album 1984, released in the eponymous year, was a commercial success with U.S. sales of 10 million copies and four successful singles. Its lead single, "Jump", was the band's only number one single on the Billboard Hot 100. In 1985, Roth left the band to embark on a solo career and was replaced by former Montrose lead vocalist Sammy Hagar. With Hagar, the group released four U.S. number-one, multi-platinum albums over the course of 11 years: 5150 in 1986, OU812 in 1988, For Unlawful Carnal Knowledge in 1991, and Balance in 1995. The group also released a double-platinum live album, Live: Right Here, Right Now, in 1993. Hagar left the band in 1996 shortly before the release of the band's first greatest hits collection, Best Of – Volume I. 
Former Extreme frontman Gary Cherone replaced Hagar and recorded the commercially unsuccessful album Van Halen III with the band in 1998, before parting ways in 1999. Van Halen then went on hiatus until reuniting with Hagar in 2003 for a worldwide tour in 2004 and the double-disc greatest hits collection, The Best of Both Worlds (2004). Hagar again left Van Halen in 2005. Roth returned in 2006, but Anthony was replaced on bass guitar by Eddie's son, Wolfgang Van Halen. In 2012, the band released their final studio album, A Different Kind of Truth, which was commercially and critically successful. It was also Van Halen's first album with Roth in 28 years and the only one to feature Wolfgang. Eddie was diagnosed with cancer in 2001, and died of the disease on October 6, 2020.[7][8][9] A month after his father's death, Wolfgang confirmed that Van Halen had disbanded.[10] As of March 2019, Van Halen is 20th on the RIAA's list of best-selling artists in the United States; the band has sold 56 million albums in the U.S.[11][12] and more than 80 million worldwide, making them one of the best-selling groups of all time.[13][14][15] As of 2007, Van Halen is one of only five rock bands with two studio albums to sell more than 10 million copies in the United States[16] and is tied for the most multi-platinum albums by an American band. Additionally, Van Halen has charted 13 number-one hits on Billboard's Mainstream Rock chart. VH1 ranked the band seventh on its list of the "100 Greatest Hard Rock Artists".[17] History 1972–1977: Formation and early history The Van Halen brothers were born in Amsterdam, the Netherlands, Alex Van Halen in 1953 and Eddie Van Halen in 1955,[18] sons to Dutch musician Jan Van Halen and Indonesian-born Indo Eugenia Van Beers. The family moved to Pasadena, California, in 1962. 
Young Edward began learning classical piano by ear, and became so proficient he won an annual piano recital contest 2 or 3 years in a row, despite never mastering sight-reading sheet music. The brothers began playing music together in the 1960s, with Eddie on drums and Alex on guitar. However, while Ed was delivering newspapers to pay off his drum set, Alex secretly developed a passion and proficiency at them. Eventually, out of frustration and brotherly competition, Ed told Alex, "OK, you play drums and I'll play your guitar."[19] The Van Halen brothers formed their first band, the Broken Combs, in 1964. As they gained popularity playing backyard parties and local high school functions, they changed their name first to the Trojan Rubber Co, then in 1972 to Genesis, later still to Mammoth when they discovered Genesis was already in use by a major-label British band. At this time the band included Eddie on both vocals and lead guitar and friend Mark Stone on bass. They rented a sound-system from Indiana-born Pasadena transplant David Lee Roth for $10 per night. The loquacious, worldly, energetic son of a local ophthalmologist, Roth fronted a local R&B influenced rock band the Red Ball Jets. Roth's uncle Manny owned NYC's Bleecker street Cafe Wha? until 1968. Partly to save money, they now invited Roth to join as their lead vocalist despite previous unconvincing audition(s).[20] Ultimately Roth's charismatic "Jim Dandy" approach would be both an artistic foil to Eddie's circumspect, guitar prodigy talents as well as allowing Eddie to focus his energies on song composition. Van Halen performing at La Cañada High School in 1975. In 1973, Mammoth officially changed its name to Van Halen.[5] According to Roth,[21] this was his brainchild. He felt it was a name that held long-term identity, artistic and marketing advantages, like Santana. 
They continued to play Pasadena, San Bernardino, and Venice at clubs, festivals, backyard parties and city parks like Hamilton, drawing up to 2000 people. Traffic jams and noise complaints to the local police often ensued, as far away as San Pedro.[22] Van Halen subsequently played clubs in Los Angeles and West Hollywood to growing audiences, increasing their popularity entirely through self-promotion, passing out flyers at local high schools. This tenacious self-promotion soon built them an auspicious, loyal, area following.[20] Flyer handed out at La Cañada High School show. Ed playing an Ibanez Destroyer. By 1974, Roth had been in the band for about a year, and they decided to replace the ambivalent Stone, who was unsure about a career in music. Michael Anthony Sobolewski, a Pasadena college music-classmate of Eddie's, joined the group after an all-night jam session. He had sung and played bass in a number of less successful Arcadia backyard-party bands, including Snake. Although he was hesitant, his own Snake-bandmate encouraged him to seize this opportunity.[20] Also in 1974, the band had a major break when it was hired to play regularly at the Sunset Strip club Gazzarri's. The Doors had also "broken" there in the late 1960s. Owner Bill Gazzarri previously claimed VH was too loud. However, their new managers, Mark Algorri and Mario Miranda, took over the club's hiring and booked them through 1976.[citation needed] By the Spring of 1975, they were also the regular Tuesday night band at Myron's Ballroom.[23] They had succeeded in becoming a staple of the Los Angeles music scene during the mid-1970s, playing at well-known clubs like the Whisky a Go Go on Sunset Strip.[24] All the club gig success led naturally to the need for a demo tape, which was recorded at Cherokee Studios in Northridge where Steely Dan had recently completed an LP. According to a January 4, 1977, L.A. 
Times article by Robert Hilburn,[25] entitled "HOMEGROWN PUNK", Rodney Bingenheimer saw Van Halen at Gazzarri's in the summer of 1976, and enticed Gene Simmons of Kiss to see them. Impressed to action, Simmons produced a 29-track Van Halen demo tape, entitled "Zero" at Village Recorder studios in Los Angeles and with post-production overdubs completed at Electric Lady Studios in New York.[19] Simmons even suggested changing their name to "Daddy Longlegs." However, a very disappointed Simmons could do no more once Kiss management opined that VH "had no chance of making it".[26] 1977–1985: Breakthrough and initial success with David Lee Roth Doug Messenger, Van Morrison's band leader guitarist, knowing that Ted Templeman was looking for a "guitar hero" act, had seen Van Halen at the Starwood in Hollywood and placed a number of calls to Warner Bros. Records for Ted to check them out. "I don't know if it was 4 calls or 10, but I knew this was exactly the act Ted wanted. So on a horrendously rainy night in mid-1977", Warner Bros. executive Mo Ostin and producer Ted Templeman saw Van Halen perform at the Starwood in Hollywood.[27] According to a December 1977 story in the Los Angeles Times, it was Van Halen's first booking at the Starwood and the first time they hired their own roadies. "We wanted to come on with a little class and we couldn't be seen setting up our own stuff in Hollywood," explained Roth.[28] Although the audience was negligible — Messenger claims only a barmaid and himself were there until Ostin and Templeman arrived — the Warner Bros. reps were so impressed that they wrote a letter of intent on a napkin, and within a week met at a local diner with the band, their future manager Marshall Berle (nephew of comedian Milton Berle) and Warner touring manager Noel Monk, who had just guided the Sex Pistols across the United States. 
According to Noel Monk's book, the band's car had broken down enroute to the meeting at the diner, and rather than leave the Warner Bros. reps waiting and appear to be an irresponsible band, the members of Van Halen actually ran the remaining distance of several miles to arrive only slightly late .[29] Warner offered the band a rather basic two album recording contract, one that heavily favored Warner, paying the four young men only $0.70 per unit (album) sold, a deal that would leave the band over $1 million in debt at the conclusion of their first supporting tour as the opening act for Journey and Ronnie Montrose.[30] The group recorded their debut album at Sunset Sound Recorders studio from mid-September to early October 1977, recording guitar parts for one week and then vocals for two additional. All of the tracks were laid down with little over-dubbing or multi-tracking. Minor mistakes were purposely left on the record and a very rudimentary instrument set-up was used to give the record a live feel. During this time, they continued to play various venues in Southern California, including some notable concerts at the Pasadena Convention Center produced by their promoter and impresario, Steve Tortomasi, himself a fixture in the local rock and roll scene. Upon its release, Van Halen reached No. 19 on the Billboard pop music charts, one of rock's most commercially successful debuts.[31] It was highly regarded as both a heavy metal and hard rock album.[32] The album included songs now regarded as Van Halen classics, like "Runnin' with the Devil" and the guitar solo "Eruption", which showcased Eddie's use of a technique known as "finger-tapping", leading into what became the band's first single, a cover of "You Really Got Me". The band toured for 9 months more, opening for Black Sabbath and establishing a reputation for their performances.[33] The band's chemistry was based on Eddie Van Halen's guitar technique and David Lee Roth's charisma. 
The band returned to the studio for 2 weeks, in late 1978, to record Van Halen II, a 1979 LP similar in style to their debut. This record yielded the band's first hit single, "Dance the Night Away", which peaked at 15 on BB Hot 100. Over the next four years, the band toured non stop, never taking more than 2 weeks to record an album. Their album Women and Children First was released in 1980, and further cemented Van Halen's platinum-selling status to Warner Bros. It yielded two hit singles, "And the Cradle Will Rock..." and "Everybody Wants Some!!". For the first time, an amplified Wurlitzer electric piano was used to complement Ed's guitar. In 1981, during the recording of Fair Warning, Eddie's desire for darker, more complex songs in minor keys was at odds with Roth's pop tastes and style. Nonetheless, Roth and veteran Warner Bros. rock producer Ted Templeman acquiesced to Eddie's wishes on this album. Doug Messenger recalled how Ed and engineer Don Landee rerecorded the "Unchained" solo hours after Ted "stormed out of" the studio. This darker album only reached platinum status after $250,000 of payola pushed it up nationwide from 400k copies.[33] Planning to release a cover single, then take a hiatus, Roth and Ed agreed upon a remake of the 1960s Roy Orbison song "Oh, Pretty Woman", which peaked at 12 on BB Top 100. "Oh, Pretty Woman"'s comical video helped its immediate success, but was also banned by MTV. Due to much pressure from Warner Bros., the hiatus was canceled and the Diver Down LP was squeezed out, again, within 2 weeks time.[33] Roth's preference for pop covers prevailed this time and with Ed's synthesizer and guitar riffs Diver Down charted much better. 
The band then earned a spot in the Guinness Book of World Records for the highest-paid single appearance of a band: $1.5 million for a 90-minute set at Steve Wozniak's 1983 US Festival,[34] a show that both Noel Monk and Doug Messenger considered artistically a disaster, Roth being imbibed on alcohol to the point of forgetting lyrics.[33] Despite this return to form, Roth and Eddie's differences continued, and this caused friction with other band members. Billy Sheehan, after his band Talas completed a tour with Van Halen, claims he was approached by Eddie to replace Michael Anthony; the reasons for this were never completely clear to Sheehan, as nothing came of it.[35] During this time, Eddie contributed the score and instrumental songs to the movie The Wild Life.[36] The score was laden with drum machine and hinted at sounds and riffs that would come with their next two albums, 1984 and 5150. 1984 (released on January 9, 1984) was a commercial success, going five-times platinum after a year of release.[37] Recorded at Eddie's newly built 5150 Studios, the album featured keyboards, which had only been used sporadically on previous albums. The lead single, "Jump", featured a synthesizer hook and anthemic lyrics inspired by news coverage of a suicidal jumper. It became the band's first and only No. 1 pop hit with Roth, garnering them a Grammy nomination.[38] Following the 1984 Tour, Roth decided to quit and form a new band. Group members have given different reasons for the split, but all were firmly rooted in control of the band's sound, artistic direction, singles released and pace. Roth was concerned about Eddie playing music outside of Van Halen. Roth was also launching a successful solo career with two hit cover songs off his Crazy from the Heat EP, a remake of the Beach Boys' classic "California Girls" (#3 U.S.) 
and a pairing of the classic Al Jolson standard "Just a Gigolo" and "I Ain't Got Nobody"(#12 U.S.), which had previously been paired together by Louis Prima. Roth was also offered a $20-million film deal for a script titled Crazy from the Heat. Roth hoped Van Halen would contribute the soundtrack; however, the film deal fell through when CBS Pictures was reorganized in 1986. 1985–1996: Sammy Hagar era The introduction of Sammy Hagar (pictured in 2005) as vocalist continued the band's worldwide popularity. Eddie invited Patty Smyth of Scandal to replace Roth, but she declined.[43] Daryl Hall was also offered the lead vocal position in 1985, but also declined. Hall verified to Hagar, his musical guest in the May 2015 season premiere of Live from Daryl's House, that he was approached after a Hall & Oates concert.[44] Eddie was introduced to Sammy Hagar in 1985, via their mutual Ferrari mechanic. Hagar was the former frontman for the hard rock group Montrose, and now a solo artist coming off a very successful year. His hit single "I Can't Drive 55" came from his 1984 album VOA, produced by Ted Templeman, who had also produced Montrose's first album Montrose, as well as all of Van Halen's albums up to that point. Hagar agreed to sing as well as play rhythm guitar. When Warner Bros. president Mo Ostin came to the band's 5150 Studios to hear the band's progress, Hagar said the band played "Why Can't This Be Love" live with Eddie on keyboards, after which Ostin proclaimed: "I smell money."[45] The 1986 Van Halen album 5150 was a huge hit, becoming the band's first No. 1 album on the Billboard charts, driven by the keyboard-dominated singles "Why Can't This Be Love" (#3 U.S.), "Dreams", and "Love Walks In" (Top 30 U.S.). To further introduce the new era for the band, a new Van Halen logo was put on the cover. The new logo retained elements of the original, but now the lines extending from either side of 'VH' wrapped around and formed a ring. 
Following the release of the 5150 album, the "5150 Tour" was launched to support it across North America. Footage was released on VHS and Laserdisc as Live Without a Net. The band minimized the use of pre-Hagar Van Halen songs in the set.[46] All four studio albums produced during this period reached No. 1 on the Billboard pop music charts and 17 singles breached the top 12 of the mainstream rock tracks chart. During that era, a single taken from 1988's OU812, "When It's Love", reached the Top Five, peaking at No. 5. In addition, Van Halen was nominated for two Grammy awards. The band won the 1992 Grammy Award for Best Hard Rock Performance with Vocal for the album For Unlawful Carnal Knowledge.[47] Van Halen continued to enjoy success throughout the mid-1990s. They recorded a live album and concert film at two 1992 F.U.C.K. tour shows in Fresno, California called Live: Right Here, Right Now. During the F.U.C.K. and the live album supporting Live: Right Here Right Now tour, Night Ranger's Alan Fitzgerald played keyboards offstage every night allowing Eddie to concentrate on guitar. Fitzgerald would return to play offstage keyboards on the 2004, 2007, and 2012 tours. In 1995, Van Halen released the album Balance and supported Bon Jovi on their European Summer stadium tour. The Balance Tour was nicknamed "Ambulance Tour" by the band due to an amount of physical ailments, as Hagar had throat problems during the first concerts, Eddie suffered a hip injury caused by avascular necrosis, and Alex wore a neck brace due to ruptured vertebrae.[48][49] During the recording of songs for the film Twister, escalating tension between Hagar and the Van Halen brothers boiled over[50] and Hagar departed on Father's Day, 1996.[51] Hagar would claim he was fired, and Eddie would claim Hagar quit. 
The band had recorded "Humans Being", a song for which Eddie, unhappy with Hagar's lyrics, retitled the song and wrote the melody.[52] This upset Hagar,[52] and when they were to record a second song for the soundtrack, Hagar was in Hawaii for the birth of his child. It was not an easy birth as the baby was breech, so it needed to be delivered via C-section.[53] With Hagar back in Hawaii and against the idea of doing the project,[52] but having another song left to contribute, the Van Halen brothers alone recorded the instrumental "Respect the Wind". The performance, which featured Eddie playing guitar and Alex playing keyboards, was nominated for Best Rock Instrumental Performance at the 39th Annual Grammy Awards.[54] The band was also working on a compilation album. This led to conflicts with Hagar and the group's new manager, Ray Danniels (Ed Leffler's replacement and Alex Van Halen's former brother-in-law), even though it was Leffler who had renewed their contract with Warner Bros. Records and had added in the Best Of album option years before. Hagar was reluctant to work on a compilation album before a new album came out, but if the rest of the band and Danniels insisted on going forward with one at that time, his preference was that it should include only Roth-era songs, or as a third choice, that two volumes should be released, one of Roth-era songs and one of Hagar-era songs. During this same period, competing personal priorities and creative differences contributed to increasing interpersonal tensions within the band, particularly between Eddie and Hagar. The relationship between Hagar and Van Halen broke down.[52] 1996: Temporary reunion with Roth David Lee Roth called Eddie to discuss what tracks would be included on a planned Van Halen compilation (work on which had actually begun before Hagar's departure). They got along well, and Eddie invited him up to his house/studio. 
Shortly afterwards, Roth re-entered the studio with the band and producer Glen Ballard. Two songs from those sessions were added to the band's Best Of – Volume I album and released as singles to promote it. In September, Van Halen was asked to present an award at the 1996 MTV Video Music Awards. They agreed, and on September 4, 1996, the four original members of Van Halen made their first public appearance together in over eleven years. This helped to bring the compilation to No. 1 on the U.S. album charts. However, unknown to Roth, Eddie and Alex were still auditioning other singers, including Mitch Malloy.[55][56] The band's appearance on the 1996 MTV Video Music Awards fueled reunion speculation. But several weeks after the awards show, it was discovered that Roth was out of Van Halen again. Roth released a statement that Van Halen misled him into thinking they were seriously considering bringing him back into the band and that he had made clear to them beforehand that he did not want to do the awards show unless they were actually reuniting.[57] The next day, the Van Halen brothers and Anthony released their own statement, denying they had in any way led Roth to believe they were planning to bring him back into the band.[58] Eddie later recounted that at the MTV Video Music Awards appearance, he was embarrassed and outraged by Roth's antics while on camera behind Beck, who was giving an acceptance speech for the award that Van Halen had presented to him. At a backstage press conference, press queries about a reunion tour were met with Eddie saying that he needed a hip replacement and would have to record an entire new studio album before any tour. Roth told Eddie to avoid talking about negative things like his hip and the two almost came to blows.[59] 1996–2000: Gary Cherone era Vocalist Gary Cherone (pictured in 2008) joined the band briefly in the late 1990s. 
Van Halen's next lead singer was Gary Cherone, former frontman of the Boston-based band Extreme, a group which had enjoyed some popular success in the early 1990s.[60] The result was the album Van Halen III. Many songs were longer and more experimental than Van Halen's earlier work. It was a notable contrast from their previous material, with more focus on ballads than traditional rock songs ("How Many Say I", with Eddie on vocals). Sales were poor by the band's standards, only reaching a Gold certification, despite the album peaking at No. 4 on the U.S. charts. However, Van Halen III did produce the hit "Without You", and another album track, "Fire in the Hole", appeared on the Lethal Weapon 4 soundtrack. The album was followed by a tour. The III Tour saw Van Halen playing in new countries, including first ever visits to Australia and New Zealand. "Without You" acquired No. 1 place on the Billboard Mainstream Rock Charts in 1998, the 13th song of theirs to do so. This made them the band with the most Mainstream Rock No. 1s.[61] Van Halen returned to the studio and in early 1999 started work on a new album. For the sessions, they brought on Danny Kortchmar to produce.[62][63] Working titles of songs included "Left for Dead", "River Wide", "Say Uncle", "You Wear it Well", "More Than Yesterday", "I Don't Miss You ... Much", "Love Divine", and "From Here, Where Do We Go?"; more than 20 songs were rumored to have been written.[64] The project was never released, with Cherone leaving the band amicably in November 1999, citing musical differences and personal issues that he was going through.[65] None of the material from these sessions has been released, and in fact the band released no new material until three new songs written and recorded with Hagar were included on the 2004 Best of Both Worlds compilation. 
Cherone later stated that he believed if he and the band had toured first and then recorded an album they might have creatively gelled more and the album would have turned out better. Touring with Cherone had proven disappointing in terms of attendance. Eddie later admitted that Warner Bros. had forced his hand in parting with Cherone.[citation needed] Unlike with the previous two singers, there was reportedly no bad blood behind the breakup, and Cherone remained in contact and on good terms with Van Halen.[citation needed] As when Hagar left, speculation resumed on a Roth reunion.""",
    },
    {
        "id": "4",
        "source": "Wikipedia",
        "title": "Israel–Hamas war",
        "url": "https://en.wikipedia.org/wiki/Israel%E2%80%93Hamas_war",
        "content": """An armed conflict between Israel and Hamas-led Palestinian militant groups[x] has been taking place in the Gaza Strip and Israel since 7 October 2023. It is the fifth war of the Gaza–Israel conflict since 2008, and the most significant military engagement in the region since the Yom Kippur War in 1973.[138] It is the deadliest war for Palestinians in the history of the Israeli–Palestinian conflict.[139] The war began when Hamas-led militant groups launched a surprise attack on Israel on 7 October, which involved a rocket barrage and a few thousand militants breaching the Gaza–Israel barrier, attacking Israeli civilian communities and military bases. During this attack, 1,195 Israelis and foreign nationals were killed, including 815 civilians.[94][y] In addition, 251 Israelis and foreigners were taken captive into Gaza, with the stated goal to force Israel to release Palestinian prisoners and detainees.[140][141] Hamas said its attack was in response to Israel's continued occupation, blockade of Gaza, expansion of settlements, Israel's disregard for international law, as well as alleged threats to the Al-Aqsa Mosque and the general plight of Palestinians.[142][143][144] After clearing militants from its territory, Israel launched one of the most destructive bombing campaigns in modern history[145][146] and invaded Gaza on 27 October with the stated objectives of destroying Hamas and freeing hostages.[147][148] Since the start of the Israeli invasion, over 40,000 Palestinians in Gaza have been killed,[z] more than half of them women and children.[32][149][150] Israel's tightened blockade cut off basic necessities and attacks on infrastructure have destroyed Gaza's healthcare system and caused an impending famine as of February 2024.[151][152][153] By early 2024, Israeli forces had destroyed or damaged more than half of Gaza's houses,[154] at least a third of its tree cover and farmland,[155][156] most of its schools and universities,[157][158] 
hundreds of cultural landmarks,[159] and at least a dozen cemeteries.[160] Nearly all of the strip's 2.3 million Palestinian population have been forcibly displaced.[161][162] Over 100,000 Israelis were internally displaced as of February 2024.[163] The war continues to have significant regional and international repercussions. Large, primarily pro-Palestinian protests have taken place across the world, calling for a ceasefire. The International Court of Justice is reviewing a case accusing Israel of committing genocide in Gaza.[164] The United States has given Israel extensive military aid and vetoed multiple UN Security Council ceasefire resolutions.[165] Groups of the Axis of Resistance have attacked American military bases in the Middle East. Additionally, the Yemeni Houthi movement have engaged in attacks in the Red Sea on commercial vessels allegedly linked to Israel, incurring a US-led military response.[166] The ongoing exchange of strikes between Lebanon's Hezbollah and Israel escalated into an Israeli invasion of Lebanon on 1 October 2024.[167] The 1948 Palestine war saw the establishment of Israel over most of what had been Mandatory Palestine, with the exception of two separated territories that became known as the West Bank and the Gaza Strip, which were held by Jordan and Egypt respectively. Following the 1967 Six-Day War, Israel occupied the Palestinian territories of the West Bank and the Gaza Strip.[138] The upcoming period witnessed two popular uprisings by Palestinians against the Israeli occupation; the First and Second Intifadas in 1987 and 2000 respectively,[169] with the latter's end seeing Israel's unilateral withdrawal from Gaza in 2005.[170][171] Since 2007, the Gaza Strip has been governed by Hamas, an Islamist militant group, while the West Bank remained under the control of the Fatah-led Palestinian Authority. 
After Hamas' takeover, Israel imposed a blockade of the Gaza Strip,[172][173] that significantly damaged its economy.[174] The blockade was justified by Israel citing security concerns,[175] but international rights groups have characterized the blockade as a form of collective punishment.[176][177][178] Due to the Israeli blockade of the Gaza Strip, UNRWA reported that 81% of people were living below the poverty level in 2023, with 63% being food insecure and dependent on international assistance.[168][179] Since 2007, Israel and Hamas, along with other Palestinian militant groups based in Gaza, have engaged in conflict,[175][173][180] including in four wars in 2008–2009, 2012, 2014, and 2021.[181][182] These conflicts killed approximately 6,400 Palestinians and 300 Israelis.[183][105][168] In 2018–2019, there were large weekly organized protests near the Gaza-Israel border, which were violently suppressed by Israel, whose forces killed hundreds and injured thousands of Palestinians by sniper fire.[184][185] Soon after the 2021 Israel–Palestine crisis began, Hamas' military wing, the Al-Qassam Brigades, started planning the 7 October 2023 operation against Israel.[186][187] Hamas officials stated that the attack was a response to the Israeli occupation, blockade of the Gaza Strip, Israeli settler violence against Palestinians, restrictions on the movement of Palestinians, and imprisonment of thousands of Palestinians, whom Hamas sought to release by taking Israeli hostages.[142][143][188] Numerous commentators have identified the broader context of Israeli occupation as a cause of the war.[189][190][191] Deputy political leader Saleh al-Arouri said that Hamas believed Israel was planning to launch an attack on the West Bank and Gaza after the High Holy Days.[192][193] The Associated Press wrote that Palestinians are "in despair over a never-ending occupation in the West Bank and suffocating blockade of Gaza".[194] Several human rights organizations, including 
Amnesty International,[195] B'Tselem[196] and Human Rights Watch[197] have likened the Israeli occupation to apartheid, although supporters of Israel dispute this characterization.[198][199] An advisory opinion by the International Court of Justice published in July 2024 declared the occupation illegal and said it violated Article 3 of the International Convention on the Elimination of All Forms of Racial Discrimination, which prohibits racial segregation and apartheid.[200]""",
    },
]

In [None]:
def chunk_text_with_overlap(
    text: str, chunk_size: int = 2500, overlap_percent: int = 25
) -> list[str]:
    """Split ``text`` into fixed-size character chunks that overlap.

    With the defaults each chunk is 2500 characters, which is approximately
    500 tokens, and repeats the last 25% of the previous chunk.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap_percent: Percentage of ``chunk_size`` that each chunk shares
            with the previous one; must satisfy ``0 <= overlap_percent < 100``.

    Returns:
        The chunks in order of appearance; an empty list for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap_percent``
            is outside ``0 <= overlap_percent < 100``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap_percent < 100:
        # 100 or more would give a zero/negative stride; negatives would leave gaps.
        raise ValueError(f"overlap_percent must be in [0, 100), got {overlap_percent}")

    overlap_size = int(chunk_size * overlap_percent / 100)
    step = chunk_size - overlap_size
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start : start + chunk_size])
        # Stop once this chunk reaches the end of the text; another iteration
        # would only emit a chunk made entirely of already-covered overlap.
        if start + chunk_size >= len(text):
            break
    return chunks

In [None]:
processed_documents = []
for each_document in documents:
    chunks = chunk_text_with_overlap(each_document["content"])
    each_chunk_id = 0
    for each_chunk in chunks:
        each_chunk_embedding = await get_text_embedding(each_chunk)
        each_processed_document = {
            "id": each_document["id"],
            "source": each_document["source"],
            "title": each_document["title"],
            "url": each_document["url"],
            "chunk": each_chunk,
            "chunk_id": f"{each_document["id"]}-{each_chunk_id}",
            "embedding": each_chunk_embedding,
        }
        each_chunk_id += 1
        processed_documents.append(each_processed_document)
print(len(processed_documents))

## Upload Documents 

In [None]:
# upload processed documents: one record per embedded chunk
search_credential = AzureKeyCredential(AZURE_SEARCH_API_KEY)
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, index_name, credential=search_credential)
upload_results = search_client.upload_documents(documents=processed_documents)
# Trailing expression so the notebook displays each upload result as a dict.
[upload_result.as_dict() for upload_result in upload_results]

## Queries (Simple, Vector, Hybrid, and Semantic)

In [None]:
# Query text shared by the keyword, vector, hybrid, and semantic search cells that follow.
search_query = "university of notre dame"

In [None]:
# Keyword-only query: text search with the "simple" query type, no vectors.
results = search_client.search(search_text=search_query, query_type="simple", top=5)
print("Keyword Search:")
for each_result in results:
    result_line = f"chunk_id: {each_result['chunk_id']}, title: {each_result['title']}, score: {each_result['@search.score']}"
    print(result_line)

In [None]:
search_query_embedding = await get_text_embedding(search_query)
results = search_client.search(
    search_text=None,
    top=5,
    vector_queries=[
        VectorizedQuery(vector=search_query_embedding, k_nearest_neighbors=10, fields="embedding")
    ],
)
print("Vector Search:")
for each_result in results:
    print(
        f"chunk_id: {each_result['chunk_id']}, title: {each_result['title']}, score: {each_result['@search.score']}"
    )

In [None]:
search_query_embedding = await get_text_embedding(search_query)
results = search_client.search(
    search_text=search_query,
    top=5,
    vector_queries=[
        VectorizedQuery(vector=search_query_embedding, k_nearest_neighbors=10, fields="embedding")
    ],
)
print("Hybrid Search:")
for each_result in results:
    print(
        f"chunk_id: {each_result['chunk_id']}, title: {each_result['title']}, score: {each_result['@search.score']}"
    )

In [None]:
search_query_embedding = await get_text_embedding(search_query)
results = search_client.search(
    search_text=search_query,
    top=5,
    vector_queries=[
        VectorizedQuery(vector=search_query_embedding, k_nearest_neighbors=10, fields="embedding")
    ],
    query_type="semantic",
    semantic_configuration_name="semantic_config",
)
print("Hybrid Search with Semantic Re-Ranker:")
for each_result in results:
    print(
        f"chunk_id: {each_result['chunk_id']}, title: {each_result['title']}, semantic score: {each_result['@search.reranker_score']}"
    )

## External Data Source Connection

In [None]:
# Register an Azure Blob container as a data source connection on the search service.
data_source_connection_name = "test-datasource"
container = SearchIndexerDataContainer(name=AZURE_STORAGE_CONTAINER_NAME)
# NOTE(review): "STORRAGE" is spelled differently from AZURE_STORAGE_CONTAINER_NAME;
# confirm this matches the constant defined in the setup cell.
data_source_connection = SearchIndexerDataSourceConnection(
    name=data_source_connection_name,
    type="azureblob",
    connection_string=AZURE_STORRAGE_CONNECTION_STRING,
    container=container,
)
indexer_client = SearchIndexerClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
)
data_source = indexer_client.create_or_update_data_source_connection(data_source_connection)
# Trailing expression so the notebook displays the returned connection.
data_source

## Skillsets (AI Search Integrated Chunking, Enrichments, and Embedding)

In [None]:
# index schema
# Plain string fields declared with only a name and a type.
metadata_fields = [
    SearchField(name=metadata_name, type=SearchFieldDataType.String)
    for metadata_name in ("id", "title", "source", "url", "parent_id")
]
# Key field of the index; uses the "keyword" analyzer.
chunk_id_field = SearchField(
    name="chunk_id",
    type=SearchFieldDataType.String,
    key=True,
    sortable=True,
    filterable=True,
    facetable=True,
    analyzer_name="keyword",
)
# Chunk text: not sortable, filterable, or facetable.
chunk_field = SearchField(
    name="chunk",
    type=SearchFieldDataType.String,
    sortable=False,
    filterable=False,
    facetable=False,
)
# Filterable string collections, one per entity category.
entity_fields = [
    SearchField(
        name=entity_name,
        type=SearchFieldDataType.Collection(SearchFieldDataType.String),
        filterable=True,
    )
    for entity_name in ("locations", "persons", "organizations")
]
# 1536-dimensional vector field bound to the vector search profile below.
embedding_field = SearchField(
    name="embedding",
    type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
    vector_search_dimensions=1536,
    vector_search_profile_name="embedding_profile",
)
fields = [*metadata_fields, chunk_id_field, chunk_field, *entity_fields, embedding_field]

# vector search
hnsw_algorithm = HnswAlgorithmConfiguration(name="hnsw_config")
embedding_profile = VectorSearchProfile(
    name="embedding_profile",
    algorithm_configuration_name="hnsw_config",
    vectorizer_name="openai_vectorizer",
)
# NOTE(review): model_name is set to the deployment name; confirm the
# deployment is named after the embedding model.
openai_vectorizer = AzureOpenAIVectorizer(
    vectorizer_name="openai_vectorizer",
    kind="azureOpenAI",
    parameters=AzureOpenAIVectorizerParameters(
        resource_url=AZURE_OPENAI_RESOURCE,
        deployment_name=AZURE_EMBEDDING_DEPLOYMENT_NAME,
        model_name=AZURE_EMBEDDING_DEPLOYMENT_NAME,
    ),
)
vector_search = VectorSearch(
    algorithms=[hnsw_algorithm],
    profiles=[embedding_profile],
    vectorizers=[openai_vectorizer],
)

# semantic configuration
prioritized_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="title"),
    keywords_fields=[SemanticField(field_name="locations")],
    content_fields=[SemanticField(field_name="chunk")],
)
semantic_config = SemanticConfiguration(
    name="semantic_config",
    prioritized_fields=prioritized_fields,
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# create index
index_name = "testindex-skills"
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
index_client = SearchIndexClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
)
index_client.create_or_update_index(index)

In [None]:
# reference: https://github.com/Azure-Samples/azure-search-python-samples/blob/main/Tutorial-RAG/Tutorial-rag.ipynb
# chunking: split /document/content into pages of up to 2000 characters
# with a 500-character overlap, exposed as "pages"
split_skill = SplitSkill(
    description="chunk documents",
    context="/document",
    text_split_mode="pages",
    maximum_page_length=2000,
    page_overlap_length=500,
    inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
    outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
)

# vector embedding: runs once per page produced by the split skill
embedding_skill = AzureOpenAIEmbeddingSkill(
    description="generate embeddings",
    context="/document/pages/*",
    resource_url=AZURE_OPENAI_RESOURCE,
    deployment_name=AZURE_EMBEDDING_DEPLOYMENT_NAME,
    model_name=AZURE_EMBEDDING_DEPLOYMENT_NAME,
    dimensions=1536,
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=[OutputFieldMappingEntry(name="embedding", target_name="embedding")],
)

# ai services enrichments, extractions: entity outputs keep their own names
entity_outputs = [
    OutputFieldMappingEntry(name=entity_name, target_name=entity_name)
    for entity_name in ("locations", "persons", "organizations")
]
entity_skill = EntityRecognitionSkill(
    description="extract entities",
    context="/document/pages/*",
    categories=["Location", "Person", "Organization"],
    default_language_code="en",
    minimum_precision=0.2,
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=entity_outputs,
)

# index projections: one index entry per page, combining page-level
# enrichments with document-level metadata
page_level_mappings = [
    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
    InputFieldMappingEntry(name="embedding", source="/document/pages/*/embedding"),
    InputFieldMappingEntry(name="locations", source="/document/pages/*/locations"),
    InputFieldMappingEntry(name="persons", source="/document/pages/*/persons"),
    InputFieldMappingEntry(name="organizations", source="/document/pages/*/organizations"),
]
document_level_mappings = [
    InputFieldMappingEntry(name="id", source="/document/id"),
    InputFieldMappingEntry(name="source", source="/document/source"),
    InputFieldMappingEntry(name="title", source="/document/title"),
    InputFieldMappingEntry(name="url", source="/document/url"),
]
page_selector = SearchIndexerIndexProjectionSelector(
    target_index_name=index_name,
    parent_key_field_name="parent_id",
    source_context="/document/pages/*",
    mappings=page_level_mappings + document_level_mappings,
)
index_projections = SearchIndexerIndexProjection(
    selectors=[page_selector],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)

# create skillset
skillset_name = "test-skillset"
skills = [split_skill, embedding_skill, entity_skill]
cognitive_services_account = CognitiveServicesAccountKey(key=AZURE_AI_SERVICES_KEY)
skillset = SearchIndexerSkillset(
    name=skillset_name,
    skills=skills,
    index_projection=index_projections,
    cognitive_services_account=cognitive_services_account,
)
skills_client = SearchIndexerClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
)
skills_client.create_or_update_skillset(skillset)

In [None]:
# indexer: ties the data source, skillset, and target index together
indexer_name = "test-indexer-with-skills-1"
# Scheduled at a 24-hour interval.
indexing_schedule = IndexingSchedule(interval=datetime.timedelta(hours=24))
indexer = SearchIndexer(
    name=indexer_name,
    data_source_name=data_source.name,
    skillset_name=skillset_name,
    target_index_name=index_name,
    schedule=indexing_schedule,
)
# Create and run the indexer
indexer_client = SearchIndexerClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
)
indexer_client.create_or_update_indexer(indexer)

## Scoring Profiles

In [None]:
# ref: https://github.com/farzad528/azure-ai-search-python-playground/blob/main/azure-ai-search-document-boosting.ipynb
# can also use date, ratings, geolocation, etc.
# Profile 1: per-field text weights, with "title" weighted highest.
text_weights_profile = ScoringProfile(
    name="scoring_profile_weights",
    text_weights=TextWeights(
        weights={
            "title": 5.0,
            "url": 2.0,
            "source": 2.0,
            "chunk": 1.0,
        }
    ),
)
# Profile 2: tag function on "locations", reading tags from the "tags" parameter.
location_tag_function = TagScoringFunction(
    field_name="locations",
    boost=2.0,
    parameters=TagScoringParameters(tags_parameter="tags"),
)
location_tag_profile = ScoringProfile(
    name="scoring_profile_function_tag_location",
    functions=[location_tag_function],
)
# Profile 3: tag function on "persons", reading tags from the "tags" parameter.
persons_tag_function = TagScoringFunction(
    field_name="persons",
    boost=5.0,
    parameters=TagScoringParameters(tags_parameter="tags"),
)
persons_tag_profile = ScoringProfile(
    name="scoring_profile_function_tag_persons",
    functions=[persons_tag_function],
)
scoring_profiles = [text_weights_profile, location_tag_profile, persons_tag_profile]

# update index: same definition as before plus the scoring profiles
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
    scoring_profiles=scoring_profiles,
)
index_client.create_or_update_index(index)

In [None]:
search_query = "van halen"
index_name = "testindex-skills"
search_client = SearchClient(
    AZURE_SEARCH_ENDPOINT, index_name, credential=AzureKeyCredential(AZURE_SEARCH_API_KEY)
)
search_query_embedding = await get_text_embedding(search_query)
results = search_client.search(
    search_text=search_query,
    top=5,
    vector_queries=[
        VectorizedQuery(vector=search_query_embedding, k_nearest_neighbors=10, fields="embedding")
    ],
    query_type="semantic",
    semantic_configuration_name="semantic_config",
    search_fields=["title", "source", "url", "locations", "persons", "organizations", "chunk"],
)
print("Hybrid Search with Scoring Profile and Semantic Re-Ranker:")
for each_result in results:
    print(
        f"chunk_id: {each_result['chunk_id']}, title: {each_result['title']}, semantic score: {each_result['@search.reranker_score']}"
    )