In [None]:
%run PromptFunction_Relational

In [None]:
from synapse.ml.core.platform import find_secret

# --- Azure OpenAI connection settings: replace every <placeholder> below ---

# Name of your OpenAI service (used to build the endpoint URL).
service_name = "<MyOpenAIService>"

# Name of your model deployment inside that OpenAI service.
deployment_name = "<MyModelDepl>"

# API key, looked up by secret name and key vault name instead of being
# hardcoded in the notebook.
key = find_secret("<MySecret>", "<MyKeyVault>")

# --- Size of the generated data set ---
# Customers and restaurants are cross joined further down, so the number of
# generated reviews is NrOfCustomers * NrOfRestaurants.
NrOfCustomers = 5
NrOfRestaurants = 5


In [None]:
# One row per synthetic customer, with ids 1..NrOfCustomers (the upper bound of
# spark.range is exclusive, hence the + 1). Each row gets a "prompt" column
# produced by customer_prompt_udf, which is not defined in this notebook
# (presumably it comes from the notebook pulled in via %run -- verify there).
#
# The former chained assignment also bound a scratch name `df1`; nothing read
# it and the restaurant cell rebound it, so it has been dropped.
dfcustomerids = (
    spark.range(1, NrOfCustomers + 1)
    .withColumnRenamed("id", "customerid")
    .withColumn("prompt", customer_prompt_udf())
)

display(dfcustomerids)


In [None]:
# One row per synthetic restaurant, with ids 1..NrOfRestaurants (the upper
# bound of spark.range is exclusive, hence the + 1). Each row gets a "prompt"
# column produced by restaurant_prompt_udf, which is not defined in this
# notebook (presumably it comes from the notebook pulled in via %run -- verify
# there).
#
# The former chained assignment also bound a scratch name `df1`; nothing read
# it, so it has been dropped to avoid leaking a meaningless variable.
dfrestaurantids = (
    spark.range(1, NrOfRestaurants + 1)
    .withColumnRenamed("id", "restaurantid")
    .withColumn("prompt", restaurant_prompt_udf())
)

display(dfrestaurantids)


In [None]:
from synapse.ml.cognitive import OpenAICompletion

# Configure a single completion transformer that is reused for customers,
# restaurants and reviews.
#
# NOTE: the configured instance is deliberately bound to the same name as the
# imported class, because the following cells call `OpenAICompletion.transform`.
# After this cell runs, `OpenAICompletion` refers to the instance, not the class.
OpenAICompletion = (
    OpenAICompletion()
    # Where to send requests and how to authenticate.
    .setUrl("https://{}.openai.azure.com/".format(service_name))
    .setDeploymentName(deployment_name)
    .setSubscriptionKey(key)
    # Column wiring: read the prompt from "prompt", write the service reply to
    # "response" and any per-row failure to "error".
    .setPromptCol("prompt")
    .setOutputCol("response")
    .setErrorCol("error")
    # Upper bound on the number of tokens generated per completion.
    .setMaxTokens(2048)
)

In [None]:
from pyspark.sql.functions import col

# Send the customer prompts to the completion service and keep only the id
# plus the text of the first returned choice. The result is cached so that
# later cells reuse it instead of recomputing the transform.
df_customerobject = (
    OpenAICompletion.transform(dfcustomerids)
    .select(
        "customerid",
        col("response.choices.text").getItem(0).alias("customerobject"),
    )
    .cache()
)

# Same treatment for the restaurant prompts.
df_restaurantobject = (
    OpenAICompletion.transform(dfrestaurantids)
    .select(
        "restaurantid",
        col("response.choices.text").getItem(0).alias("restaurantobject"),
    )
    .cache()
)

display(df_customerobject)
display(df_restaurantobject)


In [None]:
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import col, from_json

# Layout of the JSON text expected in the "customerobject" column.
# Fields are left nullable: the text comes from a language model, and from_json
# yields null for anything it cannot parse, so a non-nullable declaration would
# promise something the data cannot guarantee.
schemaCustomer = StructType([
    StructField("firstname", StringType()),
    StructField("lastname", StringType()),
    StructField("username", StringType()),
    StructField("email", StringType()),
])

# Layout of the JSON text expected in the "restaurantobject" column.
schemaRestaurant = StructType([
    StructField("restaurant", StringType()),
    StructField("description", StringType()),
])

# Parse the generated JSON text and flatten its fields next to the id.
# The id columns are referenced in the lowercase spelling they were created
# with ("customerid" / "restaurantid"); the previous mixed-case spelling only
# resolved because Spark's analyzer is case-insensitive by default.
df_customer = (
    df_customerobject
    .withColumn("json", from_json(col("customerobject"), schemaCustomer))
    .select(col("customerid"), col("json.*"))
)

df_restaurant = (
    df_restaurantobject
    .withColumn("json", from_json(col("restaurantobject"), schemaRestaurant))
    .select(col("restaurantid"), col("json.*"))
)

display(df_customer)
display(df_restaurant)



In [None]:
# from_json is imported here as well so this cell does not depend on an
# earlier cell's import.
from pyspark.sql.functions import col, from_json, row_number, window, rand
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Pair every customer with every restaurant and build one review prompt per
# pair. (5 * rand()).cast("int") + 1 draws an integer in 1..5 for each row; it
# is passed to reviews_prompt_udf together with the restaurant name.
# rand() is unseeded, so the drawn values differ between evaluations.
cross_joined_df = df_customer.crossJoin(df_restaurant).withColumn(
    "prompt",
    reviews_prompt_udf(col("restaurant"), (5 * rand()).cast("int") + 1),
)

# Ask the completion service for one review per pair and keep the text of the
# first returned choice. Cached like the customer and restaurant results, so
# later actions reuse this output instead of calling the service again with
# freshly drawn random values.
df_reviewobject = (
    OpenAICompletion.transform(cross_joined_df)
    .withColumn("reviewobject", col("response.choices.text").getItem(0))
    .select(col("customerid"), col("restaurantid"), col("reviewobject"))
    .cache()
)

# Layout of the JSON text expected in the "reviewobject" column. Fields are
# nullable because from_json yields null for anything it cannot parse.
schemaReview = StructType([
    StructField("reviewdate", StringType()),
    StructField("review", StringType()),
    StructField("rating", IntegerType()),
])

# Parse the generated JSON text and flatten its fields next to both ids.
df_review = (
    df_reviewobject
    .withColumn("json", from_json(col("reviewobject"), schemaReview))
    .select(col("restaurantid"), col("customerid"), col("json.*"))
)

display(df_review)