**Retail domain**

Generate recommendation summaries for each store, based on the items its city is famous for, using store-locator data (store, address, and city tables).

Key Features
City-Specific Recommendations: Provides recommendations based on famous items in each city.

Engaging Summaries: Uses LangChain to generate engaging and personalized recommendations.

Scalability: Built with PySpark, making it suitable for large-scale retail datasets.

In [None]:
# Install necessary libraries
!pip install langchain langchain-openai openai pyspark --quiet

# Load OpenAI API Key from userdata (secure way to avoid hardcoding)
from google.colab import userdata
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, concat_ws
from pyspark.sql.types import StringType

# Initialize Spark session
spark = SparkSession.builder.appName("RetailDataProcessing").getOrCreate()

# Retrieve the OpenAI API key from Colab's secret store.
# userdata.get() raises (rather than returning None) when the secret does not
# exist or this notebook has not been granted access to it, so both cases are
# handled here instead of letting the cell abort with a traceback.
try:
    openai_api_key = userdata.get('OPENAI_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    openai_api_key = None

if openai_api_key:
    # Never echo any part of the key: cell output is saved with the notebook
    # and is routinely shared or committed.
    print("✅ OpenAI API Key is set.")
else:
    print(
        "❌ OpenAI API Key not set. Add OPENAI_API_KEY in the Colab Secrets panel "
        "(key icon in the left sidebar) and enable notebook access for it."
    )

# --- Stores: one row per store, keyed by store_id ---
store_columns = ["store_id", "store_name", "city"]
store_data = [
    (1, "Store A", "New York"),
    (2, "Store B", "Los Angeles"),
    (3, "Store C", "Chicago"),
]
store_df = spark.createDataFrame(data=store_data, schema=store_columns)

# --- Addresses: street / state / zipcode for each store_id ---
address_columns = ["store_id", "street", "state", "zipcode"]
address_data = [
    (1, "123 Main St", "NY", "10001"),
    (2, "456 Elm St", "CA", "90001"),
    (3, "789 Oak St", "IL", "60601"),
]
address_df = spark.createDataFrame(data=address_data, schema=address_columns)

# --- Famous items: a comma-separated item list for each city ---
famous_items_columns = ["city", "famous_items"]
famous_items_data = [
    ("New York", "Pizza, Bagels, Cheesecake"),
    ("Los Angeles", "Tacos, Burgers, Sushi"),
    ("Chicago", "Deep Dish Pizza, Hot Dogs, Popcorn"),
]
famous_items_df = spark.createDataFrame(data=famous_items_data, schema=famous_items_columns)

# Build the combined view in one pipeline:
#   1. attach each store's address (inner join on `store_id`),
#   2. attach the famous items of the store's city (inner join on `city`),
#   3. derive `full_address` as "street, city, state, zipcode".
joined_df = (
    store_df
    .join(address_df, on="store_id", how="inner")
    .join(famous_items_df, on="city", how="inner")
    .withColumn(
        "full_address",
        concat_ws(", ", "street", "city", "state", "zipcode"),
    )
)

# Display the combined rows without truncating long column values
print("Store Information with Full Address and Famous Items:")
joined_df.show(truncate=False)

# Function to generate a summary recommending famous items
def recommend_items(city, famous_items):
    """Generate a short, LLM-written recommendation for a city's famous items.

    The function is registered as a Spark UDF, so it is called once per row
    on the workers. It builds a prompt from the two inputs, sends it to the
    OpenAI completion model through LangChain, and returns the reply
    flattened to a single line.

    Args:
        city: City name inserted into the prompt.
        famous_items: Text listing the city's famous items, inserted into
            the prompt as-is.

    Returns:
        The recommendation text with all runs of whitespace (including
        newlines) collapsed to single spaces; ``None`` when either input is
        missing or empty; or the string ``"Error: <message>"`` when building
        or running the chain fails, so one bad row does not abort the job.
    """
    # Without both inputs the prompt would contain the literal text "None";
    # skip the (billed) API call and leave the column null instead.
    if not city or not famous_items:
        return None

    try:
        # Import inside the function so that only this function, not library
        # objects, has to be serialized and shipped to the Spark workers.
        # langchain_core is used directly because the `langchain.prompts` /
        # `langchain.chains.LLMChain` entry points are deprecated.
        from langchain_core.prompts import PromptTemplate
        from langchain_openai import OpenAI

        # Define prompt
        prompt_template = PromptTemplate(
            template="You are a helpful retail assistant. Recommend famous items to customers in {city}. "
                     "Here are some famous items in {city}: {famous_items}. "
                     "Write a short and engaging recommendation for customers.",
            input_variables=["city", "famous_items"]
        )

        # Initialize the LLM inside the function (NOT globally): the client
        # object is not meant to be pickled along with the UDF.
        llm = OpenAI(temperature=0.7, openai_api_key=openai_api_key)

        # Compose prompt -> model; invoking a completion LLM returns a str.
        chain = prompt_template | llm
        result = chain.invoke({"city": city, "famous_items": famous_items})

        # Collapse every run of whitespace (single or repeated newlines,
        # tabs, leading/trailing blanks) so the value fits on one table line.
        return " ".join(result.split())

    except Exception as e:
        # Deliberate best-effort: surface the failure in the output column
        # rather than failing the whole Spark task.
        return f"Error: {e}"

# Register as a UDF.
# The wrapped function samples an LLM at temperature 0.7, so identical inputs
# can produce different outputs. Spark treats UDFs as deterministic unless
# told otherwise and may then duplicate or eliminate invocations while
# optimizing the plan; marking it nondeterministic prevents that.
recommend_items_udf = udf(recommend_items, StringType()).asNondeterministic()

# Apply the UDF to create a new column "recommendation" in the DataFrame
joined_df_with_recommendations = joined_df.withColumn(
    "recommendation",
    recommend_items_udf("city", "famous_items")
)

# Show the resulting DataFrame with recommendations.
# NOTE: this action is what actually triggers the per-row LLM calls.
print("Store Information with Recommendations:")
joined_df_with_recommendations.show(truncate=False)