In [1]:
from langchain.vectorstores.cassandra import Cassandra
import cassio

In [2]:
import pandas as pd
import random

num_rows = 1000

engagement_data = {
        'post_id': list(range(1, num_rows + 1)),
        'post_type': [random.choice(["Photo Posts","Video Posts","Carousel Posts","Instagram Stories","Instagram Reels","Instagram Live","IGTV","Instagram Shopping Posts","Instagram Ads"]) for _ in range(num_rows)],
        'likes': [random.randint(50, 1000) for _ in range(num_rows)],
        'shares': [random.randint(5, 200) for _ in range(num_rows)],
        'comments': [random.randint(1, 500) for _ in range(num_rows)]
    }
df = pd.DataFrame(engagement_data)
df.to_csv("engagement_data.csv", index=False)

In [3]:
post_types =  list(df["post_type"].unique())
post_types

['Photo Posts',
 'Video Posts',
 'Instagram Live',
 'Instagram Reels',
 'Instagram Shopping Posts',
 'Instagram Ads',
 'IGTV',
 'Carousel Posts',
 'Instagram Stories']

In [4]:
from astrapy import DataAPIClient
import os
from dotenv import load_dotenv
load_dotenv()
# Initialize the client
client = DataAPIClient(os.getenv("ASTRA_DB_APPLICATION_TOKEN"))
db = client.get_database_by_api_endpoint(
  "https://84fa67e8-00d5-46b8-9283-e28598a5dd00-us-east-2.apps.astra.datastax.com"
)

print(f"Connected to Astra DB: {db.list_collection_names()}")

Connected to Astra DB: ['social_media_data']


In [5]:
collection = db.get_collection("social_media_data")
data_dict = df.to_dict(orient="records")


collection.insert_many(data_dict)

print(f"Inserted {len(data_dict)} records into the collection.")


Inserted 1000 records into the collection.


In [6]:
i=0
for record in collection.find():
    if i ==10:
        break
    print(record)
    i+=1

{'_id': '0b9114ec-319d-427b-9114-ec319d227bd5', 'post_id': 796, 'post_type': 'Instagram Live', 'likes': 56, 'shares': 105, 'comments': 305}
{'_id': '8016b583-20fa-4ed6-96b5-8320faded6b7', 'post_id': 498, 'post_type': 'Carousel Posts', 'likes': 838, 'shares': 75, 'comments': 207}
{'_id': 'b13bec27-2a29-4f63-bbec-272a29df63e3', 'post_id': 211, 'post_type': 'Video Posts', 'likes': 711, 'shares': 112, 'comments': 416}
{'_id': '16858f53-d67f-40c7-858f-53d67ff0c791', 'post_id': 853, 'post_type': 'Instagram Live', 'likes': 639, 'shares': 174, 'comments': 441}
{'_id': '33a93e90-1011-4976-a93e-90101119764f', 'post_id': 277, 'post_type': 'Video Posts', 'likes': 85, 'shares': 43, 'comments': 445}
{'_id': '73b3c80b-90ac-40e6-b3c8-0b90ac20e6ce', 'post_id': 881, 'post_type': 'Carousel Posts', 'likes': 744, 'shares': 44, 'comments': 3}
{'_id': '110b1304-6988-4aa7-8b13-0469881aa756', 'post_id': 956, 'post_type': 'Instagram Reels', 'likes': 272, 'shares': 18, 'comments': 163}
{'_id': '01e611ce-923c-470

In [7]:
from astrapy.client import DataAPIClient

def analyze_post_performance(post_type):
    # Connect to Astra DB
    client = DataAPIClient(os.getenv("ASTRA_DB_APPLICATION_TOKEN"))
    db = client.get_database_by_api_endpoint(
        "https://84fa67e8-00d5-46b8-9283-e28598a5dd00-us-east-2.apps.astra.datastax.com"
    )
    collection = db.get_collection("social_media_data")
    
    # Fetch data for the specified post_type
    documents = collection.find({"post_type": post_type})
    
    # Calculate average metrics
    metrics = {"likes": 0, "shares": 0, "comments": 0}
    count = 0
    for doc in documents:
        metrics["likes"] += doc["likes"]
        metrics["shares"] += doc["shares"]
        metrics["comments"] += doc["comments"]
        count += 1
    
    if count > 0:
        metrics = {key: value / count for key, value in metrics.items()}
    
    return metrics


In [8]:
for type in post_types:
    print(f"For Post type : {type}")
    metrics = analyze_post_performance(post_type=type)
    print(metrics)

For Post type : Photo Posts
{'likes': 519.1333333333333, 'shares': 105.85, 'comments': 233.38095238095238}
For Post type : Video Posts
{'likes': 529.7156177156177, 'shares': 99.53379953379954, 'comments': 248.1864801864802}
For Post type : Instagram Live
{'likes': 548.7288888888888, 'shares': 105.91777777777777, 'comments': 262.20222222222225}
For Post type : Instagram Reels
{'likes': 541.0728929384966, 'shares': 106.54669703872437, 'comments': 256.6104783599089}
For Post type : Instagram Shopping Posts
{'likes': 520.496583143508, 'shares': 102.68564920273349, 'comments': 261.01822323462414}
For Post type : Instagram Ads
{'likes': 511.76119402985074, 'shares': 102.88059701492537, 'comments': 260.3944562899787}
For Post type : IGTV
{'likes': 540.7668997668998, 'shares': 104.2027972027972, 'comments': 260.05827505827506}
For Post type : Carousel Posts
{'likes': 546.5704845814978, 'shares': 99.86123348017621, 'comments': 250.9713656387665}
For Post type : Instagram Stories
{'likes': 530.9

In [9]:
def calculate_percentage_difference(old_value, new_value):
    if old_value == 0:
        return 0  # Avoid division by zero, return 0 if the old value is 0
    return ((new_value - old_value) / old_value) * 100

In [10]:
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate

def generate_insights(metrics, selected_metrics, post_type, comparison):
    openai_api_key = os.getenv("OPENAI_API_KEY")
    chat = ChatOpenAI(temperature=0.7, model="gpt-4o", openai_api_key=openai_api_key)

    # Calculate percentage differences between selected post type and others
    comparison_data = {} 
    for post, post_metrics in comparison.items():
        if post == post_type: #Skipping the current post type
            continue
        likes_diff = calculate_percentage_difference(post_metrics["likes"], selected_metrics["likes"])
        shares_diff = calculate_percentage_difference(post_metrics["shares"], selected_metrics["shares"])
        comments_diff = calculate_percentage_difference(post_metrics["comments"], selected_metrics["comments"])
        
        comparison_data[post] = {
            "likes_diff": likes_diff,
            "shares_diff": shares_diff,
            "comments_diff": comments_diff
        }

    # Format the comparison data into a string for inclusion in the prompt
    comparison_data_str = ""
    for post, data in comparison_data.items():
        comparison_data_str += f"- {post.capitalize()} Posts: Likes: {data['likes_diff']:.2f}%, Shares: {data['shares_diff']:.2f}%, Comments: {data['comments_diff']:.2f}%; "


    # Ensure there's no extra newline at the end of the string
    comparison_data_str = comparison_data_str.strip()

    # Create the prompt
    prompt = ChatPromptTemplate.from_template("""
    You are an expert in social media post performance analysis. Based on the following average engagement metrics for {post_type} posts, provide insights and suggest how this post type performs overall:
    - Average Likes: {likes}
    - Average Shares: {shares}
    - Average Comments: {comments}

    Additionally, compare this post type with other post types and provide insights based on the following percentage differences in engagement:
    {comparison_data}
    
    Provide a bullet-point summary of the performance insights for {post_type} posts, including how this post type compares with others in terms of likes, shares, and comments.
    """)

    # Invoke the model to get the response
    response = chat.invoke(
        prompt.format_prompt(
            post_type=post_type.capitalize(),
            likes=metrics["likes"],
            shares=metrics["shares"],
            comments=metrics["comments"],
            comparison_data=comparison_data_str
        ).to_messages()
    )

    general_insight = response.content
    return general_insight
