<a href="https://colab.research.google.com/github/k-dinakaran/automation-of-wordpress-post-publication-using-AI-tools/blob/main/Develop_an_AI_Based_Tool_for_Segmenting_Large_Content_Databases_by_User_Preferences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

# Sample Dataset
# Each tuple is one interaction: (user_id, content_id, content_title,
# user_preference_score). The rows are transposed into the column-oriented
# `data` dict that the DataFrame is built from.
_columns = ('user_id', 'content_id', 'content_title', 'user_preference_score')
_rows = [
    (1, 101, '10 Tips for Fitness', 5),
    (2, 102, 'Mindfulness Meditation', 3),
    (3, 103, 'Healthy Eating Habits', 4),
    (4, 104, 'Strength Training Guide', 5),
    (5, 105, 'Yoga Basics', 2),
    (1, 106, 'Nutrition for Energy', 4),
    (2, 107, 'Dealing with Anxiety', 3),
    (3, 108, 'Quick Workout Routines', 5),
    (4, 109, 'Mindful Living', 2),
    (5, 110, 'Building Muscle Strength', 4),
]
data = {
    column: list(values)
    for column, values in zip(_columns, zip(*_rows))
}
df = pd.DataFrame(data)

# Step 1: Process Text Data
# Vectorize the titles with TF-IDF, ignoring English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
content_matrix = vectorizer.fit_transform(df['content_title'])

# Step 2: Combine Text Features with User Preferences
# Scale the scores into a separate array instead of overwriting the column,
# so df['user_preference_score'] keeps the raw values for later reporting.
scaled_preferences = MinMaxScaler().fit_transform(df[['user_preference_score']])
# One row per content item: the dense TF-IDF features followed by the scaled
# preference score as the last column.
content_with_preferences = np.hstack([content_matrix.toarray(), scaled_preferences])

# Step 3: Get User Input for Number of Segments
# KMeans cannot build more clusters than there are rows, so the request is
# validated against both bounds here rather than failing later during fitting.
max_clusters = len(df)
try:
    num_clusters = int(input("Enter the number of user segments you'd like to create (e.g., 2, 3, 4): "))
    if num_clusters < 1:
        raise ValueError("Number of segments should be at least 1.")
    if num_clusters > max_clusters:
        raise ValueError(
            f"Number of segments cannot exceed the number of content items ({max_clusters})."
        )
except ValueError as e:
    # Covers both non-integer input (raised by int()) and out-of-range values.
    print(f"Invalid input: {e}")
    num_clusters = min(3, max_clusters)  # Default to 3 segments (fewer if the dataset is smaller)

# Step 4: Apply Clustering
# Fit the model first, then read the per-row cluster assignment from the
# fitted estimator; random_state is fixed so runs are reproducible.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(content_with_preferences)
df['segment'] = kmeans.labels_

# Step 5: Generate Segment Insights
def _summarize_segment(group):
    """Return the titles and the mean preference score for one segment's rows."""
    return {
        'Top Content Titles': group['content_title'].tolist(),
        'Average Preference Score': group['user_preference_score'].mean()
    }


# Select the reporting columns before apply() so the summary function never
# receives the grouping column; operating on the grouping column is deprecated
# in pandas and emits a DeprecationWarning.
segment_insights = (
    df.groupby('segment')[['content_title', 'user_preference_score']]
    .apply(_summarize_segment)
    .to_frame(name='Insights')
    .reset_index()
)

# Display Segmentation Insights
print("\nUser Segmentation Insights:")
print(segment_insights)

# Step 6: Recommendation Function Based on Segment
def recommend_content_for_segment(segment_id):
    """Return the content titles assigned to the given segment.

    Reads the module-level ``df`` and returns the ``content_title`` values,
    in row order, of every row whose ``segment`` equals ``segment_id``.
    The list is empty when no row matches.
    """
    in_segment = df['segment'] == segment_id
    titles = df.loc[in_segment, 'content_title']
    return titles.tolist()

# Step 7: Get User Input for Content Recommendations
# Ask for a segment index and print that segment's titles; non-integer or
# out-of-range input is reported instead of raising.
segment_prompt = f"\nEnter a segment number (0 to {num_clusters-1}) to see recommended content: "
try:
    segment_id = int(input(segment_prompt))
    if not 0 <= segment_id < num_clusters:
        raise ValueError("Segment number out of range.")
    print(f"\nRecommended Content for Segment {segment_id}:")
    print(recommend_content_for_segment(segment_id))
except ValueError as err:
    print(f"Invalid input: {err}")


Enter the number of user segments you'd like to create (e.g., 2, 3, 4): 2


  segment_insights = df.groupby('segment').apply(lambda x: {



User Segmentation Insights:
   segment                                           Insights
0        0  {'Top Content Titles': ['10 Tips for Fitness',...
1        1  {'Top Content Titles': ['Mindfulness Meditatio...

Enter a segment number (0 to 1) to see recommended content: 1

Recommended Content for Segment 1:
['Mindfulness Meditation', 'Healthy Eating Habits', 'Yoga Basics', 'Nutrition for Energy', 'Dealing with Anxiety', 'Mindful Living', 'Building Muscle Strength']
