In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [4]:
# Load the social media dataset from a CSV file; the path is relative to the
# notebook's current working directory.
df = pd.read_csv("Downloads/social_media_data.csv")

In [5]:
# Show summary statistics (count, mean, std, min, quartiles, max) of the data.
df.describe()

Unnamed: 0,caption_length,hashtag_count,posting_hour,likes,comments,shares
count,20.0,20.0,20.0,20.0,20.0,20.0
mean,117.5,6.1,18.15,226.25,26.8,10.65
std,47.889676,2.954034,2.641272,125.811837,18.724035,11.753051
min,30.0,1.0,11.0,60.0,4.0,1.0
25%,87.5,4.0,17.75,127.5,10.0,2.75
50%,115.0,6.0,19.0,205.0,23.0,6.0
75%,152.5,8.0,20.0,302.5,37.75,12.75
max,200.0,12.0,21.0,500.0,70.0,40.0


In [9]:
# Compute the engagement score: total interactions (likes + comments + shares) per post.
df["engagement"] = df["likes"].add(df["comments"]).add(df["shares"])

# Map each content type label to an integer code. The fitted encoder is kept
# so the same mapping can be applied to new inputs later.
encoder = LabelEncoder()
content_type_codes = encoder.fit_transform(df["content_type"])
df["content_type_encoded"] = content_type_codes

# Cluster the posts into 3 groups by caption length, hashtag count and engagement.
# n_init is set explicitly: scikit-learn changed its default from 10 to "auto"
# in version 1.4 (and emits a FutureWarning on 1.2/1.3 when it is omitted), so
# relying on the default can give different clusters on different versions.
# NOTE(review): the features are clustered on their raw scales, so the columns
# with the largest values dominate the distance — consider standardizing first.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df["content_cluster"] = kmeans.fit_predict(df[["caption_length", "hashtag_count", "engagement"]])

# Prediction model: estimate engagement from the encoded content type,
# caption length, hashtag count and posting hour.
feature_columns = ["content_type_encoded", "caption_length", "hashtag_count", "posting_hour"]
X = df[feature_columns]
y = df["engagement"]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# NOTE(review): X_test / y_test are not used in this cell — confirm the model
# is evaluated elsewhere.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 200-tree random forest on the training split.
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)




In [10]:
# Recommendation function
def recommend_content(content_type, caption_length, hashtag_count):
    """Recommend a posting hour and estimate engagement for a planned post.

    Uses three notebook-level objects built in earlier cells: ``df`` (post
    history with ``posting_hour``, ``engagement``, ``content_cluster`` and
    ``content_type`` columns), ``encoder`` (fitted on ``content_type``) and
    ``model`` (the fitted engagement regressor).

    Parameters
    ----------
    content_type : str
        Content type label; must be one of the labels ``encoder`` was fitted on.
    caption_length : number
        Caption length of the planned post.
    hashtag_count : number
        Hashtag count of the planned post.

    Returns
    -------
    dict
        ``recommended_posting_hour`` (int): the hour with the highest mean
        engagement over all rows of ``df``.
        ``predicted_engagement`` (int): the model's prediction for the post at
        that hour, rounded to the nearest integer.
        ``best_content_cluster_type`` (str): the most frequent content type in
        the cluster with the highest mean engagement.
        ``note`` (str): a fixed explanatory message.

    Raises
    ------
    ValueError
        If ``content_type`` is not a label known to ``encoder``.
    """
    # Convert the content type to its integer code. An unseen label makes
    # transform() raise ValueError; re-raise it naming the bad value and the
    # accepted labels so the caller can see what went wrong.
    try:
        ct_encoded = encoder.transform([content_type])[0]
    except ValueError as exc:
        known_types = [str(label) for label in encoder.classes_]
        raise ValueError(
            f"Unknown content_type {content_type!r}; expected one of {known_types}"
        ) from exc

    # Best posting hour: the hour with the highest mean engagement.
    best_hour = df.groupby("posting_hour")["engagement"].mean().idxmax()

    # Single-row feature frame for the prediction at the best hour.
    input_data = pd.DataFrame({
        "content_type_encoded": [ct_encoded],
        "caption_length": [caption_length],
        "hashtag_count": [hashtag_count],
        "posting_hour": [best_hour]
    })

    predicted_engagement = model.predict(input_data)[0]

    # Find the cluster with the highest mean engagement, then its most
    # frequent content type.
    best_cluster = df.groupby("content_cluster")["engagement"].mean().idxmax()
    in_best_cluster = df["content_cluster"] == best_cluster
    dominant_type = df.loc[in_best_cluster, "content_type"].mode()[0]

    return {
        "recommended_posting_hour": int(best_hour),
        # Round to the nearest integer; plain int() would truncate toward zero.
        "predicted_engagement": int(round(float(predicted_engagement))),
        "best_content_cluster_type": str(dominant_type),
        "note": "Posting at this hour usually performs best based on historical data."
    }
