In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('Social_Media_Advertising.csv', nrows=12000)


missing_values = data.isnull().sum()
print("Missing values:\n", missing_values)


data['Acquisition_Cost'] = data['Acquisition_Cost'].replace('[\\$,]', '', regex=True).astype(float)


data.drop(columns=['Date', 'Company'], inplace=True)

data_encoded = pd.get_dummies(data, columns=['Target_Audience', 'Campaign_Goal', 'Duration', 'Location', 'Language', 'Customer_Segment'])


X = data_encoded.drop(columns=['Channel_Used'])  # Features
y = data_encoded['Channel_Used']  # Target variable


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)


clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy %:", accuracy * 100)


def predict_best_platform(product_attributes):
    product_attributes_encoded = pd.DataFrame(product_attributes, index=[0])
    product_attributes_encoded = pd.get_dummies(product_attributes_encoded)
    product_attributes_encoded = product_attributes_encoded.reindex(columns=X_train.columns, fill_value=0)

    predicted_platform = clf.predict(product_attributes_encoded)
    return predicted_platform[0]


# 1 if available, 0 if not
new_product_attributes = {
    'Target_Audience_Men 18-34': 1,
    'Campaign_Goal_Brand Awareness': 1,
    'Duration_30 Days': 1,
    'Location_New Delhi': 1,
    'Language_Hindi': 1,
    'Customer_Segment_Fitness': 1,
    'Social_Media_Facebook': 1,
    'Social_Media_Twitter': 1,
    'Product_Category_Technology': 1,
    'Discount_Available': 0,
    'Free_Shipping': 1,
    'Limited_Edition': 0,
    'Promotion_Code': 0,
}

recommended_platform = predict_best_platform(new_product_attributes)
print("Recommended Platform:", recommended_platform)


Missing values:
 Campaign_ID         0
Target_Audience     0
Campaign_Goal       0
Duration            0
Channel_Used        0
Conversion_Rate     0
Acquisition_Cost    0
ROI                 0
Location            0
Language            0
Clicks              0
Impressions         0
Engagement_Score    0
Customer_Segment    0
Date                0
Company             0
dtype: int64
Training set shape: (9600, 34) (9600,)
Testing set shape: (2400, 34) (2400,)
Model Accuracy %: 49.666666666666664
Recommended Platform: Pinterest
