In [297]:
import pandas as pd

In [298]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

Load the data set

In [348]:
data = pd.read_csv('Social_Media_Advertising.csv', nrows=5000)

In [349]:
data[['Conversion_Rate', 'Acquisition_Cost', 'ROI', 'Engagement_Score']] = data[['Conversion_Rate', 'Acquisition_Cost', 'ROI', 'Engagement_Score']].apply(pd.to_numeric, errors='coerce')

In [350]:
data.dropna(subset=['Conversion_Rate', 'Acquisition_Cost', 'ROI', 'Engagement_Score'], inplace=True)


Calculate weighted score for each campaign

In [351]:
data['Weighted_Score'] = (data['Conversion_Rate'] + data['ROI'] + data['Engagement_Score']) 


In [352]:
if data.isnull().values.any():
    print("Warning: There are still missing or invalid values after preprocessing.")

 Preprocess the dataset
Convert categorical variables into numerical format using one-hot encoding

In [353]:
data_encoded = pd.get_dummies(data.drop(columns=['Campaign_ID', 'Date', 'Company','Channel_Used']))

Define features (X) and target variable (y)

In [354]:
X = data_encoded
y = data['Channel_Used']

Split the dataset into training and testing sets

In [355]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

Train a Random Forest classifier using weighted scores as additional features

In [343]:
X_train['Weighted_Score'] = data.loc[X_train.index, 'Weighted_Score']
X_test['Weighted_Score'] = data.loc[X_test.index, 'Weighted_Score']
clf = RandomForestClassifier()
clf.fit(X_train, y_train)


Evaluate the classifier

In [344]:
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

Model Accuracy: 0.502


Predict the best platform for a new product

In [345]:
def predict_best_platform(product_attributes):
    # Calculate weighted score for the new product
    weighted_score = (product_attributes['Conversion_Rate'] + product_attributes['ROI'] + product_attributes['Engagement_Score'])
    # Add weighted score as an additional feature
    product_attributes_encoded = pd.get_dummies(pd.DataFrame(product_attributes, index=[0]))
    product_attributes_encoded['Weighted_Score'] = weighted_score
    # Predict the best platform using the trained classifier
    predicted_platform = clf.predict(product_attributes_encoded)
    return predicted_platform[0]

In [346]:
# Example usage:
new_product_attributes = {
    'Target_Audience': 'Men 35-44',
    'Campaign_Goal': 'Product Launch',
    'Duration': '15 Days',
    'Conversion_Rate': 0.05,
    'ROI': 5.0,
    'Location': 'Las Vegas',
    'Language': 'Spanish',
    'Clicks': 500,
    'Impressions': 3000,
    'Engagement_Score': 7,
    'Customer_Segment': 'Health'
}


In [347]:
recommended_platform = predict_best_platform(new_product_attributes)
print(f'Recommended Platform: {recommended_platform}')

ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- Campaign_Goal_Brand Awareness
- Campaign_Goal_Increase Sales
- Campaign_Goal_Market Expansion
- Customer_Segment_Fashion
- Customer_Segment_Food
- ...
