In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


#importing the csv file
campaign = pd.read_csv('campaign.csv')

campaign.head()

In [35]:
#make a copy of the data
campaign_copy = campaign.copy()

#drop the columns that are not needed
campaign_copy.drop(['Campaign ID','Reporting starts','Reporting ends'], axis=1, inplace=True)

#making the heads of the columns to be in lower case and replacing the spaces with underscore
campaign_copy.columns = campaign_copy.columns.str.lower().str.replace(' ', '_')

#remaming headers to make them more readable
campaign_copy.rename(columns={'amount_spent_(eur)':'spend', 'cpm_(cost_per_1,000_impressions)':'cpm', 'clicks_(all)':'clicks'}, inplace=True)

#drop columns
campaign_copy =campaign_copy.drop(['cpi'], axis=1)

#count the Nan values in the columns
campaign_copy.isnull().sum()

#remove the null values
campaign_copy = campaign_copy.dropna()



Calculating the score

In [36]:
def calculate_campaign_score(row):
    """Calculate a performance score for each campaign based on defined criteria"""
    score = 0
    
    # Calculate metrics
    ctr = (row['clicks'] / row['impressions']) * 100
    cpi = row['spend'] / row['app_installs'] if row['app_installs'] > 0 else float('inf')
    
    # Score based on CPM criterion (≤ 8)
    if row['cpm'] <= 8:
        score += 1
        
    # Score based on CTR criterion (> 0.50%)
    if ctr > 0.50:
        score += 1
        
    # Score based on CPI criterion (< 5)
    if cpi < 5:
        score += 1
    
    return score


Campaign Analysis

In [37]:
def analyze_campaign_performance(df):
    """Analyze campaign performance and predict ideal duration"""
    
# Create copy of dataframe
df_analysis = campaign_copy.copy()  # Fixed from df.campaign_copy()
df_analysis.head()
  
# Calculate performance metrics
df_analysis['ctr'] = (df_analysis['clicks'] / df_analysis['impressions']) * 100
df_analysis['cpi'] = df_analysis['spend'] / df_analysis['app_installs'].replace(0, np.nan)

# Calculate performance score
df_analysis['performance_score'] = df_analysis.apply(calculate_campaign_score, axis=1)
    
# Add campaign duration
df_analysis['day'] = pd.to_datetime(df_analysis['day'])
df_analysis['campaign_duration'] = df_analysis.groupby('campaign_name')['day'].transform('nunique')

In [None]:
def predict_campaign_duration(df_analysis):
    # Get unique campaigns and their features
    unique_campaigns = df_analysis.groupby('campaign_name').agg({
            'performance_score': 'mean',
            'reach': 'sum',
            'impressions': 'sum',
            'frequency': 'mean',
            'spend': 'sum',
            'day': lambda x: (x.max() - x.min()).days + 1  # Actual duration
        }).reset_index()
        
    # Prepare features for prediction
    features = ['performance_score', 'reach', 'impressions', 'frequency', 'spend']
    X = unique_campaigns[features]
    y = unique_campaigns['day']
        
    # Train model
    model = LinearRegression()
    model.fit(X, y)
        
    # Make predictions
    predictions = model.predict(X)
        
    # Create results dataframe
    results = pd.DataFrame({
            'Campaign Name': unique_campaigns['campaign_name'],
            'Predicted Ideal Duration (Days)': np.round(predictions, 1),
            'Actual Duration (Days)': unique_campaigns['day']
        })
        
    # Print results
    print("\nCampaign Predictions:")
    print("====================")
    for _, row in results.iterrows():
            print(f"\nCampaign: {row['Campaign Name']}")
            print(f"Predicted Ideal Duration: {row['Predicted Ideal Duration (Days)']} days")
            print(f"Actual Duration: {row['Actual Duration (Days)']} days")
        
    return results

# Call the function
results = predict_campaign_duration(df_analysis)


In [39]:
# Calculate accuracy metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
    
# Calculate feature importance
feature_importance = pd.DataFrame({
        'Feature': features,
        'Importance': abs(model.coef_)
    }).sort_values('Importance', ascending=False)
    
# Print accuracy metrics
print("\nModel Accuracy Metrics:")
print("=======================")
print(f"R-squared (R²) Score: {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.2f} days")
print(f"Root Mean Square Error (RMSE): {rmse:.2f} days")
    
print("\nFeature Importance:")
print("==================")
for _, row in feature_importance.iterrows():
        print(f"{row['Feature']}: {row['Importance']:.4f}")
    
# Calculate prediction accuracy percentage
accuracy_percentage = (1 - mae / y_test.mean()) * 100
print(f"\nOverall Prediction Accuracy: {accuracy_percentage:.2f}%")
    
# Show actual vs predicted values
results = pd.DataFrame({
        'Actual Duration': y_test,
        'Predicted Duration': y_pred,
        'Absolute Error': abs(y_test - y_pred)
    })
    
print("\nPrediction Details:")
print("==================")
print(results)
    
return {
        'r2_score': r2,
        'mae': mae,
        'rmse': rmse,
        'accuracy_percentage': accuracy_percentage,
        'feature_importance': feature_importance,
        'model': model
    }


Model Accuracy Metrics:
R-squared (R²) Score: -0.0174
Mean Absolute Error (MAE): 2.89 days
Root Mean Square Error (RMSE): 3.42 days

Feature Importance:
frequency: 3.8469
performance_score: 0.5774
spend: 0.0037
reach: 0.0000
impressions: 0.0000

Overall Prediction Accuracy: 62.48%

Prediction Details:
      Actual Duration  Predicted Duration  Absolute Error
1466                5            8.121147        3.121147
574                11            8.659509        2.340491
887                10            8.383370        1.616630
1213                5            7.171533        2.171533
139                11            8.464166        2.535834
...               ...                 ...             ...
2998                6            8.097984        2.097984
2159                6            6.814248        0.814248
2259                7            8.531363        1.531363
58                 10            8.302559        1.697441
56                 10            8.805573        1.194427


SyntaxError: 'return' outside function (687453172.py, line 40)