In [18]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

def generate_ad_data(num_records, seed=None):
    """
    Generate synthetic ad performance data with daily reach.

    Each record is one simulated day of one ad. CTR and reach decay
    exponentially with ``days_active`` while frequency and CPM grow linearly
    (capped at 10 and 20 respectively), with uniform noise added to each.

    Parameters
    ----------
    num_records : int
        Number of rows to generate. Values <= 0 yield an empty frame.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        output is reproducible. When None (default) NumPy's global random
        state is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns: ad_id, days_active, daily_reach, impressions, clicks, ctr,
        frequency, cpm.
    """
    columns = ['ad_id', 'days_active', 'daily_reach', 'impressions',
               'clicks', 'ctr', 'frequency', 'cpm']

    # Use the global NumPy stream unless a seed is supplied, so existing
    # callers (including ones that call np.random.seed first) are unaffected.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Rows are grouped into blocks of num_records // 20 per ad (20 ads when
    # num_records is a multiple of 20). Clamp the block size to 1 so fewer
    # than 20 records no longer raises ZeroDivisionError.
    records_per_ad = max(1, num_records // 20)

    data = []

    for i in range(num_records):
        ad_id = f"AD_{i // records_per_ad + 1}"
        # NOTE(review): days_active cycles 1..45 over the row index and does
        # not restart when ad_id changes - confirm this is intended.
        days_active = (i % 45) + 1

        # Base metrics that change over time
        base_ctr = 0.025 * np.exp(-0.03 * days_active)

        # Daily reach: base decays over time, +/-15% noise, floor of 1000
        base_reach = 10000 * np.exp(-0.02 * days_active)
        daily_reach = int(max(1000, base_reach * (1 + rng.random_sample() * 0.3 - 0.15)))

        # Impressions based on reach and frequency (frequency capped at 10)
        frequency = min(10, 1 + (days_active * 0.18) + (rng.random_sample() * 0.5))
        impressions = int(daily_reach * frequency)

        # CTR gets +/-0.0025 noise and is floored at 0.001; clicks truncate
        ctr = max(0.001, base_ctr + (rng.random_sample() * 0.005 - 0.0025))
        clicks = int(impressions * ctr)

        # CPM rises with days_active, capped at 20
        cpm = min(20, 6 + (days_active * 0.25) + (rng.random_sample() * 2))

        data.append({
            'ad_id': ad_id,
            'days_active': days_active,
            'daily_reach': daily_reach,
            'impressions': impressions,
            'clicks': clicks,
            'ctr': round(ctr, 4),
            'frequency': round(frequency, 2),
            'cpm': round(cpm, 2)
        })

    # Passing columns keeps the schema stable even when no rows were generated.
    return pd.DataFrame(data, columns=columns)

def export_data(df, filename='ad_performance_data.csv', export_format='csv', output_dir='data_exports'):
    """
    Export the dataset to a timestamped CSV or Excel file.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to write. The CSV branch also prints a summary that reads the
        'daily_reach', 'ctr' and 'frequency' columns.
    filename : str
        Base file name; its extension is discarded and replaced according to
        ``export_format``.
    export_format : str
        'csv' or 'excel' (case-insensitive).
    output_dir : str
        Directory to write into; created if it does not exist.

    Returns
    -------
    str
        Path of the file that was written.

    Raises
    ------
    ValueError
        If ``export_format`` is not 'csv' or 'excel'. Previously this case
        fell through to an UnboundLocalError at the return statement.
    """
    fmt = export_format.lower()
    # Validate before touching the filesystem so a bad format has no side effects.
    if fmt not in ('csv', 'excel'):
        raise ValueError(
            f"Unsupported export_format {export_format!r}; expected 'csv' or 'excel'"
        )

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    file_base = os.path.splitext(filename)[0]

    if fmt == 'csv':
        output_file = f"{output_dir}/{file_base}_{timestamp}.csv"
        df.to_csv(output_file, index=False)
        print(f"\nData exported to CSV: {output_file}")

        print("\nFirst few rows of exported data:")
        print(df.head())

        print("\nDataset Summary:")
        print(f"Total records: {len(df)}")
        print(f"Average Daily Reach: {df['daily_reach'].mean():.0f}")
        print(f"Average CTR: {df['ctr'].mean():.4f}")
        print(f"Average Frequency: {df['frequency'].mean():.2f}")

    else:  # fmt == 'excel'
        output_file = f"{output_dir}/{file_base}_{timestamp}.xlsx"
        df.to_excel(output_file, index=False, sheet_name='Ad_Performance')
        print(f"\nData exported to Excel: {output_file}")

    return output_file

def main():
    """Build a 500-row synthetic ad dataset and write it out as a timestamped CSV."""
    record_count = 500
    print(f"Generating {record_count} records of ad performance data...")

    # Where and how the generated frame gets written.
    export_options = {
        'filename': 'ad_performance.csv',
        'export_format': 'csv',
        'output_dir': 'ad_performance',
    }

    ad_frame = generate_ad_data(record_count)
    export_data(ad_frame, **export_options)

# Entry point: run the generate-and-export pipeline when this code is executed
# directly (as a script, or as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main()

Generating 500 records of ad performance data...

Data exported to CSV: ad_performance/ad_performance_20250113_135307.csv

First few rows of exported data:
  ad_id  days_active  daily_reach  impressions  clicks     ctr  frequency  \
0  AD_1            1         9745        14920     356  0.0239       1.53   
1  AD_1            2         8429        14262     325  0.0228       1.69   
2  AD_1            3        10332        20404     475  0.0233       1.97   
3  AD_1            4         8301        15675     350  0.0223       1.89   
4  AD_1            5         7986        17950     409  0.0228       2.25   

    cpm  
0  7.25  
1  6.71  
2  8.53  
3  7.73  
4  9.12  

Dataset Summary:
Total records: 500
Average Daily Reach: 6591
Average CTR: 0.0136
Average Frequency: 5.36


In [24]:
# Build a fresh 500-row synthetic dataset
num_records = 500
df = generate_ad_data(num_records=num_records)

# Persist it to a fixed-name CSV (no index column) for the analysis cells below
df.to_csv(path_or_buf='ad_performance.csv', index=False)

In [27]:
import matplotlib.pyplot as plt
import seaborn as sns

# Load the exported CSV into a frame named `creatives` and preview the first 30 rows
creatives = pd.read_csv(filepath_or_buffer='ad_performance.csv')
creatives.head(n=30)


Unnamed: 0,ad_id,days_active,daily_reach,impressions,clicks,ctr,frequency,cpm
0,AD_1,1,11119,14522,360,0.0248,1.31,6.42
1,AD_1,2,8667,14202,327,0.0231,1.64,7.48
2,AD_1,3,8572,14154,332,0.0235,1.65,7.59
3,AD_1,4,8125,15225,318,0.021,1.87,8.96
4,AD_1,5,8870,18387,387,0.0211,2.07,8.92
5,AD_1,6,8448,19330,387,0.02,2.29,7.98
6,AD_1,7,9007,20439,440,0.0216,2.27,8.53
7,AD_1,8,7782,21496,369,0.0172,2.76,8.46
8,AD_1,9,8091,24792,432,0.0175,3.06,8.82
9,AD_1,10,6986,21583,411,0.0191,3.09,9.14
