In [None]:
# If needed, uncomment and run (%pip installs into the running kernel's environment):
# %pip install kagglehub pandas matplotlib python-pptx openai

import os
from dataclasses import dataclass
from typing import Dict, List

import kagglehub
import pandas as pd
import matplotlib.pyplot as plt

from pptx import Presentation
from pptx.util import Inches
from openai import OpenAI

# --- Display defaults ---
# Default matplotlib figure size (width, height in inches).
plt.rcParams["figure.figsize"] = (8, 4)
# Show up to 50 columns before pandas truncates a wide frame.
pd.options.display.max_columns = 50

# --- Output locations ---
# Directory for generated artifacts; created here if it does not already exist.
OUTPUT_DIR = "output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Chart images.
CHART_CHANNEL_PATH = os.path.join(OUTPUT_DIR, "uplift_by_channel.png")
CHART_OBJECTIVE_PATH = os.path.join(OUTPUT_DIR, "uplift_by_objective.png")
# Slide deck.
PPTX_PATH = os.path.join(OUTPUT_DIR, "Marketing_Insight_Engine_Deck.pptx")

# --- LLM settings ---
OPENAI_MODEL = "gpt-4o-mini"  # or another available model


In [None]:
# Download latest version of the dataset from Kaggle. The returned value is
# used below as the local root directory to walk.
path = kagglehub.dataset_download("geethasagarbonthu/marketing-and-e-commerce-analytics-dataset")
print("Path to dataset files:", path)

# See what files we have (recursively). os.walk makes no ordering guarantee,
# so sort the listing: this keeps the displayed output, and any "first match"
# selection made from `files`, reproducible across machines and re-runs.
# Building the list in one expression also avoids leaving loop variables
# behind in the notebook namespace.
files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(path)
    for name in names
)

files

In [None]:
def pick_campaign_csv(file_list: List[str]) -> str:
    """
    Choose the CSV that looks like the marketing campaign file.

    Selection order:
      1. the first CSV whose file name (case-insensitive) contains 'campaign'
      2. failing that, the first CSV in ``file_list``

    Only the file name is inspected, not the directories in the path.

    Args:
        file_list: Candidate file paths; earlier entries win ties.

    Returns:
        The selected path, exactly as it appears in ``file_list``.

    Raises:
        FileNotFoundError: If ``file_list`` contains no ``.csv`` entries.
    """
    candidates = [p for p in file_list if p.lower().endswith(".csv")]
    if not candidates:
        raise FileNotFoundError("No CSV files found in the dataset.")

    # Lazily scan for a name match; fall back to the first CSV when none exists.
    campaign_matches = (
        p for p in candidates if "campaign" in os.path.basename(p).lower()
    )
    return next(campaign_matches, candidates[0])


# Select the campaign file from the dataset listing and report which one was chosen.
campaign_csv_path = pick_campaign_csv(files)
print("Using campaign CSV:", campaign_csv_path)

# Load the file as-is into the raw frame; the first five rows are the cell output.
df_raw = pd.read_csv(filepath_or_buffer=campaign_csv_path)
df_raw.head(n=5)
