In [8]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# --- Locations --------------------------------------------------------------
# Everything is resolved relative to the directory the job is launched from;
# the input CSV is expected to sit directly in that directory.
BASE_DIR = os.getcwd()
DATA_DIR = BASE_DIR
OUT_DIR, LOG_DIR = (
    os.path.join(BASE_DIR, folder) for folder in ("output", "logs")
)

# Create the output and log folders up front (no error if they already exist).
os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

# --- Files ------------------------------------------------------------------
INPUT_FILE = os.path.join(DATA_DIR, "sales_data.csv")
LOG_FILE = os.path.join(LOG_DIR, "automation_log.txt")

# Artifacts written into OUT_DIR: cleaned rows, monthly totals, trend chart.
CLEAN_FILE, SUMMARY_FILE, CHART_FILE = (
    os.path.join(OUT_DIR, filename)
    for filename in (
        "clean_sales_data.csv",
        "monthly_sales_summary.csv",
        "monthly_sales_trend.png",
    )
)


def log(msg):
    """Append *msg* to LOG_FILE as a single timestamped line.

    Each call opens the file in append mode, writes
    ``[<datetime.now()>] <msg>`` followed by a newline, and closes it again.

    The encoding is pinned to UTF-8 so that messages containing non-ASCII
    characters are written the same way on every platform instead of
    depending on the locale's default encoding.
    """
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now()}] {msg}\n")


# --- Pipeline: load -> clean -> aggregate -> chart -> export ----------------
# The run is wrapped in a single try/except so that any failure (missing input
# file, missing "date"/"sales" column, unwritable output, ...) is recorded in
# the log before the exception propagates, instead of leaving only a dangling
# "Job started" entry.
log("Job started")
try:
    df = pd.read_csv(INPUT_FILE)

    # Cleaning: remove exact duplicate rows, then coerce the two columns the
    # report relies on. errors="coerce" turns unparseable values into NaT/NaN
    # so they are dropped below rather than raising here.
    df.drop_duplicates(inplace=True)
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["sales"] = pd.to_numeric(df["sales"], errors="coerce")
    # NOTE: dropna() without `subset` removes rows that have a missing value
    # in *any* column, not only in "date"/"sales".
    df.dropna(inplace=True)

    # Month key as a string (monthly period of each date) used for grouping.
    df["month"] = df["date"].dt.to_period("M").astype(str)

    # One row per month with the summed sales, ordered by the month key.
    monthly_sales = (
        df.groupby("month", as_index=False)["sales"]
          .sum()
          .sort_values("month")
    )

    # Render the trend chart. The figure is closed in `finally` so a failure
    # while plotting or saving does not leave an open figure behind.
    plt.figure(figsize=(9, 5))
    try:
        plt.plot(monthly_sales["month"], monthly_sales["sales"], marker="o")
        plt.title("Monthly Sales Trend")
        plt.xlabel("Month")
        plt.ylabel("Total Sales")
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(CHART_FILE)
    finally:
        plt.close()

    # Export the cleaned rows and the monthly summary.
    df.to_csv(CLEAN_FILE, index=False)
    monthly_sales.to_csv(SUMMARY_FILE, index=False)
except Exception as exc:
    # Top-level boundary: record the failure, then let it propagate unchanged.
    log(f"Job failed: {type(exc).__name__}: {exc}")
    raise

# Only reached when every step above succeeded (the handler re-raises).
log("Clean data, summary, and chart generated successfully")
log("Job finished")

print("Automation completed successfully.")
print("Outputs:")
print(f"- {CLEAN_FILE}")
print(f"- {SUMMARY_FILE}")
print(f"- {CHART_FILE}")


Automation completed successfully.
Outputs:
- c:\Users\rajva\Desktop\data\output\clean_sales_data.csv
- c:\Users\rajva\Desktop\data\output\monthly_sales_summary.csv
- c:\Users\rajva\Desktop\data\output\monthly_sales_trend.png
