In [1]:
# Load and explore the uploaded Amazon Sales Report, then build an end-to-end analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import textwrap

In [85]:
!pip install xlsxwriter

Collecting xlsxwriter
  Downloading xlsxwriter-3.2.5-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.5-py3-none-any.whl (172 kB)
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.5


In [86]:
# 1) Load the CSV
path = "data/Amazon Sale Report.csv"
df_raw = pd.read_csv(path, encoding="utf-8", low_memory=False)

In [9]:
df_raw.head()

Unnamed: 0,index,Order ID,Date,Status,Fulfilment,Sales Channel,ship-service-level,Category,Size,Courier Status,...,currency,Amount,ship-city,ship-state,ship-postal-code,ship-country,B2B,fulfilled-by,New,PendingS
0,0,405-8078784-5731545,04-30-22,Cancelled,Merchant,Amazon.in,Standard,T-shirt,S,On the Way,...,INR,647.62,MUMBAI,MAHARASHTRA,400081.0,IN,False,Easy Ship,,
1,1,171-9198151-1101146,04-30-22,Shipped - Delivered to Buyer,Merchant,Amazon.in,Standard,Shirt,3XL,Shipped,...,INR,406.0,BENGALURU,KARNATAKA,560085.0,IN,False,Easy Ship,,
2,2,404-0687676-7273146,04-30-22,Shipped,Amazon,Amazon.in,Expedited,Shirt,XL,Shipped,...,INR,329.0,NAVI MUMBAI,MAHARASHTRA,410210.0,IN,True,,,
3,3,403-9615377-8133951,04-30-22,Cancelled,Merchant,Amazon.in,Standard,Blazzer,L,On the Way,...,INR,753.33,PUDUCHERRY,PUDUCHERRY,605008.0,IN,False,Easy Ship,,
4,4,407-1069790-7240320,04-30-22,Shipped,Amazon,Amazon.in,Expedited,Trousers,3XL,Shipped,...,INR,574.0,CHENNAI,TAMIL NADU,600073.0,IN,False,,,


In [13]:
# 2) Clean column names
df = df_raw.copy()
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(r"[^a-z0-9]+", "_", regex=True)
    .str.strip("_")
)

In [14]:
# 3) Try to identify key columns by common names in Amazon reports
# Candidates for order date
date_cols_candidates = [
    "order_date", "purchase_date", "date", "ship_date", "shipment_date"
]
order_date_col = next((c for c in date_cols_candidates if c in df.columns), None)

In [15]:
# Candidates for amount / sales
amount_candidates = ["amount", "total", "sales", "sale", "item_total", "item_price"]
amount_col = next((c for c in amount_candidates if c in df.columns), None)

In [16]:
# Candidates for quantity
qty_candidates = ["qty", "quantity", "item_quantity", "units", "qty_ordered"]
qty_col = next((c for c in qty_candidates if c in df.columns), None)

In [17]:
# Candidates for product/category
category_candidates = ["category", "product_category", "item_type", "sub_category"]
category_col = next((c for c in category_candidates if c in df.columns), None)

product_candidates = ["product_name", "sku", "asin", "item_name", "description", "title"]
product_col = next((c for c in product_candidates if c in df.columns), None)

size_candidates = ["size", "item_size", "variant_size"]
size_col = next((c for c in size_candidates if c in df.columns), None)

In [18]:
# Fulfillment
fulfill_candidates = ["fulfilment", "fulfillment", "fulfillment_channel", "fulfilment_channel", "ship_service_level"]
fulfill_col = next((c for c in fulfill_candidates if c in df.columns), None)

In [19]:
# Customer and location
customer_candidates = ["customer_id", "buyer_name", "buyer_email", "order_id"]
customer_col = next((c for c in customer_candidates if c in df.columns), None)

state_candidates = ["ship_state", "state", "recipient_state"]
state_col = next((c for c in state_candidates if c in df.columns), None)

city_candidates = ["ship_city", "city", "recipient_city"]
city_col = next((c for c in city_candidates if c in df.columns), None)

status_candidates = ["status", "order_status", "shipment_status"]
status_col = next((c for c in status_candidates if c in df.columns), None)


In [20]:
df.head()

Unnamed: 0,index,order_id,date,status,fulfilment,sales_channel,ship_service_level,category,size,courier_status,...,currency,amount,ship_city,ship_state,ship_postal_code,ship_country,b2b,fulfilled_by,new,pendings
0,0,405-8078784-5731545,04-30-22,Cancelled,Merchant,Amazon.in,Standard,T-shirt,S,On the Way,...,INR,647.62,MUMBAI,MAHARASHTRA,400081.0,IN,False,Easy Ship,,
1,1,171-9198151-1101146,04-30-22,Shipped - Delivered to Buyer,Merchant,Amazon.in,Standard,Shirt,3XL,Shipped,...,INR,406.0,BENGALURU,KARNATAKA,560085.0,IN,False,Easy Ship,,
2,2,404-0687676-7273146,04-30-22,Shipped,Amazon,Amazon.in,Expedited,Shirt,XL,Shipped,...,INR,329.0,NAVI MUMBAI,MAHARASHTRA,410210.0,IN,True,,,
3,3,403-9615377-8133951,04-30-22,Cancelled,Merchant,Amazon.in,Standard,Blazzer,L,On the Way,...,INR,753.33,PUDUCHERRY,PUDUCHERRY,605008.0,IN,False,Easy Ship,,
4,4,407-1069790-7240320,04-30-22,Shipped,Amazon,Amazon.in,Expedited,Trousers,3XL,Shipped,...,INR,574.0,CHENNAI,TAMIL NADU,600073.0,IN,False,,,


In [25]:
# 4) Parse dates and cast numerics safely
if order_date_col is not None:
    df[order_date_col] = pd.to_datetime(df[order_date_col], errors="coerce")

if amount_col is not None:
    # remove commas/currency symbols if present
    df[amount_col] = (
        df[amount_col]
        .astype(str)
        .str.replace(r"[^\d\.\-]", "", regex=True)
        .replace("", np.nan)
        .astype(float)
    )

if qty_col is not None:
    df[qty_col] = pd.to_numeric(df[qty_col], errors="coerce")

In [26]:
# 5) Basic filters: keep rows with a valid date and amount
work = df.copy()
if order_date_col is not None:
    work = work[work[order_date_col].notna()]
if amount_col is not None:
    work = work[work[amount_col].notna()]

# Helper: month period
if order_date_col is not None:
    work["order_month"] = work[order_date_col].dt.to_period("M").dt.to_timestamp()


In [27]:
# 6) KPI summary
kpis = {}
if amount_col is not None:
    kpis["Total Sales"] = work[amount_col].sum()
if qty_col is not None:
    kpis["Total Units"] = work[qty_col].sum()
if order_date_col is not None:
    kpis["Date Range"] = f"{work[order_date_col].min().date()} to {work[order_date_col].max().date()}"
if customer_col is not None:
    kpis["Unique Customers"] = work[customer_col].nunique()
if product_col is not None:
    kpis["Unique Products"] = work[product_col].nunique()

kpi_df = pd.DataFrame(list(kpis.items()), columns=["Metric", "Value"])


In [28]:
# 7) Time series (monthly)
monthly_sales = pd.DataFrame()
if order_date_col is not None and amount_col is not None:
    monthly_sales = work.groupby("order_month", as_index=False)[amount_col].sum().sort_values("order_month")

In [32]:
# 8) Product/category analysis
product_sales = pd.DataFrame()
if product_col is not None and amount_col is not None:
    group_cols = [amount_col]
    if qty_col is not None:
        group_cols.append(qty_col)
    product_sales = (
        work.groupby(product_col, as_index=False)[group_cols]
        .sum()
        .sort_values(amount_col, ascending=False)
        .head(20)
    )


In [33]:
# 9) Fulfillment analysis
fulfill_sales = pd.DataFrame()
if fulfill_col is not None and amount_col is not None:
    fulfill_sales = work.groupby(fulfill_col, as_index=False)[amount_col].sum().sort_values(amount_col, ascending=False)

fulfill_status = pd.DataFrame()
if fulfill_col is not None and status_col is not None:
    fulfill_status = work.groupby([fulfill_col, status_col], as_index=False).size().pivot(index=fulfill_col, columns=status_col, values="size").fillna(0)


In [55]:
# 10) Customer segmentation (simple RFM)
rfm = pd.DataFrame()
if customer_col is not None and order_date_col is not None and amount_col is not None:
    snapshot_date = work[order_date_col].max() + pd.Timedelta(days=1)
    rfm_base = work.groupby(customer_col).agg({
        order_date_col: lambda x: (snapshot_date - x.max()).days,
        amount_col: 'sum',
    })
    freq = work.groupby(customer_col).size().rename("frequency")
    rfm = rfm_base.rename(columns={order_date_col: "recency"}).join(freq)
    # Score into quintiles (1=worst, 5=best) where lower recency is better
    rfm["r_score"] = pd.qcut(-rfm["recency"].rank(method="first"), 5, labels=[1,2,3,4,5])
    rfm["f_score"] = pd.qcut(rfm["frequency"].rank(method="first"), 5, labels=[1,2,3,4,5])
    rfm["m_score"] = pd.qcut(rfm[amount_col].rank(method="first"), 5, labels=[1,2,3,4,5])
    rfm["rfm_score"] = rfm[["r_score","f_score","m_score"]].astype(int).sum(axis=1)
    # Segment
    def segment_row(row):
        if row["rfm_score"] >= 13: return "Champions"
        if row["rfm_score"] >= 10: return "Loyal"
        if row["rfm_score"] >= 8: return "Potential Loyalist"
        if row["rfm_score"] >= 6: return "Need Attention"
        return "At Risk"
    rfm["segment"] = rfm.apply(segment_row, axis=1)
    rfm = rfm.sort_values("rfm_score", ascending=False)

In [56]:
# 11) Geography
state_sales = pd.DataFrame()
if state_col is not None and amount_col is not None:
    state_sales = work.groupby(state_col, as_index=False)[amount_col].sum().sort_values(amount_col, ascending=False)

city_sales = pd.DataFrame()
if city_col is not None and amount_col is not None:
    city_sales = work.groupby(city_col, as_index=False)[amount_col].sum().sort_values(amount_col, ascending=False)


In [64]:
# 12) Create charts (each in its own figure, default colors only)
image_paths = []

def save_plot(fig, name):
    out = f"data/{name}.png"
    fig.savefig(out, bbox_inches="tight", dpi=150)
    plt.close(fig)
    image_paths.append(out)
    return out

In [66]:
# Monthly sales trend
if not monthly_sales.empty:
    fig = plt.figure()
    plt.plot(monthly_sales["order_month"], monthly_sales[amount_col], marker="o")
    plt.title("Monthly Sales Trend")
    plt.xlabel("Month")
    plt.ylabel("Sales")
    plt.xticks(rotation=45)
    plt.tight_layout()  # Ensures labels are visible
    save_plot(fig, "monthly_sales_trend")
    plt.close(fig)      # Properly closes the figure


In [67]:
# Monthly sales trend
if not monthly_sales.empty:
    fig = plt.figure()
    plt.plot(monthly_sales["order_month"], monthly_sales[amount_col], marker="o")
    plt.title("Monthly Sales Trend")
    plt.xlabel("Month")
    plt.ylabel("Sales")
    plt.xticks(rotation=45)
    save_plot(fig, "monthly_sales_trend")

In [73]:
# category_sales defined
category_sales = pd.DataFrame()
# or the output of your groupby aggregation
category_sales = work.groupby(category_col, as_index=False)[amount_col].sum().sort_values(amount_col, ascending=False)


In [74]:
# Category distribution (top 10)
if not category_sales.empty:
    top_cat = category_sales.head(10)
    fig, ax = plt.subplots()
    categories = top_cat[category_col].astype(str)
    values = top_cat[amount_col]
    ax.bar(categories, values)
    ax.set_title("Top Categories by Sales")
    ax.set_xlabel("Category")
    ax.set_ylabel("Sales")
    plt.xticks(rotation=45, ha="right")  # Rotate x labels properly
    plt.tight_layout()
    save_plot(fig, "top_categories_sales")


In [77]:
# size_sales defined
size_sales = pd.DataFrame()
# or the output of your groupby aggregation
size_sales = work.groupby(size_col, as_index=False)[amount_col].sum().sort_values(amount_col, ascending=False)


In [78]:
# Size distribution (top 10)
if not size_sales.empty:
    top_size = size_sales.head(10)
    fig = plt.figure()
    plt.bar(top_size[size_col].astype(str), top_size[amount_col])
    plt.title("Top Sizes by Sales")
    plt.xlabel("Size")
    plt.ylabel("Sales")
    plt.xticks(rotation=45, ha="right")
    save_plot(fig, "top_sizes_sales")

In [79]:
# Fulfillment by sales
if not fulfill_sales.empty:
    fig = plt.figure()
    plt.bar(fulfill_sales[fulfill_col].astype(str), fulfill_sales[amount_col])
    plt.title("Sales by Fulfillment Method")
    plt.xlabel("Fulfillment")
    plt.ylabel("Sales")
    plt.xticks(rotation=45, ha="right")
    save_plot(fig, "sales_by_fulfillment")

In [80]:
# Geo top states
if not state_sales.empty:
    top_states = state_sales.head(10)
    fig = plt.figure()
    plt.bar(top_states[state_col].astype(str), top_states[amount_col])
    plt.title("Top States by Sales")
    plt.xlabel("State")
    plt.ylabel("Sales")
    plt.xticks(rotation=45, ha="right")
    save_plot(fig, "top_states_sales")

In [81]:
# Geo top cities
if not city_sales.empty:
    top_cities = city_sales.head(10)
    fig = plt.figure()
    plt.bar(top_cities[city_col].astype(str), top_cities[amount_col])
    plt.title("Top Cities by Sales")
    plt.xlabel("City")
    plt.ylabel("Sales")
    plt.xticks(rotation=45, ha="right")
    save_plot(fig, "top_cities_sales")

In [82]:
# RFM segment distribution
if not rfm.empty:
    seg_counts = rfm["segment"].value_counts().reset_index()
    seg_counts.columns = ["segment", "customers"]
    fig = plt.figure()
    plt.bar(seg_counts["segment"], seg_counts["customers"])
    plt.title("Customer Segments (RFM)")
    plt.xlabel("Segment")
    plt.ylabel("Customers")
    plt.xticks(rotation=45, ha="right")
    save_plot(fig, "rfm_segment_distribution")

In [88]:
# 13) Create an Excel workbook with multiple sheets
excel_path = "/data/Amazon_Sales_Analysis.xlsx"
with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
    kpi_df.to_excel(writer, sheet_name="KPIs", index=False)
    if not monthly_sales.empty:
        monthly_sales.to_excel(writer, sheet_name="Monthly Sales", index=False)
    if not category_sales.empty:
        category_sales.to_excel(writer, sheet_name="Category Sales", index=False)
    if not size_sales.empty:
        size_sales.to_excel(writer, sheet_name="Size Sales", index=False)
    if not product_sales.empty:
        product_sales.to_excel(writer, sheet_name="Top Products", index=False)
    if not fulfill_sales.empty:
        fulfill_sales.to_excel(writer, sheet_name="Fulfillment Sales", index=False)
    if not fulfill_status.empty:
        fulfill_status.to_excel(writer, sheet_name="Fulfillment Status")
    if not rfm.empty:
        rfm.reset_index().rename(columns={customer_col: "customer"}).to_excel(writer, sheet_name="RFM", index=False)
    if not state_sales.empty:
        state_sales.to_excel(writer, sheet_name="State Sales", index=False)
    if not city_sales.empty:
        city_sales.to_excel(writer, sheet_name="City Sales", index=False)

In [90]:
!pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0


In [93]:
# 14) Draft a concise textual report and save as a markdown file
lines = []
lines.append("# Amazon Sales Analysis – Summary Report")
lines.append("")
lines.append("## Overview KPIs")
for k, v in kpis.items():
    lines.append(f"- **{k}**: {v}")
lines.append("")

def add_section(title, df_section, bullets):
    lines.append(f"## {title}")
    for b in bullets:
        lines.append(f"- {b}")
    if isinstance(df_section, pd.DataFrame) and not df_section.empty:
        head_preview = df_section.head(10)
        lines.append("")
        lines.append("Top records preview:")
        lines.append(head_preview.to_markdown(index=False))
    lines.append("")

add_section(
    "Sales Overview",
    monthly_sales,
    [
        "Monthly sales trend plotted in `monthly_sales_trend.png`.",
        "Check seasonality and growth or decline over the analysis period."
    ],
)

add_section(
    "Product Analysis",
    category_sales.head(10) if not category_sales.empty else category_sales,
    [
        "Top contributing categories identified (see `top_categories_sales.png`).",
        "Top 20 products by sales listed in the 'Top Products' sheet.",
        "Size-level contributions shown in `top_sizes_sales.png` (if available)."
    ],
)

add_section(
    "Fulfillment Analysis",
    fulfill_sales,
    [
        "Sales split by fulfillment method (see `sales_by_fulfillment.png`).",
        "Status mix by fulfillment available in 'Fulfillment Status' sheet."
    ],
)

add_section(
    "Customer Segmentation",
    rfm,
    [
        "RFM segmentation performed (Champions, Loyal, Potential Loyalist, Need Attention, At Risk).",
        "Segment distribution chart saved as `rfm_segment_distribution.png`."
    ],
)

add_section(
    "Geographical Analysis",
    state_sales.head(10) if not state_sales.empty else state_sales,
    [
        "Top states and cities by sales shown in `top_states_sales.png` and `top_cities_sales.png`.",
        "Use these to target regional promotions and inventory allocation."
    ],
)

lines.append("## Recommendations (Auto-Generated)")
recs = [
    "Double down on top categories and products with targeted ads and bundling.",
    "Balance stock for best-selling sizes; reduce slow-moving size inventory.",
    "Prioritize the most profitable fulfillment method and address bottlenecks in low-performing channels.",
    "Cultivate 'Champions' and 'Loyal' customers with VIP offers; design win-back campaigns for 'At Risk' customers.",
    "Allocate marketing budgets to top-performing states/cities; test local promotions where share is growing.",
]
for r in recs:
    lines.append(f"- {r}")

report_md = "\n".join(lines)
report_path = "/data/Amazon_Sales_Report_Summary.md"
with open(report_path, "w", encoding="utf-8") as f:
    f.write(report_md)


In [101]:
# 15) Show key tables to the user
from IPython.display import display

# Show KPIs
print("KPIs")
display(kpi_df)

if not monthly_sales.empty:
    print("Monthly Sales (Summary)")
    display(monthly_sales.tail(12))

if not category_sales.empty:
    print("Top Categories")
    display(category_sales.head(15))

if not product_sales.empty:
    print("Top 20 Products by Sales")
    display(product_sales)

if not rfm.empty:
    print("Customer RFM Segments")
    display(rfm.reset_index().rename(columns={customer_col: "customer"}).head(50))

if not state_sales.empty:
    print("Top States by Sales")
    display(state_sales.head(15))

if not city_sales.empty:
    print("Top Cities by Sales")
    display(city_sales.head(15))


KPIs


Unnamed: 0,Metric,Value
0,Total Sales,78590170.25
1,Total Units,116479
2,Date Range,2022-03-31 to 2022-06-29
3,Unique Customers,112887


Monthly Sales (Summary)


Unnamed: 0,order_month,amount
0,2022-03-01,101683.85
1,2022-04-01,28836200.27
2,2022-05-01,26226476.75
3,2022-06-01,23425809.38


Top Categories


Unnamed: 0,category,amount
5,T-shirt,39206756.65
2,Shirt,21297770.08
0,Blazzer,11215104.12
6,Trousers,5346286.3
1,Perfume,789419.66
7,Wallet,458408.18
4,Socks,150757.5
3,Shoes,124752.76
8,Watch,915.0


Customer RFM Segments


Unnamed: 0,customer,recency,amount,frequency,r_score,f_score,m_score,rfm_score,segment
0,408-2305815-7883568,8,1315.0,1,5,5,5,15,Champions
1,171-0036605-7601963,16,1838.0,2,5,5,5,15,Champions
2,408-9981002-8870719,12,1199.0,1,5,5,5,15,Champions
3,408-9982126-9372352,20,1301.0,1,5,5,5,15,Champions
4,406-2843416-1416314,4,1602.0,2,5,5,5,15,Champions
5,171-0106620-2575543,1,2929.0,3,5,5,5,15,Champions
6,408-9888319-3338719,8,1442.0,1,5,5,5,15,Champions
7,408-9961022-0800369,11,1192.0,1,5,5,5,15,Champions
8,408-9848509-7337107,10,1044.0,1,5,5,5,15,Champions
9,171-7441943-0132301,18,1823.0,2,5,5,5,15,Champions


Top States by Sales


Unnamed: 0,ship_state,amount
27,MAHARASHTRA,13340333.05
22,KARNATAKA,10480694.22
56,TELANGANA,6915018.08
58,UTTAR PRADESH,6823947.08
55,TAMIL NADU,6519182.3
13,DELHI,4232738.97
23,KERALA,3823559.58
60,WEST BENGAL,3507212.82
1,ANDHRA PRADESH,3217859.86
18,HARYANA,2880355.99


Top Cities by Sales


Unnamed: 0,ship_city,amount
750,BENGALURU,6845390.65
2813,HYDERABAD,4946394.25
4653,MUMBAI,3701843.04
5230,NEW DELHI,3612512.78
1416,CHENNAI,3103415.74
5982,PUNE,2342011.18
3602,KOLKATA,1413603.87
2532,GURUGRAM,1220562.74
7370,THANE,1004503.29
4268,LUCKNOW,938076.34


In [102]:
print("KPIs")
print(kpi_df.to_string(index=False))

if not monthly_sales.empty:
    print("Monthly Sales (Summary)")
    print(monthly_sales.tail(12).to_string(index=False))

if not category_sales.empty:
    print("Top Categories")
    print(category_sales.head(15).to_string(index=False))

if not product_sales.empty:
    print("Top 20 Products by Sales")
    print(product_sales.to_string(index=False))

if not rfm.empty:
    print("Customer RFM Segments")
    print(rfm.reset_index().rename(columns={customer_col: "customer"}).head(50).to_string(index=False))

if not state_sales.empty:
    print("Top States by Sales")
    print(state_sales.head(15).to_string(index=False))

if not city_sales.empty:
    print("Top Cities by Sales")
    print(city_sales.head(15).to_string(index=False))


KPIs
          Metric                    Value
     Total Sales              78590170.25
     Total Units                   116479
      Date Range 2022-03-31 to 2022-06-29
Unique Customers                   112887
Monthly Sales (Summary)
order_month      amount
 2022-03-01   101683.85
 2022-04-01 28836200.27
 2022-05-01 26226476.75
 2022-06-01 23425809.38
Top Categories
category      amount
 T-shirt 39206756.65
   Shirt 21297770.08
 Blazzer 11215104.12
Trousers  5346286.30
 Perfume   789419.66
  Wallet   458408.18
   Socks   150757.50
   Shoes   124752.76
   Watch      915.00
Customer RFM Segments
           customer  recency  amount  frequency r_score f_score m_score  rfm_score   segment
408-2305815-7883568        8 1315.00          1       5       5       5         15 Champions
171-0036605-7601963       16 1838.00          2       5       5       5         15 Champions
408-9981002-8870719       12 1199.00          1       5       5       5         15 Champions
408-9982126-9372352   

In [103]:
# 16) Output paths to generated assets so the user can download
{
    "excel_path": excel_path,
    "report_markdown": report_path,
    "charts": image_paths,
    "detected_columns": {
        "order_date_col": order_date_col,
        "amount_col": amount_col,
        "qty_col": qty_col,
        "category_col": category_col,
        "product_col": product_col,
        "size_col": size_col,
        "fulfill_col": fulfill_col,
        "customer_col": customer_col,
        "state_col": state_col,
        "city_col": city_col,
        "status_col": status_col
    },
    "data_shape": work.shape
}

{'excel_path': '/data/Amazon_Sales_Analysis.xlsx',
 'report_markdown': '/data/Amazon_Sales_Report_Summary.md',
 'charts': ['data/monthly_sales_trend.png',
  'data/monthly_sales_trend.png',
  'data/monthly_sales_trend.png',
  'data/top_categories_sales.png',
  'data/top_categories_sales.png',
  'data/top_categories_sales.png',
  'data/top_sizes_sales.png',
  'data/sales_by_fulfillment.png',
  'data/top_states_sales.png',
  'data/top_cities_sales.png',
  'data/rfm_segment_distribution.png'],
 'detected_columns': {'order_date_col': 'date',
  'amount_col': 'amount',
  'qty_col': 'qty',
  'category_col': 'category',
  'product_col': None,
  'size_col': 'size',
  'fulfill_col': 'fulfilment',
  'customer_col': 'order_id',
  'state_col': 'ship_state',
  'city_col': 'ship_city',
  'status_col': 'status'},
 'data_shape': (121176, 22)}