# In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt

# Step 1: Load data from each file format
# Each source ends up as its own DataFrame so it can be inspected and cleaned
# independently before the three are combined.

# Load CSV data
csv_data = pd.read_csv("sales_data.csv")

# Load Excel data (no sheet is named, so pandas' default sheet selection applies)
excel_data = pd.read_excel("sales_data.xlsx")

# Load JSON data
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode (or fail on) UTF-8 JSON.
# NOTE(review): assumes the JSON payload has a shape pd.DataFrame accepts
# (e.g. a list of records) — confirm against the actual file.
with open("sales_data.json", "r", encoding="utf-8") as json_file:
    json_data = pd.DataFrame(json.load(json_file))

# Step 2: Explore the data structures
# Preview the leading rows of every source. The heading and the frame are
# passed to print() as separate arguments, one call per source.
for heading, frame in (
    ("CSV Data:\n", csv_data),
    ("\nExcel Data:\n", excel_data),
    ("\nJSON Data:\n", json_data),
):
    print(heading, frame.head())

# Step 3: Data Cleaning - Handle missing values and remove duplicates
# Each source frame is modified in place: first every row containing a
# missing value is dropped, then rows that repeat an earlier row are dropped.
for frame in (csv_data, excel_data, json_data):
    frame.dropna(inplace=True)
    frame.drop_duplicates(inplace=True)

# Step 4: Convert data to a unified format (combine all data into one dataframe)
# Stack the three source frames row-wise and renumber the index from zero.
# NOTE(review): duplicates are removed per source only; a record present in
# more than one file would appear several times here — confirm this is intended.
source_frames = [csv_data, excel_data, json_data]
all_data = pd.concat(source_frames, ignore_index=True)
print("\nCombined Data:\n", all_data.head())

# Step 5: Data Transformation - Assume columns such as 'Date', 'Product', 'SalesAmount'
# Parse 'Date' into datetime values, but only when the combined frame has
# such a column (membership on a DataFrame tests its column labels).
if 'Date' in all_data:
    parsed_dates = pd.to_datetime(all_data['Date'])
    all_data['Date'] = parsed_dates

# Step 6: Analyze sales data - Calculate total sales and average order value
# Select the amount column once and derive both aggregates from it.
# NOTE(review): the mean is an "average order value" only if each row is one
# order — confirm against the data.
sales_amounts = all_data['SalesAmount']
total_sales = sales_amounts.sum()
avg_order_value = sales_amounts.mean()
print(f"\nTotal Sales: {total_sales}")
print(f"Average Order Value: {avg_order_value}")

# Calculate sales distribution by product category if 'ProductCategory' column exists
# product_sales is only defined when the column is present; the category
# charts later in the script check the same condition before using it.
if 'ProductCategory' in all_data.columns:
    rows_by_category = all_data.groupby('ProductCategory')
    product_sales = rows_by_category['SalesAmount'].sum()
    print("\nSales by Product Category:\n", product_sales)

# Step 7: Visualizations
# Both category charts read product_sales, which only exists when the
# 'ProductCategory' column is present, so they share one guard.
if 'ProductCategory' in all_data.columns:
    # Bar plot for sales by product category
    bar_fig, bar_ax = plt.subplots(figsize=(8, 5))
    product_sales.plot(kind='bar', color='skyblue', ax=bar_ax)
    bar_ax.set_title("Sales by Product Category")
    bar_ax.set_xlabel("Product Category")
    bar_ax.set_ylabel("Total Sales Amount")
    plt.show()

    # Pie chart for product category distribution
    pie_fig, pie_ax = plt.subplots(figsize=(8, 5))
    product_sales.plot(kind='pie', autopct='%1.1f%%', startangle=140, ax=pie_ax)
    pie_ax.set_title("Sales Distribution by Product Category")
    # Blank out the y-axis label so only the wedges and title are shown.
    pie_ax.set_ylabel("")
    plt.show()

# Box plot for SalesAmount distribution
# Missing amounts are dropped before plotting; the chart is drawn on an
# explicitly created axes.
box_fig, box_ax = plt.subplots(figsize=(8, 5))
box_ax.boxplot(all_data['SalesAmount'].dropna())
box_ax.set_title("Sales Amount Distribution")
box_ax.set_ylabel("Sales Amount")
plt.show()
