In [3]:
import pandas as pd
import os
import csv

In [12]:
# Root folder that is walked for sales CSV files (the cell output lists sub-folders such as 2022 and 2024)
Root_Dir = "Data/Sales_data"

def Read_files_from_Dir(root):
    """Print the folder name, file name and full text of every file under ``root``.

    The tree is traversed with ``os.walk``, so files in nested sub-folders are
    included. Nothing is returned; all output goes to stdout.

    Args:
        root: Path of the directory to traverse.
    """
    for current_dir, _subdirs, names in os.walk(root):
        # Only the last path component is shown, not the full path.
        folder_label = os.path.basename(current_dir)
        for name in names:
            full_path = os.path.join(current_dir, name)
            print("Directory Name: ", folder_label)
            with open(full_path, 'r') as handle:
                print("File Name: ", name)
                contents = handle.read()
                print(contents)

# Print the folder name, file name and raw contents of every file under Root_Dir
Read_files_from_Dir(Root_Dir)

Directory Name:  2022
File Name:  data_jan.csv
Date,Store ID,Product ID,Quantity
2022-01-02,S001,P001,15
2022-01-04,S001,P002,20
2022-01-06,S001,P003,25
2022-01-08,S001,P002,30
2022-01-10,S001,P002,35
Directory Name:  2022
File Name:  data_feb.csv
Date,Store ID,Product ID,Quantity
2022-02-02,S001,P002,24
2022-02-04,S001,P005,36
2022-02-06,S001,P003,10
2022-02-08,S001,P003,12
2022-02-10,S001,P005,34
Directory Name:  2022
File Name:  data_march.csv
Date,Store ID,Product ID,Quantity
2022-03-02,S001,P004,33
2022-03-04,S001,P005,11
2022-03-06,S001,P005,25
2022-03-08,S001,P004,31
2022-03-10,S001,P003,5
Directory Name:  2024
File Name:  data_jan.csv
Date,Store ID,Product ID,Quantity
2024-01-02,S001,P001,15
2024-01-04,S001,P002,20
2024-01-06,S001,P003,25
2024-01-08,S001,P002,30
2024-01-10,S001,P002,35
Directory Name:  2024
File Name:  data_feb.csv
Date,Store ID,Product ID,Quantity
2024-02-02,S001,P002,24
2024-02-04,S001,P005,36
2024-02-06,S001,P003,10
2024-02-08,S001,P003,12
2024-02-10,S001,P0

In [15]:
def Cal_Total_Sales(root):
    """Sum the quantity sold per product across every file under ``root``.

    Every file found by ``os.walk(root)`` is parsed as CSV (regardless of its
    extension) and must provide the ``Product ID`` and ``Quantity`` columns.

    Args:
        root: Directory whose files (including those in sub-folders) are read.

    Returns:
        dict mapping each product ID (surrounding whitespace removed) to the
        integer sum of its ``Quantity`` values over all files.

    Raises:
        KeyError: If a file lacks the ``Product ID`` or ``Quantity`` column.
        ValueError, TypeError: If a ``Quantity`` value cannot be converted to
            ``int`` (for example a blank or truncated row).
    """
    totals = {}
    for dir_path, _dir_names, file_names in os.walk(root):
        for filename in file_names:
            filepath = os.path.join(dir_path, filename)
            # newline='' lets the csv module do its own line-ending handling.
            with open(filepath, 'r', newline='') as file:
                for row in csv.DictReader(file):
                    raw_product_id = row["Product ID"]
                    quantity = int(row["Quantity"])
                    # Normalise the key the same way the product-name lookup
                    # does, so " P001 " and "P001" are totalled together.
                    product_id = raw_product_id.strip()
                    totals[product_id] = totals.get(product_id, 0) + quantity
    return totals

Sales = Cal_Total_Sales(Root_Dir)

# Rank products from the highest to the lowest total quantity; a dict keeps
# insertion order, so the ranking survives the conversion back to a dict.
Sorted_sales = dict(sorted(Sales.items(), key=lambda item: item[1], reverse=True))
print(Sorted_sales)

# Keep only the three leading entries of the ranking
first_3_items = dict(list(Sorted_sales.items())[:3])
print("TOP 3 BEST SELLING PRODUCTS: ", first_3_items)

{'P002': 981, 'P005': 954, 'P004': 576, 'P003': 468, 'P001': 135}
TOP 3 BEST SELLING PRODUCTS:  {'P002': 981, 'P005': 954, 'P004': 576}


In [16]:
# Use the same spelling as the sales directory the earlier cells read from.
# A differently-cased path ("Sales_Data") names a different directory on
# case-sensitive filesystems, and os.walk() silently yields nothing for a
# missing root, which would produce a summary containing only the header.
Root_Dir = "Data/Sales_data"
# CSV file that maps each product ID to its display name
Product_Names_File = "Data/product_names.csv"

def Load_Product_Names(filepath):
    """Build a product-ID to product-name lookup from a CSV file.

    The file must have ``Product ID`` and ``Product_Name`` columns. Both
    values have surrounding whitespace removed. When an ID appears more than
    once, the name from the last such row is kept.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        dict mapping product ID to product name.
    """
    with open(filepath, 'r') as source:
        records = csv.DictReader(source)
        # Tuple elements are evaluated left to right, so the ID column is
        # read before the name column for every row.
        return dict(
            (record["Product ID"].strip(), record["Product_Name"].strip())
            for record in records
        )

In [18]:
def Create_Sales_Summary(Total_Sales, root, product_names, output_path='sales_summary.csv'):
    """Write a per-product sales summary CSV built from the files under ``root``.

    The summary has one row per product ID with the columns ``Product ID``,
    ``Product_Name``, ``Total Quantity`` and ``Average Quantity Sold``. The
    average is the total quantity divided by the number of sales rows seen for
    that product (i.e. an average per sales record, not per file or per month).

    Args:
        Total_Sales: Not used; totals are recomputed from the files. Kept so
            existing callers keep working.
        root: Directory whose files (including those in sub-folders) are
            parsed as sales CSVs.
        product_names: dict mapping product ID to product name. IDs that are
            not present are reported as ``"Unknown Product"``.
        output_path: Where the summary CSV is written. Defaults to
            ``sales_summary.csv`` in the current working directory.

    Returns:
        None. The result is the file written at ``output_path``.

    Raises:
        ValueError: If a non-blank ``Quantity`` value is not an integer.
    """
    product_details = _collect_product_details(root, product_names)

    fieldnames = ['Product ID', 'Product_Name', 'Total Quantity', 'Average Quantity Sold']
    with open(output_path, mode='w', newline='') as summary_file:
        writer = csv.DictWriter(summary_file, fieldnames=fieldnames)
        writer.writeheader()

        for product_id, details in product_details.items():
            writer.writerow({
                'Product ID': product_id,
                'Product_Name': details["product_name"],
                'Total Quantity': details["total_quantity"],
                # "records" is at least 1 for every stored product, so this
                # division cannot hit zero.
                'Average Quantity Sold': details["total_quantity"] / details["records"]
            })


def _collect_product_details(root, product_names):
    """Aggregate name, total quantity and row count per product ID under ``root``."""
    product_details = {}
    for dir_path, _dir_names, file_names in os.walk(root):
        for filename in file_names:
            filepath = os.path.join(dir_path, filename)
            # newline='' lets the csv module do its own line-ending handling.
            with open(filepath, 'r', newline='') as file:
                for row in csv.DictReader(file):
                    # DictReader fills the missing trailing fields of a short
                    # row with None, so `.get(key, default)` alone does not
                    # protect against them; `or` covers None and blank values.
                    product_id = (row.get("Product ID") or "").strip()
                    quantity = int(row.get("Quantity") or 0)

                    if product_id not in product_details:
                        product_details[product_id] = {
                            "product_name": product_names.get(product_id, "Unknown Product"),
                            "total_quantity": 0,
                            "records": 0,
                        }
                    details = product_details[product_id]
                    details["total_quantity"] += quantity
                    details["records"] += 1
    return product_details

# Build the product ID -> product name lookup from the CSV at Product_Names_File
product_names = Load_Product_Names(Product_Names_File)

# Write the per-product summary CSV for the sales files found under Root_Dir
Create_Sales_Summary(Total_Sales=Sales, root=Root_Dir, product_names=product_names)