<a href="https://colab.research.google.com/github/ssharma-ss/ss-tools/blob/main/report_to_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title
# Imports utilities and packages
import warnings
import pandas as pd
from datetime import datetime
import os

# Suppress every Python warning for the whole session (the filter is global,
# not limited to one function).
# NOTE(review): this also hides pandas deprecation warnings -- confirm that
# blanket suppression is still wanted.
warnings.filterwarnings('ignore')
# Let pandas use up to 1000 characters of width when printing frames.
pd.set_option('display.width', 1000)

def dataReport_to_csv(filename):
  """Convert one shipping-label Excel report into a shipment-upload CSV.

  Reads the 'Request Shipping Labels' sheet of ``filename``, keeps the
  columns needed for the upload, drops summary rows and rows whose shipped
  quantity is 0, renames the headers to the upload field names and writes
  the result to ``<PO NUM><VENDOR><RETAILER>_CSV_Shipment_Upload.csv`` in
  the current working directory.

  Args:
    filename: Path of the Excel workbook to convert.

  Returns:
    None. The CSV file is written as a side effect.
  """
  # header=None keeps the header row as ordinary data so the cells used for
  # the output file name can be addressed by position below.
  # For future: Modify this to read CSVs in case clients send us CSV.
  df = pd.read_excel(filename, sheet_name='Request Shipping Labels', header=None)

  # Remove unnecessary columns after 'REQUEST SHIPPING LABELS'
  # (label-based slice: keeps columns 0 through 11 inclusive).
  df = df.loc[:, :11]

  # The first data row supplies the pieces of the output file name. str()
  # guards against numeric cells (e.g. an all-digit PO number), which would
  # otherwise make the string concatenation below raise TypeError.
  po_num = str(df.at[1, 3])
  vendor_dir = str(df.at[1, 0])
  retailer_name = str(df.at[1, 1])

  # Drop the columns that are not part of the upload, then promote the first
  # row to column headers.
  df = df.drop(df.columns[[4, 6, 7, 8]], axis=1)
  new_header = df.iloc[0]
  df = df[1:]
  df.columns = new_header

  # Remove the summary lines from the file. astype(str) and regex=False make
  # this a plain substring test that also works for blank (NaN) or numeric PO
  # cells; without them the mask can contain NaN and `~` raises TypeError.
  is_summary = df['PO NUM'].astype(str).str.contains("SUMMARY", regex=False)
  df = df[~is_summary]

  # Remove rows that contain Quantity Shipped = 0
  # Otherwise it breaks on portal app
  # .copy() gives an independent frame so the column assignments below act on
  # this data and not on a view of the filtered original.
  df = df[df['QTY SHIPPED'] != 0].copy()

  ##############################################################################
  # Remove NaN from the carton number and replace with 6 JUST FOR TESTING
  # Assigning the result back (instead of fillna(inplace=True) on a selected
  # column) is the form that keeps working under pandas Copy-on-Write.
  df['SHIP CARTON NUMBER'] = df['SHIP CARTON NUMBER'].fillna('6')
  ##############################################################################

  # Add placeholder for all the BOL/Tracking field
  df['TRACKING'] = "PLACEHOLDER"

  # Set the headers to the right CSV mapping
  df = df.rename(columns={
      'VENDOR'              :  'vendor.tp_directory',
      'RETAILER'            :  'retailer.tp_name',
      'SHIP TO LOC'         :  'ship_info.ship_to_location.tp_location_code',
      'PO NUM'              :  'po.po_num',
      'MARK FOR LOC'        :  'po.mark_for_location.tp_location_code',
      'GTIN/UPC'            :  'po.ship_carton.po_item_pack.po_item.product.product_gtin',
      'QTY SHIPPED'         :  'po.ship_carton.po_item_pack.po_item_pack_qty',
      'SHIP CARTON NUMBER'  :  'po.ship_carton.ship_carton_number',
      'TRACKING'            :  'ship_info.ship_info_tracking'
    })

  # Output name: <PO NUM><VENDOR><RETAILER>_CSV_Shipment_Upload.csv
  # (no date prefix and no separators -- the name is kept exactly as it has
  # always been written so anything looking for these files still finds them).
  file_name = f"{po_num}{vendor_dir}{retailer_name}_CSV_Shipment_Upload.csv"

  # Spit out the CSV file. Passing the path lets pandas open the file itself
  # with the newline handling the csv writer expects.
  df.to_csv(file_name, index=False)

'''

  Use cases to check:
  1. Whether the file provided is an Excel file or the CSV file
  2. Make sure the script doesn't process the CSVs that it produced
    -- Solution: Since I prepend CSV__ to the file name, it acts kind of
    like a unique identifier.
    Check the first five characters of the filename; they tell you
    whether the file was generated by this script.

'''


# @title
# Convert every report found in directory_path.
directory_path = '/content/'
directory_files = os.listdir(directory_path)

# Suffix dataReport_to_csv gives every file it writes; used to skip this
# script's own output when the cell is run again.
generated_suffix = "_CSV_Shipment_Upload.csv"

files_to_process = []
# sorted() makes the processing order deterministic (os.listdir order is not).
for f in sorted(directory_files):
  # Excel workbooks are always inputs; match the real extension, dot included.
  is_excel = f.endswith(".xlsx")
  # CSVs are inputs only when this script did not generate them.
  # NOTE(review): dataReport_to_csv reads its input with pd.read_excel, so a
  # client CSV selected here will presumably fail until CSV reading is
  # implemented there -- confirm.
  is_client_csv = (
      f.endswith(".csv")
      and not f.startswith("CSV__")
      and not f.endswith(generated_suffix)
  )
  if is_excel or is_client_csv:
    # Join with the directory so the files are found whatever the current
    # working directory is.
    files_to_process.append(os.path.join(directory_path, f))

for f in files_to_process:
  dataReport_to_csv(f)

print("Finished processing")

Finished processing
