In [4]:
import pandas as pd
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import udf, avg, col
from snowflake.snowpark.types import IntegerType, FloatType, StringType, BooleanType
from snowflake.snowpark.files import SnowflakeFile
import sys
sys.path.append('..')
from credentials import Credentials


In [5]:
cred = Credentials()
session = Session.builder.configs(cred.__dict__).create()

In [6]:
session.use_role("SYSADMIN")
session.use_database("ACCOUNTADMIN_MGMT")
session.use_warehouse("ACCOUNTADMIN_MGMT")
session.use_schema("UTILITIES")

In [13]:
session.sql('CREATE STAGE EXCEL_FILES DIRECTORY = (ENABLE = TRUE)').collect()

[Row(status='Stage area EXCEL_FILES successfully created.')]

In [15]:
session.sql("SHOW STAGES")

<snowflake.snowpark.dataframe.DataFrame at 0xffff5483f280>

In [None]:
CREATE OR REPLACE PROCEDURE parse_excel_sp(file_path string)
RETURNS VARIANT
LANGUAGE PYTHON
RUNTIME_VERSION = '3.8'
PACKAGES = ('snowflake-snowpark-python', 'pandas', 'openpyxl')
HANDLER = 'main'
AS
$$
from snowflake.snowpark.files import SnowflakeFile
from openpyxl import load_workbook
import pandas as pd
 
def main(session, file_path):
 with SnowflakeFile.open(file_path, 'rb') as f:
     workbook = load_workbook(f)
     sheet = workbook.active
     data = sheet.values
 
     # Get the first line in file as a header line
     columns = next(data)[0:]
     # Create a DataFrame based on the second and subsequent lines of data
     df = pd.DataFrame(data, columns=columns)
 
     df2 = session.create_dataframe(df)
     df2.write.mode("overwrite").save_as_table("EXCEL_SP")
 
 return True
$$;

In [28]:
file_path_url:str = session.sql("SELECT BUILD_SCOPED_FILE_URL(@ACCOUNTADMIN_MGMT.UTILITIES.EXCEL_FILES,'export.07.07.2023.xls')").collect();

In [29]:
file_path_url

[Row(BUILD_SCOPED_FILE_URL(@ACCOUNTADMIN_MGMT.UTILITIES.EXCEL_FILES,'EXPORT.07.07.2023.XLS')='https://compucom.us-east-1.snowflakecomputing.com/api/files/01adf3a4-0607-f5ce-0000-09790babe276/10415309559382/nxKQWq9gtjqyTitmeKqS00skpY0RW2VXnuyMoN1OL5BDmsiZgWgcwHaDM5%2ffujHryWeNuhT%2fYUOtAogJEo9Sko%2bn2vnAxzXWLd5NDTSmjvGjrGRu4QmmfAyHMGC0%2b65OQA3sr7UkRN0nKULhbv2cwAjrWTKuTT0WMDPYeWVkHyXKWdhMrNHOwtvhCtpcJO1B%2fg9k2CRmynSAgbsCo4lRqJ0x4%2b8P13ulcEi2gLhEV0WlwJ3zEkd3sIAhFmJkUfjQ3aarGB06')]

In [49]:
class FileFormat:
	def __init__(self, order_date:str, allocation_date:str, so_number:str, so_line:str, cust_po:str, end_user_po:str, account_rep:str, p_line:str, td_pn:str, manuf_pn:str):
		self.order_date = order_date
		self.allocation_date = allocation_date
		self.so_number = so_number
		self.so_line = so_line
		self.cust_po = cust_po
		self.end_user_po = end_user_po
		self.account_rep = account_rep
		self.p_line = p_line
		self.td_pn = td_pn
	
	@staticmethod
	def keys():
		return ['order_date', 'allocation_date', 'so_number', 'so_line', 'cust_po', 'end_user_po', 'account_rep', 'p_line', 'td_pn', 'manuf_pn']

	def values(self):
		return self.__dict__
	
	def __del__(self):
		"This (Magic/Dunder) method deletes the object from memory"
		pass
	

In [50]:
my_object = FileFormat("11.02.21",	"12.04.21",	"4583244", "001", "FC ALTUS GROUP", "FORECAST ORDER","", "LOGITC", "0424CC","960-000764")
print(my_object.values())

{'order_date': '11.02.21', 'allocation_date': '12.04.21', 'so_number': '4583244', 'so_line': '001', 'cust_po': 'FC ALTUS GROUP', 'end_user_po': 'FORECAST ORDER', 'account_rep': '', 'p_line': 'LOGITC', 'td_pn': '0424CC'}


In [53]:
import xlrd

def read_first_workbook_from_excel(file_path):
    with open(file_path, 'rb') as file:
        binary_data = file.read()
    
    workbooks = xlrd.open_workbook(file_contents=binary_data, on_demand=True)
    
    worksheet = workbooks.sheet_by_index(0)

    data = []
    
    for row in range(1, worksheet.nrows):

        file_row = FileFormat(
            order_date = worksheet.cell(row, 0).value,
            allocation_date = worksheet.cell(row, 1).value,
            so_number = worksheet.cell(row, 2).value,
            so_line = worksheet.cell(row, 3).value,
            cust_po = worksheet.cell(row, 4).value,
            end_user_po = worksheet.cell(row, 5).value,
            account_rep = worksheet.cell(row, 6).value,
            p_line = worksheet.cell(row, 7).value,
            td_pn = worksheet.cell(row, 8).value,
            manuf_pn = worksheet.cell(row, 9).value
        )
        
        data.append(file_row.values())
    
    df = pd.DataFrame(data, columns=FileFormat.keys())
    
    return df
    
# Example usage:
file_path = './testing.xls'
read_first_workbook_from_excel(file_path)


Unnamed: 0,order_date,allocation_date,so_number,so_line,cust_po,end_user_po,account_rep,p_line,td_pn,manuf_pn
0,44238.0,44298.0,4583244.0,1,FC ALTUS GROUP,FORECAST ORDER,,LOGITC,0424CC,
1,44266.0,44397.0,2448744.0,1,FC ALTUS GROUP,FORECAST ORDER,,HPQSNB,7O3561,
2,44312.0,44362.0,8488654.0,6,FC CDK,FORECAST ORDER,,CISCO,0H8421,
3,44312.0,44334.0,8488654.0,4,FC CDK,FORECAST ORDER,,CISCO,6017CW,
4,44312.0,,8488654.0,1,FC CDK,FORECAST ORDER,,CISCO,6184DA,
5,44312.0,44336.0,8488654.0,2,FC CDK,FORECAST ORDER,,CISCO,6754DA,
6,44280.0,44307.0,2048054.0,1,FC CDK,FORECAST ORDER,,CISCO,7111CY,
7,44312.0,44334.0,8488654.0,5,FC CDK,FORECAST ORDER,,CISCO,8200CX,
8,44312.0,44335.0,8488654.0,3,FC CDK,FORECAST ORDER,,CISCO,9355ZA,
9,44201.0,44264.0,6872434.0,5,FC CITY OF WINNIPEG,FORECAST ORDER,,LENNBO,0J5796,


In [None]:
CREATE OR REPLACE PROCEDURE parse_excel_sp(file_path string)
RETURNS VARIANT
LANGUAGE PYTHON
RUNTIME_VERSION = '3.10'
PACKAGES = ('snowflake-snowpark-python', 'pandas', 'xlrd')
HANDLER = 'main'
AS
$$
from snowflake.snowpark.files import SnowflakeFile
import xlrd
import pandas as pd
 
def main(session, file_path):
	with SnowflakeFile.open(file_path, 'rb') as file:
    	binary_data = file.read()

	workbooks = xlrd.open_workbook(file_contents=binary_data, on_demand=True)
	
	worksheet = workbooks.sheet_by_index(0)

	data = []
	
	for row in range(1, worksheet.nrows):
		order_date:str = worksheet.cell(row, 0).value
		allocation_date:str = worksheet.cell(row, 1).value
		so_number:str = worksheet.cell(row, 2).value
		data.append((order_date, allocation_date, so_number))

 
	return True
$$;

In [7]:
session.close()