In [1]:
!pip install Office365-REST-Python-Client

Defaulting to user installation because normal site-packages is not writeable


In [3]:
import io
import os
import linecache
import urllib
import pandas as pd
import glob
import numpy as np
from datetime import date
import datetime
import json
import requests
from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.client_context import ClientContext
from requests.auth import HTTPBasicAuth
from office365.sharepoint.files.file import File
from openpyxl import load_workbook
from snowflake.connector.pandas_tools import write_pandas
from snowflake.connector.pandas_tools import pd_writer
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine
import snowflake.connector

In [4]:
# Credentials are read from credentials.txt, one value per line:
#   line 1: user id
#   line 2: password
#   line 3: associated email address
#   line 4: client id     (passed to ClientCredential for SharePoint)
#   line 5: client secret (passed to ClientCredential for SharePoint)
user_id = linecache.getline('credentials.txt', 1).strip()
user_password = linecache.getline('credentials.txt', 2).strip()
user_email = linecache.getline('credentials.txt', 3).strip()
client_id = linecache.getline('credentials.txt', 4).strip()
client_secret = linecache.getline('credentials.txt', 5).strip()

# linecache.getline returns '' instead of raising when the file or the line is
# missing, so check here and fail with a clear message rather than letting an
# empty credential surface later as an obscure authentication error.
_missing_credentials = [
    name
    for name, value in (
        ('user_id', user_id),
        ('user_password', user_password),
        ('user_email', user_email),
        ('client_id', client_id),
        ('client_secret', client_secret),
    )
    if not value
]
if _missing_credentials:
    raise ValueError(
        "credentials.txt is missing or has empty lines for: "
        + ", ".join(_missing_credentials)
    )

In [5]:
# SharePoint site this notebook reads from.
site_url = 'https://ecentral.sharepoint.com/sites/SupplyChainDevelopment'

# Build the credential object from the client id / secret loaded earlier,
# then attach it to a client context for the site.
client_credentials = ClientCredential(client_id, client_secret)
ctx = (
    ClientContext(site_url)
    .with_credentials(client_credentials)
)

In [6]:
def folder_details(ctx, folder_in_sharepoint):
    """Return the names of the files held in a SharePoint folder.

    Parameters:
    ctx -- client context used to reach the site (must expose ``web``,
           ``load`` and ``execute_query``).
    folder_in_sharepoint -- server-relative URL of the folder to list.

    Returns:
    A list with the ``"Name"`` property of every entry in the folder's
    ``files`` collection, in the order the collection yields them.
    """
    target_folder = ctx.web.get_folder_by_server_relative_url(folder_in_sharepoint)
    files_in_folder = target_folder.files
    # The collection is only populated after it is loaded and the query is executed.
    ctx.load(files_in_folder)
    ctx.execute_query()
    return [entry.properties["Name"] for entry in files_in_folder]

In [7]:
# Server-relative prefix of the SharePoint site; it is prepended to the folder
# path when the file URL is built in download_file_from_sharepoint.
folder_prefix = "/sites/SupplyChainDevelopment/"

In [8]:
# Function to Download File from SharePoint
def download_file_from_sharepoint(ctx, folder_prefix, folder_in_sharepoint, file_name, sheet_name):
    """Download an Excel workbook from SharePoint and load one sheet into a DataFrame.

    Parameters:
    ctx -- authenticated SharePoint client context.
    folder_prefix -- server-relative site prefix (expected to end with "/").
    folder_in_sharepoint -- folder path below the prefix, without a trailing "/".
    file_name -- name of the workbook inside that folder.
    sheet_name -- worksheet to read, passed straight to ``pd.read_excel``.

    Returns:
    The DataFrame produced by ``pd.read_excel``; no header clean-up is done here.

    Raises:
    Whatever ``response.raise_for_status()`` raises when the download did not
    succeed, plus any parsing error from ``pd.read_excel``.
    """
    server_relative_url = folder_prefix + folder_in_sharepoint + "/" + file_name
    response = File.open_binary(ctx, server_relative_url)
    # Stop on an HTTP error here; otherwise the error body would be handed to
    # the Excel parser and fail with a misleading "not a valid workbook" error.
    response.raise_for_status()

    # BytesIO initialised from the payload is already positioned at the start.
    df = pd.read_excel(io.BytesIO(response.content), sheet_name = sheet_name)
    print(df.head())
    return df

In [9]:
# Folder to read from, URL-encoded and without the site prefix
# (folder_prefix is prepended when a file is downloaded).
folder_in_sharepoint = 'Shared%20Documents/General/1%20Supply%20Chain%20Strategy/2309%20Cost%20to%20serve/Data/Transportation%20Data'
# Names of the files in that folder.
file_list = folder_details(ctx, folder_in_sharepoint) 
# Bare expression so the notebook displays the listing.
file_list

['All Freight and Parcel Dump.url',
 'Sample - Freight Data Dump as 10.02.2023.xlsx']

In [91]:
def get_oauth_token(svc_username=None, svc_password=None, client_secret=None):
    """
    Retrieves the authentication token for SBD Snowflake.
    If a username or password is not passed in, it is requested from the user
    through an interactive prompt (the password prompt does not echo).
    If using service account credentials, they can be passed as svc_username and svc_password.
    Keywords:
    svc_username -- Service account username for which the token should be provided.
    svc_password -- Password corresponding to the service account user.
    client_secret -- OAuth client secret. When omitted it is read from the
                     SBD_SNOWFLAKE_CLIENT_SECRET environment variable; if that is
                     also unset, the legacy built-in value is used and a warning
                     is emitted.
    Returns:
    The "access_token" field of the token endpoint's JSON response.
    Raises:
    Whatever requests raises for a failed or timed-out call, including the
    error from raise_for_status() on a non-success HTTP status.
    Example usage:
    # Authentication with your personal user id.
    from sbd_common_utils.snowflake_utils import get_oauth_token
    access_token = get_oauth_token()
    # Authentication with service account.
    from sbd_common_utils.snowflake_utils import get_oauth_token
    from sbd_common_utils.common_utils import get_service_account_creds
    username, password = get_service_account_creds("/datascience/sandbox/someapp/service-account")
    access_token = get_oauth_token(username, password)
    """
    import getpass
    import os
    import warnings

    # Previously these names were left unbound when credentials were not
    # passed, so the call failed; prompt for whichever value is missing.
    username = svc_username if svc_username else input("Username: ")
    password = svc_password if svc_password else getpass.getpass("Password: ")

    if client_secret is None:
        client_secret = os.environ.get("SBD_SNOWFLAKE_CLIENT_SECRET")
    if client_secret is None:
        # SECURITY: this secret is committed in source and should be rotated.
        # The fallback is kept only so existing runs keep working; supply the
        # secret through the argument or the environment variable instead.
        warnings.warn(
            "Using the client secret embedded in the notebook; set "
            "SBD_SNOWFLAKE_CLIENT_SECRET or pass client_secret instead.",
            stacklevel=2,
        )
        client_secret = 'f9sq630wmLP6UjpSsOk7kTuP6xccCrSOC4YhE1VdTq3GCupqR7gjYcpuhEGRJ9e0'

    r = requests.post(
        "https://ssoprod.sbdinc.com/as/token.oauth2",
        data={
            "client_id": "Snowflake",
            "grant_type": "password",
            "username": username,
            "password": password,
            "client_secret": client_secret,
            "scope": "session:role-any",
        },
        # Without a timeout a stalled SSO endpoint would hang the kernel.
        timeout=60,
    )
    r.raise_for_status()
    access_token = r.json()["access_token"]
    return access_token

In [92]:
# Fetch the OAuth token once, using the user id and password read from
# credentials.txt. The Snowflake helper functions below read `access_token`
# as a global when they open their connections.
access_token = get_oauth_token(svc_username = user_id, svc_password = user_password)

In [93]:
def check_table_presence(qualified_table_name):
    """Report whether a Snowflake table can be described.

    Opens a connection with the module-level ``access_token``, runs
    ``desc table <qualified_table_name>`` and closes the connection again.

    Parameters:
    qualified_table_name -- table name as it should appear in the statement,
                            e.g. ``DATABASE.SCHEMA.TABLE``. It is interpolated
                            into the SQL text, so pass trusted identifiers only.

    Returns:
    True when the statement succeeds, False when Snowflake rejects it with a
    ProgrammingError (the error raised for an object that does not exist or
    is not accessible).

    Raises:
    Any other error (connection, network, authentication) is propagated
    instead of being reported as "table absent".
    """
    activesnowflakeconnector = snowflake.connector.connect(account = 'sbd_caspian.us-east-1', 
                                                           authenticator = 'oauth', 
                                                           token = access_token,
                                                           warehouse = 'DEV_AIDA_WH',
                                                           database = 'DEV_AIDA',
                                                           role = 'SC_STRATEGY_ANALYTICS_DEV_RW',
                                                           schema = 'SC_STRATEGY_ANALYTICS'
                                                          )
    try:
        cur = activesnowflakeconnector.cursor()
        try:
            # Only the success/failure of the statement matters; its rows are not used.
            cur.execute(f"desc table {qualified_table_name}")
            return True
        except snowflake.connector.errors.ProgrammingError:
            return False
        finally:
            cur.close()
    finally:
        # Always release the session; the function is called once per uploaded chunk.
        activesnowflakeconnector.close()

In [94]:
def drop_table(qualified_table_name):
    """Drop a Snowflake table.

    Opens a connection with the module-level ``access_token``, runs
    ``drop table <qualified_table_name>`` and closes the connection again.

    Parameters:
    qualified_table_name -- table name as it should appear in the statement,
                            e.g. ``DATABASE.SCHEMA.TABLE``. It is interpolated
                            into the SQL text, so pass trusted identifiers only.

    Returns:
    True when the statement succeeds, False when Snowflake rejects it with a
    ProgrammingError (for example because the table does not exist).

    Raises:
    Any other error (connection, network, authentication) is propagated
    instead of being reported as a failed drop.
    """
    activesnowflakeconnector = snowflake.connector.connect(account = 'sbd_caspian.us-east-1', 
                                                           authenticator = 'oauth', 
                                                           token = access_token,
                                                           warehouse = 'DEV_AIDA_WH',
                                                           database = 'DEV_AIDA',
                                                           role = 'SC_STRATEGY_ANALYTICS_DEV_RW',
                                                           schema = 'SC_STRATEGY_ANALYTICS'
                                                          )
    try:
        cur = activesnowflakeconnector.cursor()
        try:
            # Only the success/failure of the statement matters; its result is not used.
            cur.execute(f"drop table {qualified_table_name}")
            return True
        except snowflake.connector.errors.ProgrammingError:
            return False
        finally:
            cur.close()
    finally:
        # Always release the session, whether or not the statement succeeded.
        activesnowflakeconnector.close()

In [95]:
def create_table(df, 
                 table_name, 
                 database = 'DEV_AIDA',
                 schema = 'SC_STRATEGY_ANALYTICS'):
    """Write a DataFrame to a Snowflake table, creating the table if needed.

    Parameters:
    df -- frame to upload; its index is not written.
    table_name -- unqualified name of the target table.
    database -- Snowflake database to connect to.
    schema -- Snowflake schema to connect to.

    The rows are always appended. ``DataFrame.to_sql`` with
    ``if_exists="append"`` creates the table when it does not exist, so a
    missing table is still handled. A wrong "table absent" answer from the
    presence check can no longer trigger a ``replace`` that would discard
    rows loaded by earlier calls. The presence check is kept for the log line only.

    Relies on the module-level ``access_token`` for authentication.
    The engine is disposed even when the upload fails.
    """
    engine = create_engine(URL(account = 'sbd_caspian.us-east-1', 
                 authenticator = 'oauth', 
                 token = access_token,
                 warehouse = 'DEV_AIDA_WH',
                 role = 'SC_STRATEGY_ANALYTICS_DEV_RW',
                 database = database,
                 schema = schema
                ))
    try:
        qualified_table_name = f'{database}.{schema}.{table_name}'
        presence_flag = check_table_presence(qualified_table_name)
        print(f"presence_flag : {presence_flag}")

        df.to_sql(
            table_name, 
            con = engine, 
            if_exists = "append", 
            index = False
        )
    finally:
        # Release pooled connections whether or not the upload succeeded.
        engine.dispose()

In [96]:
# Use the last entry of the folder listing as the workbook to load.
# NOTE(review): this relies on the order of the listing (the folder also
# contains a .url file) — confirm the intended workbook is always last.
file_name = file_list[-1]
# Worksheet to read from that workbook.
sheet_name = 'Freight 10 02'

In [88]:
df = download_file_from_sharepoint(ctx = ctx, 
                                   folder_prefix = folder_prefix, 
                                   folder_in_sharepoint = folder_in_sharepoint, 
                                   file_name = file_name, 
                                   sheet_name = sheet_name
                                  )
# The first row of the parsed sheet holds the real column names, so promote it
# to the header and keep the remaining rows as data.
col_list = df.iloc[0]
# .copy() makes the data rows an independent frame, so columns added later are
# not assigned onto a slice of the downloaded frame.
df = df.iloc[1:,:].copy()
# Normalise the names: "(" and " " become "_", ")" is dropped, a doubled "_" is
# collapsed once, and the result is upper-cased. str() is applied first because
# a blank or numeric header cell is not a string and has no .replace method.
df.columns = [
    str(col).replace("(","_").replace(")","").replace(" ","_").replace("__","_").upper()
    for col in col_list
]

<Response [200]>
             Unnamed: 0    Unnamed: 1   302354535.44999737       Unnamed: 3  \
0  SHIP_DATE_MONTH_YEAR  ACCOUNT_CODE  GL_ALLOCATED_AMOUNT    CURRENCY_CODE   
1   2023-06-01 00:00:00     56550.702               102.66  USD - US DOLLAR   
2   2023-02-01 00:00:00     56550.702               232.35  USD - US DOLLAR   
3   2023-03-01 00:00:00     56550.702               153.19  USD - US DOLLAR   
4   2022-12-01 00:00:00     56550.702               617.18  USD - US DOLLAR   

      Unnamed: 4               Unnamed: 5     Unnamed: 6     Unnamed: 7  \
0  DIVISION_CODE  INVOICE_DATE_MONTH_YEAR  LOCATION_CODE  MOVEMENT_TYPE   
1          56550      2023-06-01 00:00:00            702       Outbound   
2          56550      2023-02-01 00:00:00            702       Outbound   
3          56550      2023-03-01 00:00:00            702       Outbound   
4          56550      2022-12-01 00:00:00            702       Outbound   

             Unnamed: 8            Unnamed: 9  ...       

In [89]:
# Stamp every row with the load date and the workbook it came from.
today = date.today()
df['LOAD_DATE'] = today
# `file` was never defined in this notebook and only resolved through leftover
# kernel state; the workbook that was downloaded is `file_name`.
df['source_file_name'] = file_name
print(df.head())

  SHIP_DATE_MONTH_YEAR ACCOUNT_CODE GL_ALLOCATED_AMOUNT    CURRENCY_CODE  \
1  2023-06-01 00:00:00    56550.702              102.66  USD - US DOLLAR   
2  2023-02-01 00:00:00    56550.702              232.35  USD - US DOLLAR   
3  2023-03-01 00:00:00    56550.702              153.19  USD - US DOLLAR   
4  2022-12-01 00:00:00    56550.702              617.18  USD - US DOLLAR   
5  2023-08-01 00:00:00    56620.412               91.12  USD - US DOLLAR   

  DIVISION_CODE INVOICE_DATE_MONTH_YEAR LOCATION_CODE MOVEMENT_TYPE  \
1         56550     2023-06-01 00:00:00           702      Outbound   
2         56550     2023-02-01 00:00:00           702      Outbound   
3         56550     2023-03-01 00:00:00           702      Outbound   
4         56550     2022-12-01 00:00:00           702      Outbound   
5         56620     2023-08-01 00:00:00           412      Outbound   

  ACTUAL_DELIVERY_DATE ADDRESS_DIV_LOCATION  ... SHIP_WEIGHT_STANDARD  \
1  2023-06-30 00:00:00               550702

In [102]:
# Upload the frame in consecutive slices of 5000 rows, one create_table call
# per slice; the starting row offset is printed before each slice.
chunk_size = 5000
for offset in range(0, len(df), chunk_size):
    print(offset)
    chunk = df.iloc[offset : offset + chunk_size]
    create_table(df = chunk, table_name = "SC_STRATEGY_CASS_TRANSPORTATION_DETAILS")
    print("***")

0
presence_flag : True


  df.to_sql(


***
5000
presence_flag : True
***
10000
presence_flag : True
***
15000
presence_flag : True
***
20000
presence_flag : True
***
25000
presence_flag : True
***
30000
presence_flag : True
***
35000
presence_flag : True
***
40000
presence_flag : True
***
45000
presence_flag : True
***
50000
presence_flag : True
***
55000
presence_flag : True
***
60000
presence_flag : True
***
65000
presence_flag : True
***
70000
presence_flag : True
***
75000
presence_flag : True
***
80000
presence_flag : True
***
85000
presence_flag : True
***
90000
presence_flag : True
***
95000
presence_flag : True
***
100000
presence_flag : True
***
105000
presence_flag : True
***
110000
presence_flag : True
***
115000
presence_flag : True
***
120000
presence_flag : True
***
125000
presence_flag : True
***
130000
presence_flag : True
***
135000
presence_flag : True
***
140000
presence_flag : True
***
145000
presence_flag : True
***
150000
presence_flag : True
***
155000
presence_flag : True
***
160000
presence_flag : T