Example Usage of ClientContext
- In this code, `ctx` is a `ClientContext` object from the `office365.sharepoint.client_context` module. It is the entry point used to interact with a SharePoint site. Here is a brief overview of how it works:

In [None]:
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.user_credential import UserCredential

# Connection settings (placeholders: substitute real values, and keep real
# credentials out of source control)
site_url = "https://your_sharepoint_site_url"
username = "your_username"
password = "your_password"

# Build the credential first, then bind it to a context for the site
credentials = UserCredential(username, password)
ctx = ClientContext(site_url).with_credentials(credentials)

# Example: fetch the site (web) object and show its title.
# load() registers the object; execute_query() performs the round trip.
web = ctx.web
ctx.load(web)
ctx.execute_query()
web_title = web.properties['Title']
print("Web title: {0}".format(web_title))

Explanation:
ClientContext: This object is used to connect to a SharePoint site.
UserCredential: This is used to authenticate the user.
ctx.load(web): Registers the web object to be loaded; the data is actually fetched by the execute_query() call that follows.
ctx.execute_query(): Executes the query to retrieve the data from SharePoint.

In [None]:
import os

# Define the local directory and the file name separately
local_path = "/path/to/your/directory/"
local_filename = "your_file_name.xlsx"

# Join the two with os.path.join rather than "+": the result is still correct
# if local_path is later written without its trailing separator, and it
# matches how download paths are built further down in this notebook.
local_file = os.path.join(local_path, local_filename)

# Print the full file path
print(local_file)

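Below are the steps performed inside the `moveFile` function, which moves a file between SharePoint directories. The snippets use its arguments `url` (the file's current server-relative path), `name` (the new file name) and `dir` (the destination directory):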

In [None]:
# Retrieve file information: resolve the file by its server-relative path,
# fetch it, and keep the unique id that the move request is addressed to.
source_file = ctx.web.get_file_by_server_relative_path(url)
file_info = source_file.get().execute_query()
file_id = file_info.unique_id

In [None]:
# Build the two URL fragments of the move request:
# the file selector, and the moveto call with its parameters.
get_file_query_string = "getFileById('{}')".format(file_id)
new_url = "/".join((dir, name))
moveto_query_params = {"newurl": new_url, "flags": 1}
moveto_query_string = sharepoint_buildurl("moveto", moveto_query_params)

# moveto_query_params: 
- This is a dictionary that holds the parameters for the move operation.

# "newurl": dir + "/" + name:
- newurl: This key specifies the new URL where the file should be moved.
-  dir + "/" + name: This concatenates the directory path (dir) and the new file name (name) to form the full path of the new location.
- dir: The destination directory where the file will be moved.
- name: The new name of the file after it is moved.

# "flags": 1:
- This key-value pair specifies additional options for the move operation.
- flags: This parameter controls the behavior of the move operation. The value 1 corresponds to Overwrite in SharePoint's MoveOperations enumeration: the destination file is overwritten if it already exists.

In [None]:
# Construct the move URL: service root / web / file selector / moveto call
url_segments = (
    ctx.service_root_url(),
    "web",
    get_file_query_string,
    moveto_query_string,
)
moveto_url = "/".join(url_segments)

In [None]:
# Create the request for the move URL and send it as a POST
request = RequestOptions(moveto_url)
request.method = "POST"
pending = ctx.pending_request()
pending.execute_request_direct(request)

Example :

In [None]:
# Define the SharePoint context
credentials = UserCredential(username, password)
ctx = ClientContext(site_url).with_credentials(credentials)

# Where the file is now, where it should go, and what it should be called
file_url = "/sites/your_site/Shared Documents/your_file.xlsx"
destination_dir = "/sites/your_site/Shared Documents/New Folder"
new_name = "new_file_name.xlsx"

# Move the file (arguments: current path, new name, destination directory)
moveFile(file_url, new_name, destination_dir)


In [None]:
# Get the root folder: a reference to the folder at sharepoint_path
site_web = ctx.web
root_folder = site_web.get_folder_by_server_relative_path(sharepoint_path)


# ctx.web.get_folder_by_server_relative_path(sharepoint_path): 
- This method retrieves the folder object from SharePoint using the server-relative path provided in sharepoint_path.
# root_folder: 
- This variable now holds the reference to the specified folder in SharePoint.

In [None]:
# List all files under the root folder (True is passed to get_files),
# then run the query so the collection is populated
file_query = root_folder.get_files(True)
files = file_query.execute_query()


# root_folder.get_files(True): 
- This method retrieves all files in the specified folder, including those in subdirectories. The True parameter indicates that the search should be recursive, meaning it will include files in all subdirectories.
# execute_query(): 
- This method executes the query to retrieve the files from SharePoint.

In [None]:
# Walk the (file name, table name) pairs in categorySeq so that each file
# can be loaded into its corresponding database table
for pair in categorySeq:
    file_name, table_name = pair
    print(f"Loading data from {file_name} into {table_name}")
    # Add your code here to load data from the file into the table


Output :
Loading data from Master_Send_TD into nps_master_send_td
Loading data from Master_Response_TD into nps_master_response_td
Loading data from Master_Send_BU into nps_master_send_bu
Loading data from Master_Response_BU into nps_master_response_bu


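Example (a generic illustration of building a lookup. Note that it produces a dict mapping category to index, whereas the loops in this notebook expect categorySeq to be a sequence of (file name pattern, table name) pairs, as in the output above):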

In [None]:
# Category labels, in the order they should be numbered
categories = ["Electronics", "Books", "Clothing", "Home & Kitchen"]

# Pair every label with its zero-based position in the list
categorySeq = dict(zip(categories, range(len(categories))))

# Show the resulting mapping
print(categorySeq)


In [None]:
# Iterate over categories.
# NOTE(review): only the loop header is shown in this cell; the body is the
# sequence of steps in the cells that follow. The matching step indexes each
# element as eachCat[0], so every element is expected to be indexable, e.g. a
# (file name pattern, table name) pair. Confirm against where categorySeq is
# defined.
for eachCat in categorySeq:

Loops through each pair in categorySeq, where eachCat[0] is the file name pattern and eachCat[1] is the corresponding database table name.

In [None]:
# Find matching files
def _is_category_file(candidate):
    """Return True for a file whose name contains the current pattern and
    whose URL is exactly sharepoint_path + "/" + its name."""
    return (
        eachCat[0] in candidate.properties["Name"]
        and candidate.serverRelativeUrl == sharepoint_path + "/" + candidate.name
    )


matching_files = list(filter(_is_category_file, files))

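Filters the list of files down to those whose name contains the current category's file name pattern and that sit directly in sharepoint_path. Files in subfolders, which the recursive listing also returns, are excluded because their URL is not exactly sharepoint_path + "/" + name.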

In [None]:
# Process each matching file
for f in matching_files:
    # the local copy keeps the base name of the remote file
    file_url = f.properties["ServerRelativeUrl"]
    remote_basename = os.path.basename(file_url)
    download_file = os.path.join(local_path, remote_basename)


For each matching file, constructs the local file path where the file will be downloaded.

In [None]:
# Extract year and quarter from the file name.
# The pattern looks for an underscore followed by four digits and Q1..Q4,
# e.g. "_2024Q3".
file_name = f.properties["Name"]
pattern = r'_(\d{4})(Q[1-4])'
match = re.search(pattern, file_name)

if match:
    year = match.group(1)
    quarter = match.group(2)
    # Printed only on a match: without one, year and quarter are either
    # undefined (NameError) or left over from a previously processed file.
    print(f"Year: {year}, Quarter: {quarter}")

print(f"Processing for {file_name}")


Uses a regular expression to extract the year and quarter from the file name. Prints the extracted values for verification.

In [None]:
# Download the file: stream the remote content into the local file opened
# for binary writing
with open(download_file, "wb") as local_file:
    remote_file = ctx.web.get_file_by_server_relative_path(file_url)
    file = remote_file.download(local_file).execute_query()

In [None]:
# Read the downloaded Excel file into a DataFrame
nps_df = pd.read_excel(io=download_file)


Reads the downloaded Excel file into a pandas DataFrame

In [None]:
# Ingest data into PostgreSQL: write the frame to pxl_temp.nps_temp,
# replacing the table if it already exists and leaving out the index column
nps_df.to_sql(
    name='nps_temp',
    con=postgres_engine,
    schema='pxl_temp',
    if_exists='replace',
    index=False,
)


In [None]:
# Move the processed file, under its current name, to the processed folder
processed_name = f.name
moveFile(file_url, processed_name, sharepoint_path_processed)


# Purpose: 
- Moves the processed file to a designated folder (sharepoint_path_processed) after it has been successfully processed.

# Function Call: 
- moveFile(file_url, f.name, sharepoint_path_processed) uses the moveFile function you defined earlier to move the file.

In [None]:
# Handle non-matching files.
# NOTE(review): this fragment is only valid as the continuation of an earlier
# statement. To act on files that do not match, it has to be the else-branch
# of an `if` (for example the `if match:` check on the file name). If it is
# instead aligned with a `for` loop, Python treats it as a for-else, which
# runs every time the loop finishes without `break`, i.e. also after files
# that were processed successfully.
else:  # if the file does not match any of the name pattern
    moveFile(file_url, f.name, sharepoint_path_failed)

# Purpose: 
- Moves files that do not match the expected name pattern to a different folder (sharepoint_path_failed).
# Function Call: 
- moveFile(file_url, f.name, sharepoint_path_failed) moves the file to the failed folder.

In [None]:
# Clean up: drop the DataFrame reference, then delete the downloaded copy
del nps_df
os.unlink(download_file)

# Purpose: 
- Cleans up by deleting the DataFrame and the local file after processing.
# Delete DataFrame: 
- del nps_df removes the DataFrame from memory.
# Remove Local File: 
- os.remove(download_file) deletes the downloaded file from the local directory.

Full Code Example :

In [None]:
# Full ingestion loop. For every entry in categorySeq, select the files that
# sit directly in sharepoint_path and whose name contains the entry's pattern
# (eachCat[0]). Each selected file is downloaded, loaded into the temp table
# and moved to the processed folder; a file whose name carries no
# "_<year>Q<n>" tag is moved to the failed folder instead.

# Compiled once because the same pattern is applied to every file name.
year_quarter_pattern = re.compile(r'_(\d{4})(Q[1-4])')

for eachCat in categorySeq:
    # grab ALL the files with the matching name
    matching_files = [
        f
        for f in files
        if eachCat[0] in f.properties["Name"]  # search for the files with the matching name
        and f.serverRelativeUrl == sharepoint_path + "/" + f.name
    ]

    # process each matching file
    for f in matching_files:
        file_url = f.properties["ServerRelativeUrl"]
        file_name = f.properties["Name"]

        # get the year and quarter from the file name
        match = year_quarter_pattern.search(file_name)
        if match is None:
            # The file does not match the name pattern: set it aside and go on
            # with the next one. This must be decided per file; an `else`
            # attached to the `for` loop would run after every loop that ends
            # without `break` and would act on the last (already moved) file,
            # or fail with NameError when no file matched at all.
            moveFile(file_url, f.name, sharepoint_path_failed)
            continue

        year, quarter = match.groups()
        print(f"Year: {year}, Quarter: {quarter}")
        print(f"Processing for {file_name}")

        # download to the local folder
        download_file = os.path.join(local_path, os.path.basename(file_url))
        try:
            with open(download_file, "wb") as local_fh:
                (
                    ctx.web.get_file_by_server_relative_path(file_url)
                    .download(local_fh)
                    .execute_query()
                )

            # read the file and create a dataframe
            nps_df = pd.read_excel(download_file)

            # ingest to db, temp table first
            # NOTE(review): every file is written to pxl_temp.nps_temp with
            # if_exists='replace', so each file overwrites the previous one,
            # and eachCat[1] is not used here. Presumably a later step copies
            # the temp table into the target table; confirm.
            nps_df.to_sql('nps_temp', postgres_engine, schema='pxl_temp', if_exists='replace', index=False)

            # release the dataframe before the next file is read
            del nps_df
        finally:
            # remove the local copy even when the download or ingest failed
            if os.path.exists(download_file):
                os.remove(download_file)

        # move the processed file to another folder (reached only when the
        # download and ingest above raised no exception)
        moveFile(file_url, f.name, sharepoint_path_processed)


Additional Considerations:
- Error Handling: Add try-except blocks to handle potential errors during file download, processing, or database insertion.

- Logging: Implement logging to keep track of the processing status and any errors that occur.

- Performance Optimization: If processing a large number of files, consider optimizing the code to handle files in batches or use asynchronous processing.