In [None]:
!python -m pip install requests

In [None]:
# Install msal (used below for Microsoft identity authentication) and requests (HTTP calls).
%pip install msal requests 

In [None]:
# Install the notebook's third-party dependencies.
# NOTE(review): openpyxl and python-pptx are not referenced in the visible cells — confirm they are still needed.
%pip install msal requests pandas openpyxl python-pptx 

In [None]:
pip install --upgrade pip

In [None]:
# Restart the Python process after the pip installs above.
# NOTE(review): presumably needed so the freshly installed packages are picked up — confirm against Databricks docs.
dbutils.library.restartPython() 

In [None]:
import msal
import urllib.parse
import requests

In [None]:
# Read the tenant ID, client ID and client secret from the "company" secret
# scope (same three lookups, in the same order).
tenant_id, client_id, client_secret = (
    dbutils.secrets.get(scope="company", key=secret_key)
    for secret_key in ("tenant_id", "client_id", "client_secret_id")
)

SharePoint Authentication

In [None]:

# Authority is the tenant's login URL (not the /oauth2/v2.0/token endpoint);
# it is defined once here and handed to MSAL below.
authority = f"https://login.microsoftonline.com/{tenant_id}"
# ".default" requests the Graph permissions configured on the app registration.
scope = ["https://graph.microsoft.com/.default"]

# Set up MSAL authentication (confidential client: client ID + secret)
app = msal.ConfidentialClientApplication(
    client_id=client_id,
    client_credential=client_secret,
    authority=authority,
)

# Acquire token or refresh if expired
def get_access_token():
    """Return a Microsoft Graph access token for the confidential client ``app``.

    First asks MSAL for an already-available token via ``acquire_token_silent``
    and, when that returns nothing, requests a new one with
    ``acquire_token_for_client``.

    Returns:
        str: The ``access_token`` value from the MSAL response.

    Raises:
        RuntimeError: If the response carries no ``access_token``. The message
            includes the ``error`` and ``error_description`` fields of the
            response so the failure can be diagnosed.
    """
    token_response = app.acquire_token_silent(scopes=scope, account=None)

    # If no token is available, acquire a new one
    if not token_response:
        token_response = app.acquire_token_for_client(scopes=scope)

    # Guard against a None/empty response so the failure below is explicit
    # instead of an AttributeError on `.get`.
    token_response = token_response or {}
    access_token = token_response.get("access_token")
    if not access_token:
        error_code = token_response.get("error", "unknown_error")
        error_detail = token_response.get("error_description", "no description returned")
        raise RuntimeError(
            f"Failed to acquire access token: {error_code} - {error_detail}"
        )

    print("Access token acquired successfully.")
    return access_token

# Fetch the token once, then wrap it in the Authorization header that the
# Graph requests in the following cells reuse.
access_token = get_access_token()
headers = dict(Authorization=f"Bearer {access_token}")


CSV File Path

In [None]:
import urllib.parse

# SharePoint location of the source file: tenant host, site name and file path.
# To target another file, edit `file_path` only and write it verbatim
# (spaces, slashes and dots as they appear); it is percent-encoded below.
sharepoint_site = "andrey.sharepoint.com"
site_name = "File"
file_path = "/Shared Documents//Value Upload.csv"

encoded_file_path = urllib.parse.quote(file_path)
print("Encoded file path: " + encoded_file_path)

In [None]:
# Step 1: Resolve the SharePoint site (host + site name) to its Graph site ID
site_url = f"https://graph.microsoft.com/v1.0/sites/{sharepoint_site}:/sites/{site_name}"
site_response = requests.get(site_url, headers=headers)

# Anything other than 200 aborts the notebook with the status and response body.
if site_response.status_code != 200:
    raise Exception(f"Failed to access SharePoint site: {site_response.status_code}, {site_response.text}")

site_data = site_response.json()
site_id = site_data['id']
print(f"Successfully accessed the SharePoint site with Site ID: {site_id}")

In [None]:
# Step 2: Retrieve the Document Library (Drive ID)
drive_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
drive_response = requests.get(drive_url, headers=headers)

if drive_response.status_code != 200:
    raise Exception(f"Failed to retrieve drives: {drive_response.status_code}, {drive_response.text}")

drives_data = drive_response.json()
drive_id = None
for drive in drives_data['value']:
    # Each label needs its own `in` test: `"Shared Documents" or "Documents" in name`
    # is always truthy and would select whichever drive happens to be listed first.
    if any(label in drive['name'] for label in ("Shared Documents", "Documents")):
        drive_id = drive['id']
        break

if drive_id is None:
    raise Exception("Drive ID for 'Shared Documents' not found.")

print(f"Drive ID: {drive_id}")

# Step 3: Retrieve the File ID
# The search is keyed on the file name, i.e. the last segment of `file_path`.
file_name = file_path.split('/')[-1]
# A single quote inside an OData string literal is escaped by doubling it;
# without this a name containing `'` would break the q='...' expression.
search_term = file_name.replace("'", "''")
search_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/search(q='{search_term}')"
search_response = requests.get(search_url, headers=headers)

if search_response.status_code != 200:
    raise Exception(f"Failed to search for the file: {search_response.status_code}, {search_response.text}")

search_data = search_response.json()
# Graph search is not an exact-name lookup, so the first hit may be a different
# item. Only accept results whose name equals the requested file name
# (compared case-insensitively).
matching_items = [
    item for item in search_data['value']
    if item.get('name', '').lower() == file_name.lower()
]
if not matching_items:
    raise Exception("File not found in search results.")

file_id = matching_items[0]['id']
print(f"Successfully retrieved the File ID: {file_id}")


Retrieve the SharePoint file into a DataFrame

In [None]:
from io import BytesIO
import pandas as pd

# Step 4: Download the file's bytes from Graph and parse them as CSV
file_content_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{file_id}/content"
file_response = requests.get(file_content_url, headers=headers)

# Abort with the status and response body on any non-200 answer.
if file_response.status_code != 200:
    raise Exception(f"Failed to retrieve the file: {file_response.status_code}, {file_response.text}")

print("File retrieved successfully from SharePoint.")
file_stream = BytesIO(file_response.content)
df = pd.read_csv(file_stream)  # parse the in-memory bytes into a DataFrame

# Preview the first rows as a sanity check on the parsed content
df.head()


### Import to the FP&A ETL

In [None]:
# The model number in the endpoint URL varies depending on the ETL; update it
# to match the ETL being loaded.
import json

# URL for Vena API with the templateId
url = "https://api/public/v1/etl/templates/121212/startWithFile"

# File name sent to Vena, taken from the last segment of `file_path` so it
# stays in step with the file that was downloaded from SharePoint.
upload_file_name = file_path.split('/')[-1]

# Prepare metadata as per API requirements
metadata = {
    "input": {
        "partName": "file",
        "fileFormat": "CSV",
        "fileEncoding": "UTF-8",
        "fileName": upload_file_name
    }
}

# Prepare the file content for the request: the raw bytes downloaded from
# SharePoint plus the JSON metadata part.
files = {
    'file': (upload_file_name, file_response.content, 'text/csv'),
    'metadata': (None, json.dumps(metadata), 'application/json')
}

# Headers for API
# NOTE(review): the Basic credential is hard-coded in the notebook; it should be
# read from the secret store like tenant_id/client_id/client_secret are.
vena_headers = {
    "Authorization": "Basic MTIxMjEyMTIxMjExMjE6MTIxMjEyMTEx",  # Replace with your base64 encoded credentials
    "accept": "application/json"
}

# Send the POST request to Vena
response = requests.post(url, files=files, headers=vena_headers)

# Output the response
print(f"Status Code: {response.status_code}")
print(f"Response: {response.text}")

# Fail loudly on an error status (4xx/5xx) so a rejected upload does not pass
# as a successful run, matching how the earlier steps raise on failure.
if not response.ok:
    raise Exception(f"Failed to start the Vena ETL job: {response.status_code}, {response.text}")
