In [1]:
import os
import msal
from msal.application import ConfidentialClientApplication
from dotenv import load_dotenv
import jwt
import json
import requests
from datetime import datetime, timedelta
from typing import Optional, Dict, List

# Folder the notebook should run from (an absolute path on the author's machine).
target_directory = r'C:\Users\pablosal\Desktop\sharepoint-indexing-azure-cognitive-search'

# Pull variables from a .env file into the process environment.
# NOTE(review): this runs before the working directory is switched below, so the
# .env lookup happens relative to the original working directory - confirm intended.
load_dotenv()

# Switch the working directory to the target folder when it is present;
# otherwise only report the problem and carry on.
if not os.path.exists(target_directory):
    print(f"Directory {target_directory} does not exist.")
else:
    os.chdir(target_directory)
    print(f"Directory changed to {os.getcwd()}")

# Project logger. Imported after the directory switch - presumably so the
# project-local `utils` package can be resolved from the new working directory.
from utils.ml_logging import get_logger
logger = get_logger()

Directory changed to C:\Users\pablosal\Desktop\sharepoint-indexing-azure-cognitive-search


In [None]:
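# Expected layout of the config.json file (NOTE: the cells below read TENANT_ID, CLIENT_ID and CLIENT_SECRET from the environment / .env rather than from this file):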
    
# {
#     "authority": "https://login.microsoftonline.com/XXX",
#     "client_id": "XXX",
#     "scope": [ "https://graph.microsoft.com/.default" ],
#     "secret": "XXX",
#     "site_domain": "XXX.sharepoint.com",
#     "site_name": "Mark8ProjectTeam",
#     "include_auth_info": true,
#     "search_service_name": "XXX",
#     "search_index_name": "sharepoint-index-1",
#     "search_admin_api_key": "XXX"
# }

In [2]:
# Notebook-level placeholders; every one of them starts out unset (None).
accessToken = requestHeaders = tokenExpiry = queryResults = None

In [2]:
# Read the Azure AD app-registration settings from the environment
# (populated by load_dotenv() in the first cell).
tenantID = os.getenv('TENANT_ID')
clientID = os.getenv('CLIENT_ID')
clientSecret = os.getenv('CLIENT_SECRET')

# Fail fast with an actionable message: without this check a missing TENANT_ID
# surfaces as an opaque "can only concatenate str" TypeError on the authority
# line, and missing client credentials only fail later during authentication.
_missing_settings = [
    name
    for name, value in (
        ('TENANT_ID', tenantID),
        ('CLIENT_ID', clientID),
        ('CLIENT_SECRET', clientSecret),
    )
    if not value
]
if _missing_settings:
    raise EnvironmentError(
        'Missing required environment variable(s): ' + ', '.join(_missing_settings)
        + '. Define them in the environment or in the .env file.'
    )

graphURI = 'https://graph.microsoft.com'
authority = 'https://login.microsoftonline.com/' + tenantID
scope = ['https://graph.microsoft.com/.default']

In [3]:
def msgraph_auth(client_id: str, client_secret: str, authority: str, scope: list) -> Optional[Dict]:
    """
    Authenticate with Microsoft Graph using MSAL for Python.

    Builds a confidential client application, first asks MSAL for a cached token and, when none is
    returned, requests a new token for the client. The token's claims are then decoded (without
    signature verification, for inspection only) and logged together with the expiry time.

    :param client_id: The application (client) ID of your Azure AD app registration.
    :param client_secret: The client secret for your Azure AD app registration.
    :param authority: The authority URL for your Azure AD tenant.
    :param scope: Scopes required for the token.
    :return: The MSAL token response dictionary (it contains the ``access_token`` key) if successful,
             None when MSAL did not return an access token.
    :raises Exception: Re-raised after logging if acquiring or decoding the token fails.
    """
    app = msal.ConfidentialClientApplication(client_id, authority=authority, client_credential=client_secret)

    try:
        token_response = app.acquire_token_silent(scope, account=None)
        if token_response:
            logger.info('Token retrieved from MSAL Cache....')
        else:
            token_response = app.acquire_token_for_client(scopes=scope)
            if 'access_token' not in token_response:
                logger.error('Error acquiring authorization token. Check your tenantID, clientID, and clientSecret.')
                # Surface the failure details MSAL puts in the response instead of discarding them.
                logger.error(
                    'MSAL error: %s - %s',
                    token_response.get('error'),
                    token_response.get('error_description'),
                )
                return None
            logger.info('New access token retrieved....')

        # The signature is deliberately not verified: the claims are only inspected/logged here.
        # NOTE(review): the claims dump lands in the notebook output (tenant/app IDs, granted roles);
        # clear the output before sharing the notebook.
        algorithms = ["RS256"]
        decoded_access_token = jwt.decode(
            token_response['access_token'],
            algorithms=algorithms,
            options={"verify_signature": False},
        )
        access_token_formatted = json.dumps(decoded_access_token, indent=2)
        logger.info('Decoded Access Token:\n%s', access_token_formatted)

        # Token Expiry ('exp' is a Unix timestamp; rendered here in local time)
        token_expiry = datetime.fromtimestamp(int(decoded_access_token['exp']))
        logger.info('Token Expires at: %s', str(token_expiry))
        return token_response
    except Exception as err:
        logger.error("Error in msgraph_auth: %s", str(err))
        raise


In [4]:
# Acquire the Graph token response used by the request helpers.
client_auth = msgraph_auth(
    client_id=clientID,
    client_secret=clientSecret,
    authority=authority,
    scope=scope,
)

2023-12-06 01:01:39,084 - micro - MainProcess - INFO     New access token retrieved.... (1309348350.py:msgraph_auth:22)
2023-12-06 01:01:39,084 - micro - MainProcess - INFO     Decoded Access Token:
{
  "aud": "https://graph.microsoft.com",
  "iss": "https://sts.windows.net/9495d8c9-4ebb-4107-b905-c7b45d1b7b7a/",
  "iat": 1701845798,
  "nbf": 1701845798,
  "exp": 1701849698,
  "aio": "E2VgYBBROMCs8qMiS07SOv7trJOiAA==",
  "app_displayname": "dev-graph",
  "appid": "118583ee-94ed-45dd-870b-73784045eb37",
  "appidacr": "1",
  "idp": "https://sts.windows.net/9495d8c9-4ebb-4107-b905-c7b45d1b7b7a/",
  "idtyp": "app",
  "oid": "4f614374-65fa-45fc-8369-cb616a6fe08f",
  "rh": "0.Ab0AydiVlLtOB0G5Bce0XRt7egMAAAAAAAAAwAAAAAAAAADLAAA.",
  "roles": [
    "TeamsActivity.Read.All",
    "SharePointTenantSettings.Read.All",
    "People.Read.All",
    "Sites.Read.All",
    "Directory.Read.All",
    "OnlineMeetingTranscript.Read.All",
    "BrowserSiteLists.ReadWrite.All",
    "Files.Read.All",
    "Mail.R

In [5]:
def make_ms_graph_request(access_token: str, url: str, timeout: float = 30.0) -> Dict:
    """
    Make a GET request to the Microsoft Graph API and return the decoded JSON body.

    :param access_token: The access token for Microsoft Graph API authentication.
    :param url: The URL for the Microsoft Graph API endpoint.
    :param timeout: Seconds to wait for the server before giving up. Without a timeout
                    ``requests`` can block indefinitely on an unresponsive connection.
    :return: The JSON response from the Microsoft Graph API.
    :raises Exception: If there's an HTTP error or other issues in making the request
                       (the error is logged and re-raised).
    """
    headers = {"Authorization": f"Bearer {access_token}"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions instead of returning an error payload.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP Error: {err}")
        raise
    except Exception as err:
        logger.error(f"Error in make_ms_graph_request: {err}")
        raise

def get_drive_id(access_token: str, site_id: str) -> str:
    """
    Look up the ID of the drive returned by the site's ``/drive`` endpoint.

    :param access_token: The access token for Microsoft Graph API authentication.
    :param site_id: The site ID in Microsoft Graph.
    :return: The value of the ``id`` field in the drive response.
    :raises Exception: Any error from the request or a missing ``id`` field is logged and re-raised.
    """
    drive_endpoint = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive"
    try:
        drive = make_ms_graph_request(access_token, drive_endpoint)
        drive_id = drive["id"]
    except Exception as err:
        logger.error(f"Error in get_drive_id: {err}")
        raise
    else:
        return drive_id

def _parse_graph_timestamp(value: str) -> datetime:
    """Parse an ISO-8601 timestamp such as ``2023-12-06T01:01:39Z`` into a timezone-aware datetime."""
    # Local import: the notebook's import cell only brings in datetime and timedelta.
    from datetime import timezone

    # datetime.fromisoformat() only understands a trailing "Z" from Python 3.11 on,
    # so spell the UTC offset out explicitly.
    if value.endswith('Z'):
        value = value[:-1] + '+00:00'
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        # Offset-less values are treated as UTC (Graph timestamps carry a "Z" suffix).
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_files_in_site(access_token: str, site_id: str, drive_id: str, minutes_ago: int = 0, file_formats: Optional[List[str]] = None) -> List[Dict]:
    """
    Get the items in the root of a site's drive, optionally filtered by creation or last modification time and file formats.

    All result pages are collected by following ``@odata.nextLink``. The recency filter compares the
    items' UTC timestamps against the current UTC time, so it is independent of the machine's local time zone.

    :param access_token: The access token for Microsoft Graph API authentication.
    :param site_id: The site ID in Microsoft Graph.
    :param drive_id: The drive ID in Microsoft Graph.
    :param minutes_ago: Integer to filter files created or updated within the specified number of minutes from now.
                        Values of 0 or less disable the filter.
    :param file_formats: List of desired file formats (extensions without the leading dot, matched case-sensitively).
    :return: A list of file details.
    :raises Exception: If there's an error in fetching file details (logged and re-raised).
    """
    # Local import: the notebook's import cell only brings in datetime and timedelta.
    from datetime import timezone

    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children"
    try:
        # Graph pages large collections; keep following the next-page link until it is absent.
        files = []
        next_url = url
        while next_url:
            page = make_ms_graph_request(access_token, next_url)
            files.extend(page["value"])
            next_url = page.get("@odata.nextLink")

        if minutes_ago > 0:
            time_limit = datetime.now(timezone.utc) - timedelta(minutes=minutes_ago)
            files = [
                file for file in files
                if _parse_graph_timestamp(file['createdDateTime']) > time_limit
                or _parse_graph_timestamp(file['lastModifiedDateTime']) > time_limit
            ]

        if file_formats:
            # str.endswith accepts a tuple, which replaces the per-format any(...) scan.
            suffixes = tuple(f'.{fmt}' for fmt in file_formats)
            files = [file for file in files if file['name'].endswith(suffixes)]

        return files
    except Exception as err:
        logger.error(f"Error in get_files_in_site: {err}")
        raise
 

def get_file_permissions(access_token: str, site_id: str, item_id: str) -> List[Dict]:
    """
    Fetch the permission entries attached to an item in a site.

    :param access_token: The access token for Microsoft Graph API authentication.
    :param site_id: The site ID in Microsoft Graph.
    :param item_id: The item ID of the file in Microsoft Graph.
    :return: The ``value`` list from the permissions response.
    :raises Exception: Any error from the request or a missing ``value`` field is logged and re-raised.
    """
    # NOTE(review): the Graph reference lists this route as
    # /sites/{site-id}/drive/items/{item-id}/permissions - confirm the path below
    # (without the /drive segment) is the one intended.
    permissions_endpoint = f"https://graph.microsoft.com/v1.0/sites/{site_id}/items/{item_id}/permissions"
    try:
        payload = make_ms_graph_request(access_token, permissions_endpoint)
        permissions = payload["value"]
    except Exception as err:
        logger.error(f"Error in get_file_permissions: {err}")
        raise
    else:
        return permissions

In [6]:
# Acquire a Graph token response for the requests that follow.
client_auth = msgraph_auth(clientID, clientSecret, authority, scope)
# Alias under the originally misspelled name so any cell that references
# `clien_auth` keeps working.
clien_auth = client_auth

2023-12-06 01:01:48,694 - micro - MainProcess - INFO     New access token retrieved.... (1309348350.py:msgraph_auth:22)
2023-12-06 01:01:48,694 - micro - MainProcess - INFO     Decoded Access Token:
{
  "aud": "https://graph.microsoft.com",
  "iss": "https://sts.windows.net/9495d8c9-4ebb-4107-b905-c7b45d1b7b7a/",
  "iat": 1701845807,
  "nbf": 1701845807,
  "exp": 1701849707,
  "aio": "E2VgYCg0Mim8XH/kdYHhNd6ppm56AA==",
  "app_displayname": "dev-graph",
  "appid": "118583ee-94ed-45dd-870b-73784045eb37",
  "appidacr": "1",
  "idp": "https://sts.windows.net/9495d8c9-4ebb-4107-b905-c7b45d1b7b7a/",
  "idtyp": "app",
  "oid": "4f614374-65fa-45fc-8369-cb616a6fe08f",
  "rh": "0.Ab0AydiVlLtOB0G5Bce0XRt7egMAAAAAAAAAwAAAAAAAAADLAAA.",
  "roles": [
    "TeamsActivity.Read.All",
    "SharePointTenantSettings.Read.All",
    "People.Read.All",
    "Sites.Read.All",
    "Directory.Read.All",
    "OnlineMeetingTranscript.Read.All",
    "BrowserSiteLists.ReadWrite.All",
    "Files.Read.All",
    "Mail.R

In [7]:
# SharePoint host name and site name used to build the site lookup endpoint.
site_domain, site_name = "mngenvmcap747548.sharepoint.com", "Contoso"

In [8]:
print ('Getting the Site ID...')
# Address the site by host name and site path: /sites/{site_domain}:/sites/{site_name}:/
endpoint = f"https://graph.microsoft.com/v1.0/sites/{site_domain}:/sites/{site_name}:/"
# Use the correctly spelled token variable from the first authentication cell.
result = make_ms_graph_request(client_auth['access_token'], endpoint)


Getting the Site ID...


In [9]:
print ('Getting the Site ID...')
# Address the site by host name and site path: /sites/{site_domain}:/sites/{site_name}:/
endpoint = f"https://graph.microsoft.com/v1.0/sites/{site_domain}:/sites/{site_name}:/"
# Use the correctly spelled token variable from the first authentication cell.
result = make_ms_graph_request(client_auth['access_token'], endpoint)
# Keep the full comma-separated id returned by Graph, not a single component of it.
site_id = result["id"]
print ('Site ID:', site_id)

Getting the Site ID...
Site ID: mngenvmcap747548.sharepoint.com,877fe60f-a62d-4ed1-8eda-af543c437d2c,ac47d8a7-cd54-4344-bd9d-26ada5a075c0
