#### libraries

In [0]:
# sharepoint modules
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.files.file import File

# local files handling when reading input
import os
# in-memory handling for writing files for export
# from io import BytesIO

# to read/write CSV in sharepoint
import pandas
'''
If using spark.read.csv() -> AnalysisException: Incompatible format detected.
Delta format not working either.
Eg:
  dfr = spark.read \
      .format("csv") \
      .option("header", "true") \
      .load(download_path)
'''

#### `general settings` to work with sharepoint

In [0]:
# SharePoint site that every folder/file operation in this notebook targets
site_url = "https://YOUR-COMPANY.sharepoint.com/sites/SASIPAutomationProject/"

# Authenticated client context; the password is fetched with
# dbutils.secrets.get rather than being hard-coded in the notebook
client = ClientContext(site_url).with_credentials(
    UserCredential(
        "powerbiadmin@YOUR-COMPANY.com",
        dbutils.secrets.get('keylatpricemonito', 'sql-YOUR-COMPANY'),
    )
)

#### create individual `dataframes from CSV files` (comma separated)

In [0]:
# Maps the name of each DataFrame to create -> sharing link of the SharePoint
# folder that holds its source file
input_files = dict(
    exclusion__df="https://YOUR-COMPANY.sharepoint.com/:f:/s/SASIPAutomationProject/ErUH64YzYENHtWrA02EEDV0BH9SUUQ1p5i1qlkUPnPkWLg?e=20GjiH",
    role_map__df="https://YOUR-COMPANY.sharepoint.com/:f:/s/SASIPAutomationProject/EknYQFKVeTdDnEEvysgkUKAB2QnZdAM8kpHsDxJxYbpUVw?e=lY8zSW",
)

In [0]:
# Sanity check: print each DataFrame name followed by its SharePoint link
for df_name, share_link in input_files.items():
    print(df_name, share_link, sep="\n")

In [0]:
from io import BytesIO

# Build one Spark DataFrame per entry of `input_files`: fetch the first file
# of each shared SharePoint folder and parse it as CSV.
for key, fold_link in input_files.items():
    print(f'creating dataframe: {key}')

    # Resolve the sharing link to a folder object and load its file listing
    fold_obj = client.web.get_folder_by_guest_url_extended(fold_link).execute_query()
    sharepoint_files = fold_obj.files
    client.load(sharepoint_files).execute_query()

    # An empty folder would otherwise fail below with a bare, context-free
    # IndexError; keep the exception type but say which input is affected.
    if len(sharepoint_files) == 0:
        raise IndexError(f'no files found in SharePoint folder for {key}: {fold_link}')

    # Only the first file listed in the folder is used
    working_file = sharepoint_files[0]

    # Download the raw file content (bytes) through the sharepoint library
    file_response = File.read(working_file)

    # Parse the bytes in memory instead of round-tripping through a temporary
    # file in the working directory: nothing is left on disk if parsing fails
    # and no writable working directory is required.
    df = pandas.read_csv(BytesIO(file_response))

    # Publish the Spark DataFrame under the configured name so later cells can
    # reference it directly (e.g. `exclusion__df`)
    globals()[key] = spark.createDataFrame(df)

    # Drop the intermediate pandas DataFrame
    del df


In [0]:
# Preview the Spark DataFrame registered under the name `exclusion__df`
exclusion__df.show()

In [0]:
# Preview the Spark DataFrame registered under the name `role_map__df`
role_map__df.show()

#### `Move` files between SharePoint folders

##### TODO: keep investigating -> `move_to` / `moveto` do not work in the downloaded version of this module

#### `Write` in sharepoint

In [0]:
# Sharing link of the destination folder, resolved once into a folder object
# that the upload cells reuse
output_folder = "https://YOUR-COMPANY.sharepoint.com/:f:/s/SASIPAutomationProject/Ehr1xSw61FRCnUk8aCvZ4NQB_qOJe7eqGjYdd0jXq9zrsg?e=MsHZxK"
output_folder_obj = (
    client.web
    .get_folder_by_guest_url_extended(output_folder)
    .execute_query()
)

#### Write in sharepoint - `A: Pandas`
##### simpler, but less scalable for large datasets

In [0]:
# Name of the CSV file created in the SharePoint output folder
output_file_name = "role_map.csv"

# Spark -> pandas, then serialise to CSV text without the index column
role_map__pdf = role_map__df.toPandas()
csv_data = role_map__pdf.to_csv(index=False)

# Upload the UTF-8 encoded CSV text straight from memory (no local file)
output_folder_obj.upload_file(
    output_file_name,
    csv_data.encode('utf-8'),
).execute_query()

#### Write in sharepoint - B: `just PySpark`
##### prefer option A (Pandas) where possible

In [0]:
import csv
from io import StringIO

# Collect the DataFrame to a list of Row objects
rows = role_map__df.collect()

# Column names, written as the CSV header
columns = role_map__df.columns

# Serialise with the csv module rather than joining values with ',' by hand:
# fields containing commas, quotes or line breaks are quoted/escaped, and
# None is written as an empty field instead of the literal text "None".
# lineterminator='\n' keeps Unix line endings (the csv default is '\r\n').
csv_output = StringIO()
writer = csv.writer(csv_output, lineterminator='\n')
writer.writerow(columns)
writer.writerows(rows)

# Get the CSV string from the StringIO object
csv_data = csv_output.getvalue()

# Upload CSV to SharePoint directly from the CSV string
output_file_name = "role_map.csv"
output_folder_obj.upload_file(output_file_name, csv_data.encode('utf-8')).execute_query()


#### `Delete` files in a Sharepoint Folder

In [0]:
# Sharing link of the folder whose files are removed
deleteFolder_link = "https://YOUR-COMPANY.sharepoint.com/:f:/s/SASIPAutomationProject/Ehr1xSw61FRCnUk8aCvZ4NQB_qOJe7eqGjYdd0jXq9zrsg?e=MsHZxK"

# Retrieve the folder object
deleteFolder_obj = client.web.get_folder_by_guest_url_extended(deleteFolder_link).execute_query()

# Load the files in the folder
sharepoint_files_toDelete = deleteFolder_obj.files
client.load(sharepoint_files_toDelete).execute_query()

# Delete each file in the folder.
# Iterate over a snapshot of the collection: delete_object() may detach the
# file from the collection it belongs to (NOTE(review): believed to be the
# case in current Office365-REST-Python-Client releases - confirm), and
# mutating a collection while iterating it silently skips entries.
for file in list(sharepoint_files_toDelete):
    print(f"Deleting file: {file.properties['Name']}")
    file.delete_object()
    # Send the queued delete request for this file
    client.execute_query()

print("All files deleted.")