# <span style="color: blue;">**Dataflow Gen 2 Migration Accelerator**</span>

## About the solution
This solution leverages the new Save As Fabric REST API for Dataflow Gen 2 along with a Fabric notebook to accelerate migration of Gen 1 Dataflows to Gen 2.

To use it:
- Download the ipynb file and import it into a new Fabric Workspace
- Run the notebook. This will:
	- create a Lakehouse to store migration files
	- deploy notebooks and a semantic model/report to the workspace
	- Note: this notebook reads the JSON file containing the item definitions from the Fabric Toolbox folder on GitHub. If your notebook cannot access the internet, download the JSON file and update the path accordingly.
- If desired, run the "Dataflow Inventory" notebook to get information on the Dataflows in your tenant
	- View the Inventory Report to decide which Dataflows to migrate
- Use the "Create DFG2s from DFG1s" notebook to automatically create Gen 2 Dataflows from your Gen 1 Dataflows
	- Use the appropriate cell based on your migration approach
		- Add any additional code to filter to a subset of dataflows if the default scope is too broad
- Note: The Save As REST API will maintain the connections and refresh schedule from your Gen 1 Dataflows but any incremental refresh settings will need to be recreated prior to initial refresh
- Check back for later versions of this tool, as additional notebooks are planned to help update downstream items with the GUIDs from the newly created Gen 2 Dataflows


In [None]:
import requests
import pandas as pd
import sempy.fabric as fabric
from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundException
import requests
import base64
import json
import time
try:
    import sempy_labs as labs
    print('labs already installed')
except:
    print('installing labs')
    %pip install semantic-link-labs
    import sempy_labs as labs

In [None]:
# Placeholder -> value map. ReplaceGUIDs searches item definitions for each key
# and substitutes the current value, so every key must match the placeholder
# text exactly. Values start empty and are filled in by the cells below.
newids = {
    'Workspace_GUID': '',
    'Lakehouse_GUID': '',
    'Notebook_1 Dataflow Inventory_GUID': '',
    # Key carries the same '_GUID' suffix as the other items and as the entry
    # in the deploy list; without it the key is a prefix of the real
    # placeholder and the replacement would leave a stray '_GUID' behind.
    'Notebook_4 Create DFG2s from DFG1s_GUID': '',
    'SemanticModel_2 Inventory Model_GUID': '',
    'Report_3 Inventory Report_GUID': '',
    'Notebook_5 Create or Update Downstream Items_GUID': '',
    'Notebook_6 Create DFG2 Model from Import Model_GUID': '',
    'Lakehouse_SQLEndpoint': '',
    'Lakehouse_DatabaseId': '',
    'OneLakeRegionPrefix': ''
}
# Workspace id read from the Spark session configuration.
thisworkspaceid = spark.conf.get("trident.workspace.id")
newids['Workspace_GUID'] = thisworkspaceid

# Display the current map as the cell output.
newids

In [None]:
# Create the DFG2_Migration Lakehouse in this workspace
access_token = notebookutils.credentials.getToken("pbi")
headers = {"Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json"}
url = f"https://api.fabric.microsoft.com/v1/workspaces/{thisworkspaceid}/lakehouses"
body = {
  "displayName": "DFG2_Migration",
  "description": "Lakehouse for Dataflow Gen 2 Migration Data"
}
response = requests.post(url, headers=headers, json=body)
if not response.ok:
    # Show the API's error payload, then stop here instead of failing later
    # with an unrelated KeyError on the missing 'id' field.
    print(response.text)
    response.raise_for_status()
jsonresponse = response.json()
print(jsonresponse)
lakehouseid = jsonresponse['id']

# Add new LH id to newids
newids['Lakehouse_GUID'] = lakehouseid
newids

In [None]:
# Get Lakehouse SQL Endpoint
# Poll the lakehouse until both SQL endpoint values are populated instead of
# relying on one fixed wait: an unpopulated (None) value stored in newids would
# make the later string replacement in ReplaceGUIDs fail.
url = f"https://api.fabric.microsoft.com/v1/workspaces/{thisworkspaceid}/lakehouses/{lakehouseid}"
sqlendpoint_max_wait_seconds = 300
sqlendpoint_poll_seconds = 10
sqlendpoint_deadline = time.monotonic() + sqlendpoint_max_wait_seconds

while True:
    access_token = notebookutils.credentials.getToken("pbi")
    headers = {"Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    jsonresponse = response.json()
    # print(jsonresponse)

    # Tolerate the properties being absent or null while provisioning runs.
    sqlendpointproperties = (jsonresponse.get('properties') or {}).get('sqlEndpointProperties') or {}
    if sqlendpointproperties.get('connectionString') and sqlendpointproperties.get('id'):
        break
    if time.monotonic() >= sqlendpoint_deadline:
        raise TimeoutError(
            f"SQL endpoint for lakehouse {lakehouseid} was not populated after "
            f"{sqlendpoint_max_wait_seconds} seconds; rerun this cell to keep waiting."
        )
    time.sleep(sqlendpoint_poll_seconds)

# Add new LH info to newids
newids['Lakehouse_SQLEndpoint'] = sqlendpointproperties['connectionString']
newids['Lakehouse_DatabaseId'] = sqlendpointproperties['id']
newids

### *Note - Sometimes the SQL endpoint generation takes longer than the cell above waits. Make sure the SQL endpoint and database ID values are populated in the output of the cell above before continuing. If they are not, wait 10-20s and rerun the cell until you see values populated.*

In [None]:
# Get Lakehouse Region Prefix
# The prefix is whatever sits between the "//" of the workspace's OneLake blob
# endpoint and the first occurrence of "onelake" after it.
workspaceinfo = fabric.FabricRestClient().get(f"/v1/workspaces/{thisworkspaceid}").json()
onelakeblob = workspaceinfo['oneLakeEndpoints']['blobEndpoint']
afterscheme = onelakeblob.split("//")[1]
regionprefix = afterscheme.split("onelake")[0]
newids['OneLakeRegionPrefix'] = regionprefix
# Display the prefix as the cell output.
regionprefix

In [None]:
# Download the item definitions and load them into a DataFrame.
from io import StringIO

url = "https://raw.githubusercontent.com/microsoft/fabric-toolbox/refs/heads/main/accelerators/DFG2-migration-accelerator/DFG2_Migration_Items.json"
# url = "https://raw.githubusercontent.com/hoosierbi/fileshare/refs/heads/main/DFG2_Migration_Accelarator/DFG2_Migration_Items.json" # backup storage location
deployresponse = requests.get(url)
# Fail here on a bad download rather than trying to parse an error page as JSON.
deployresponse.raise_for_status()
deployjson = deployresponse.text
# Wrap the text in a file-like object: passing a literal JSON string to
# read_json is deprecated in recent pandas releases.
deploy_df = pd.read_json(StringIO(deployjson))
# NOTE: Series.replace (unlike Series.str.replace) matches whole cell values,
# so spaces inside display names are kept. The deploy list entries contain
# those spaces, so this must not be changed to a substring replacement.
deploy_df['ReplaceString'] = deploy_df['type'] + '_' + deploy_df['displayName'].replace(' ', '_') + '_GUID'
deploy_df


In [None]:
# Define Functions
def tobase64(textstring):
    """Return the Base64 text representation of *textstring*.

    The text is encoded as UTF-8 before Base64 encoding, so payloads that
    contain non-ASCII characters are supported. Pure-ASCII input produces
    the same output as an ASCII encoding would.
    """
    textstring_bytes = textstring.encode("utf-8")
    # Base64 output only ever contains ASCII characters.
    return base64.b64encode(textstring_bytes).decode("ascii")

def convertpayloadstobase64(definitionjson):
    """Parse a definition JSON string and Base64-encode each part's payload.

    Returns the parsed definition (not a string) in which the 'payload' of
    every entry under 'parts' has been replaced by its tobase64() encoding.
    """
    definition = json.loads(definitionjson)
    for part in definition['parts']:
        encodedpayload = tobase64(part['payload'])
        part['payload'] = encodedpayload
    return definition

def ReplaceGUIDs(defnstring):
    """Return *defnstring* with every newids key replaced by its current value.

    Reads the module-level newids dict and applies plain str.replace for each
    key, in the dict's iteration order.
    """
    replaced = defnstring
    for placeholder, newvalue in newids.items():
        replaced = replaced.replace(placeholder, newvalue)
    return replaced


# Create Item Function

def CreateItemFromDefinition(wsid, itemname, itemtype, itemdefinition):
    """POST a new item with the given definition to a workspace's items endpoint.

    wsid is the target workspace id; itemname, itemtype and itemdefinition are
    sent as the 'displayName', 'type' and 'definition' fields of the request.
    Returns the unparsed requests response so the caller can inspect the
    status code, headers and body.
    """
    token = notebookutils.credentials.getToken("pbi")
    requestheaders = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = {
        "displayName": itemname,
        "type": itemtype,
        "definition": itemdefinition,
    }
    return requests.post(
        f"https://api.fabric.microsoft.com/v1/workspaces/{wsid}/items",
        headers=requestheaders,
        json=payload,
    )

In [None]:
# Items to deploy. Each entry must equal a 'ReplaceString' value in deploy_df;
# the same string is the placeholder ReplaceGUIDs swaps for the new item's id.
# Order matters: each created item's id is stored in newids before the next
# definition is processed, so later definitions can reference earlier items.
deploylist =  [
    'Notebook_1 Dataflow Inventory_GUID'
    ,'Notebook_4 Create DFG2s from DFG1s_GUID'
    ,'SemanticModel_2 Inventory Model_GUID'
    ,'Report_3 Inventory Report_GUID'
]



access_token = notebookutils.credentials.getToken("pbi")
headers = {"Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json"}

for replacestring in deploylist:
    itemrecord = deploy_df[deploy_df['ReplaceString'] == replacestring]
    if itemrecord.empty:
        # Nothing to deploy for this entry; report it instead of raising IndexError.
        print(replacestring + " - not found in deploy_df, skipping")
        continue
    itemrow = itemrecord.iloc[0]
    definitionstring = itemrow['Definition']
    convertedstring = convertpayloadstobase64(ReplaceGUIDs(definitionstring))
    createitem = CreateItemFromDefinition(thisworkspaceid, itemrow['displayName'], itemrow['type'], convertedstring)
    # createitem = CreateItemFromDefinition(thisworkspaceid, 'SMtest', itemrecord.iloc[0]['type'], convertedstring) # for troubleshooting

    print(createitem.status_code)

    if createitem.status_code in { 200, 201 }:
        # Created synchronously: the response body is the new item.
        newitemid = createitem.json()['id']
        newids[replacestring] = newitemid
        print(replacestring + " - " + newitemid)

    elif createitem.status_code==202:
        # Accepted as a long-running operation: follow the Location header
        # until the operation reaches a terminal state.
        while True:
            url = createitem.headers["Location"]
            retry_after = int(createitem.headers.get("Retry-After",0))
            # Always wait at least one second so a missing Retry-After header
            # does not turn this into a zero-delay request loop.
            time.sleep(max(retry_after, 1))

            headers = {"Authorization": f"Bearer {access_token}" }
            createitem = requests.get(url, headers=headers)
            createitem.raise_for_status()

            body = createitem.json()
            status = body["status"]
            if status == "Succeeded":
                # The Location header of the finished operation points at the result.
                url = createitem.headers["Location"]
                createitem = requests.get(url,headers=headers)
                newitemid = createitem.json()['id']
                newids[replacestring] = newitemid
                print(replacestring + " - " + newitemid)
                break
            if status == "Failed":
                # Terminal failure: stop polling (the loop would otherwise
                # never end) and move on to the next item.
                print(replacestring + " - creation failed: " + str(body))
                break

    else:
        # Any other status means the item was not created; show why.
        print(replacestring + " - creation failed: " + createitem.text)