In [0]:
# Use twice the cluster's default parallelism as the number of shuffle partitions.
spark.conf.set(
    "spark.sql.shuffle.partitions",
    sc.defaultParallelism * 2,
)

# Notebook-scoped install of the Azure Data Lake SDK, pinned to a fixed version.
dbutils.library.installPyPI(
    "azure-storage-file-datalake",
    version="12.2.0",
)

# Resolve column names case-insensitively in Spark SQL.
spark.conf.set(
    "spark.sql.caseSensitive",
    False,
)

In [0]:
import datetime 
from azure.storage.filedatalake import DataLakeServiceClient

# Storage credentials are read from the Databricks secret scope; nothing sensitive is hard-coded.
account_key = dbutils.secrets.get(scope="am-da-kv-general", key="General-Datalake-Account-Key")
storage_account_name = dbutils.secrets.get(scope="am-da-kv-general", key="am-da-s-general-adl-name")

# Build the Data Lake service client.
# Every later step needs this client, so a construction failure is printed AND re-raised.
# Swallowing it would only defer the failure to a misleading NameError on `service_client`.
try:
    service_client = DataLakeServiceClient(
        account_url="{}://{}.dfs.core.windows.net".format("https", storage_account_name),
        credential=account_key,
    )
except Exception as e:
    print(e)
    raise

# Client for the "raw" file system; the file-listing helper below reads it as a global.
file_system_client = service_client.get_file_system_client(file_system="raw")
#paths = file_system_client.get_paths(path="batch/sales/bigmachine/commerce_v1/quote_header")

# manual date entry 
# from_date = datetime.datetime.strptime('2021-08-01', "%Y-%m-%d")

def getLatestFiles(start_from , directory_path):
  """Return mount paths of recently modified .csv/.json files under a directory.

  Lists `directory_path` through the module-level `file_system_client` and keeps
  every path whose last-modified time is at or after `start_from` and whose name
  ends in ".csv" or ".json".

  Args:
    start_from: naive datetime cut-off, expected to be expressed in UTC because
      it is compared against the (GMT) modification time reported by the service.
    directory_path: directory inside the file system to list.

  Returns:
    List of strings, each "/mnt/datalake_raw/" + the path name, in listing order.
    Empty when nothing qualifies.
  """
  output = []
  for path in file_system_client.get_paths(path=directory_path):
    modified = path.last_modified
    if isinstance(modified, str):
      # HTTP-date string, e.g. "Mon, 02 Aug 2021 10:00:00 GMT" -> naive datetime.
      modified = datetime.datetime.strptime(modified, "%a, %d %b %Y %H:%M:%S %Z")
    elif modified.tzinfo is not None:
      # Defensive: if the SDK hands back an aware datetime, bring it to naive UTC
      # so the comparison with the naive cut-off is valid and consistent.
      modified = modified.astimezone(datetime.timezone.utc).replace(tzinfo=None)
    # A naive datetime is used as-is.

    if modified >= start_from and path.name.endswith((".csv", ".json")):
      output.append("/mnt/datalake_raw/" + path.name)
  return output


#start_date= datetime.datetime.combine(datetime.datetime.today(), datetime.time(0, 0, 0, 0))
# Cut-off for "recent" files: everything modified in the last 48 hours.
# The listing helper parses service timestamps into naive GMT datetimes, so the cut-off is
# built as naive UTC rather than local time; on a cluster whose timezone is not UTC,
# local time would silently shift the 48-hour window.
start_date = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) - datetime.timedelta(hours=48)

# Oracle CRM delta drops, one list per entity.
account_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/account/delta")
contact_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/contact/delta")
service_contract_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/service_contract/delta")
user_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/user/delta")
opportunity_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/opportunity/delta")
opportunity_product_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/opportunity_product/delta")
existing_quote_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/existing_quote/delta")
deleted_items_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/deleted_items/delta")
opportunity_contact_role_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/opportunity_contact_role/delta")
opportunity_partner_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/opportunity_partner/delta")
opportunity_ib_unit_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/opportunity_ib_unit/delta")
activity_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/activity/delta")
ib_unit_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/oraclecrm/ib_unit/delta")

# BigMachines commerce_v2 quote drops (no "delta" sub-directory in these paths).
quote_header_v2_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/bigmachine/commerce_v2/quote_header")
quote_line_v2_list = getLatestFiles(start_from=start_date, directory_path="batch/sales/bigmachine/commerce_v2/quote_line")

# if len(account_list)+ len(user_list)  + len(opportunity_list) + len(opportunity_product_list) + len(deleted_items_list) == 0:
#   dbutils.notebook.exit("No new deltas have been captured")

In [0]:
""" Deleted Items """
#https://adb-4156357882302374.14.azuredatabricks.net/?o=4156357882302374#

from pyspark.sql.functions import col , row_number , year , to_timestamp , to_date
from pyspark.sql import Window
from delta.tables import *

# Run only when new deleted-item files were found.
# The file-listing helper always returns a list (possibly empty), never None, so a
# comparison against None can never be true and would leave this cell as dead code.
# A truthiness check matches the guards used by the other curated cells.
if deleted_items_list:

  # Read the new deleted-item files (header row, default CSV settings) and rename
  # the columns to the curated names.
  DeletedItemDF = (
      spark.read
      .option("header", "true")
      .csv(deleted_items_list)
      .select(
          col('DeletedBy'),
          col('DeletedById').alias("DeletedByKey"),
          to_date(col("DeletedDate"), "yyyy-MM-dd").alias("DeletedDate"),
          col("DeletedItemId").alias("DeletedItemKey"),
          col("ExternalSystemId").alias("ExternalSystemKey"),
          col("ObjectId").alias("ObjectKey"),
          col("Type"),
      )
  )

  # Keep one row per DeletedItemKey: the one with the most recent DeletedDate.
  window = Window.partitionBy("DeletedItemKey").orderBy(DeletedItemDF["DeletedDate"].desc())

  deleted_items_update = (
      DeletedItemDF
      .dropDuplicates()
      .withColumn("RowNumber", row_number().over(window))
      .filter("RowNumber == 1")
      .drop("RowNumber")
      # DeletedDate is already a date (see to_date above), so year() applies directly.
      .withColumn("DeletedYear", year(col("DeletedDate")))
  )

  deleted_items_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/deleted_items_g")

  # Insert-only merge: keys already present in the curated table are left untouched.
  (
      deleted_items_g.alias("t")
      .merge(deleted_items_update.alias("s"), "t.DeletedItemKey = s.DeletedItemKey")
      .whenNotMatchedInsertAll()
      .execute()
  )
  

In [0]:
"""
Account Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if account_list:

  # Load the new account delta files (pipe-delimited, UTF-16, multi-line values allowed)
  # and rename the source fields to the curated column names.
  # NOTE(review): no schema / inferSchema is configured on this reader, so the
  # timestampFormat option presumably has no effect here; its "SS" also looks like a
  # typo for "ss" - confirm before relying on it.
  Account = (
      spark.read.format('csv')
      .option("delimiter", "|")
      .option("header", "true")
      .option("multiLine", "true")
      .option("encoding", "UTF-16")
      .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
      .load(account_list)
      .select(
          # (source field, curated column) pairs, in output order.
          *[col(source_field).alias(curated_name) for source_field, curated_name in [
              ("Id", "AccountKey"),
              ("PrimaryBillToCity", "AddressCity"),
              ("PrimaryBillToCountry", "AddressCountry"),
              ("PrimaryBillToStreetAddress", "AddressAddress1"),
              ("PrimaryBillToStreetAddress2", "AddressAddress2"),
              ("PrimaryBillToStreetAddress3", "AddressAddress3"),
              ("PrimaryBillToCounty", "AddressCounty"),
              ("WebSite", "WebSite"),
              ("Location", "Location"),
              ("MainPhone", "MainPhoneNumber"),
              ("AccountName", "AccountName"),
              ("ParentAccount", "ParentAccount"),
              ("PartnerFlag", "AccountPartner"),
              ("Priority", "Tier"),
              ("PublicCompany", "PublicCompany"),
              ("AccountType", "AccountType"),
              ("PrimaryBillToPostalCode", "AddressZippostCode"),
              ("PrimaryBillToState", "AddressUsState"),
              ("ParentAccountId", "ParentAccountId"),
              ("ParentAccountLocation", "ParentAccountLocation"),
              ("PrimaryContactFullName", "PrimaryContact"),
              ("PrimaryContactId", "PrimaryContactId"),
              ("IndexedPick0", "Branch"),
              ("ParentAccountExternalSystemId", "ParentAccountExternalUniqueId"),
              ("ExternalSystemId", "ExternalUniqueId"),
              ("OwnerFullName", "OwnerFullName"),
              ("Description", "Description"),
              ("CustomBoolean3", "BillingLocation"),
              ("CustomBoolean11", "DepositEligible"),
              ("CustomBoolean10", "ProposalBreakDown"),
              ("CustomBoolean12", "RepairProposalFromNationalAccountsOnly"),
              ("CustomBoolean5", "ShippingLocation"),
              ("CustomBoolean1", "TkeNationalAccount"),
              ("CustomText2", "EbsAccountName"),
              ("CustomText4", "EbsSiteLocation"),
              ("CustomText6", "PartyCustomerNumbers"),
              ("CustomMultiSelectPickList0", "GpoRebatePaidOn"),
              ("CustomNumber20", "GpoDiscountPercentage"),
              ("CustomNumber21", "GpoRebatePercentage"),
              ("CustomText48", "EbsPartyId"),
              ("CustomText51", "EbsSiteId"),
              ("CustomText49", "EbsStatus"),
              ("CustomText39", "OracleBranchNumber"),
              ("CustomPickList0", "PaymentTerms"),
              ("CustomText40", "UnionLocal"),
              ("CustomText33", "TaxRegistrationNumber"),
              ("OwnerId", "OwnerId"),
              ("CustomObject3Name", "ServiceContract"),
              ("CustomObject3ExternalSystemId", "ServiceContractExternalUniqueID"),
              ("CustomObject3IntegrationId", "ServiceContractIntegrationID"),
              ("CustomObject1ExternalSystemId", "IBUnitExternalUniqueID"),
              ("CustomObject1Name", "IBUnit"),
              ("CustomObject1IntegrationId", "IBUnitIntegrationID"),
              ("MarketSegment", "MarketSegment"),
              ("CustomPickList4", "BuildingType"),
              ("Status", "AccountStatus"),
              ("IndexedLongText0", "SiteLocation"),
              ("MainFax", "MainFaxNumber"),
              ("CustomDate28", "CapitalPlanProposedDate"),
              ("CustomDate25", "FiscalYearStart"),
          ]],
          to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
          to_timestamp(col("CreatedDate")).alias("CreatedDate"),
      )
  )

  # One row per AccountKey: the most recently modified version wins.
  window = Window.partitionBy("AccountKey").orderBy(Account["ModifiedDate"].desc())
  account_update = (
      Account
      .dropDuplicates()
      .withColumn("RowNumber", row_number().over(window))
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Upsert into the curated table: insert new accounts, overwrite changed ones.
  account_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/account_g")

  (
      account_g.alias("t")
      .merge(account_update.alias("s"), "t.AccountKey = s.AccountKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

  # Remove accounts recorded as deleted in the curated deleted-items table.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")

  (
      account_g.alias("t")
      .merge(
          deleted_items_update.alias("s"),
          "s.Objectkey = t.AccountKey and type ='Account'",
      )
      .whenMatchedDelete()
      .execute()
  )

# process full copy in csv for view migration

# Write the whole curated account table out as a single CSV part file with a header row.
# NOTE(review): mode is "append", so each run adds another full copy to the target
# folder - confirm this is what the downstream consumer expects.
(
    spark.read.format('delta')
    .load('/mnt/datalake_curated/view_migration/account_g')
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/account")
)

# more on merge statements :
# https://docs.delta.io/latest/delta-update.html#language-python

In [0]:
"""
User Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if user_list:

  # Load the new user delta files (pipe-delimited, multi-line values allowed; no encoding
  # option is set for this feed) and rename the source fields to the curated column names.
  # NOTE(review): no schema / inferSchema is configured on this reader, so the
  # timestampFormat option presumably has no effect here; its "SS" also looks like a
  # typo for "ss" - confirm before relying on it.
  User = (
      spark.read.format('csv')
      .option("delimiter", "|")
      .option("header", "true")
      .option("multiLine", "true")
      .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
      .load(user_list)
      .select(
          # (source field, curated column) pairs, in output order.
          *[col(source_field).alias(curated_name) for source_field, curated_name in [
              ("Id", "UserKey"),
              ("CustomPickList0", "Branch"),
              ("CustomPickList1", "FinancialReportingArea"),
              ("CustomPickList2", "Region"),
              ("CustomPickList3", "PrimaryLOB"),
              ("CustomText33", "Global_Employee_Number_8_Digit_ID"),
              ("DefaultAnalyticsBookId", "DefaultBookIdforAnalytics"),
              ("EMailAddr", "Email"),
              ("EmployeeNumber", "EmployeeNumber"),
              ("ExternalSystemId", "ExternalUniqueID"),
              ("FirstName", "FirstName"),
              ("FullName", "Name"),
              ("IntegrationId", "IntegrationID"),
              ("LastName", "LastName"),
              ("ManagerFullName", "ReportsTo"),
              ("ManagerId", "ManagerId"),
              ("Status", "Status"),
              ("Alias", "Alias"),
              ("Supervisor", "Supervisor"),
              ("UserSignInId", "UserSignInID"),
          ]],
          to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
          to_timestamp(col("CreatedDate")).alias("CreatedDate"),
      )
  )

  # One row per UserKey: the most recently modified version wins.
  window = Window.partitionBy("UserKey").orderBy(User["ModifiedDate"].desc())
  user_update = (
      User
      .dropDuplicates()
      .withColumn("RowNumber", row_number().over(window))
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Upsert into the curated table: insert new users, overwrite changed ones.
  # (This import repeats the one at the top of the cell.)
  from delta.tables import *
  user_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/user_g")

  (
      user_g.alias("t")
      .merge(user_update.alias("s"), "t.UserKey = s.UserKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

  # Remove Deleted Users! DOES NOT APPLY TO USER. No User Deletes are captured in deleted_items 
  # if deleted_items_update:
  #   user_g.alias("t") \
  #     .merge( \
  #       deleted_items_update.alias("s"), \
  #       "s.Objectkey = t.AccountKey and type ='User'" ) \
  #     .whenMatchedDelete() \
  #     .execute()

  # process full copy in csv for view migration

# Write the whole curated user table out as a single CSV part file with a header row.
# NOTE(review): mode is "append", so each run adds another full copy to the target
# folder - confirm this is what the downstream consumer expects.
(
    spark.read.format('delta')
    .load('/mnt/datalake_curated/view_migration/user_g')
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/user")
)



In [0]:
"""
Service Contract Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *


if service_contract_list:

  # Load the new service-contract delta files (pipe-delimited, UTF-16, multi-line values
  # allowed) and rename the source fields to the curated column names. Only the fields
  # listed below are selected; any other source field is not carried over.
  # NOTE(review): no schema / inferSchema is configured on this reader, so the
  # timestampFormat option presumably has no effect here; its "SS" also looks like a
  # typo for "ss" - confirm before relying on it.
  ServiceContract = (
      spark.read.format('csv')
      .option("delimiter", "|")
      .option("header", "true")
      .option("multiLine", "true")
      .option("encoding", "UTF-16")
      .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
      .load(service_contract_list)
      .select(
          # (source field, curated column) pairs, in output order.
          *[col(source_field).alias(curated_name) for source_field, curated_name in [
              ("Id", "ServiceContractKey"),
              ("AccountExternalSystemId", "AccountExternalUniqueID"),
              ("AccountId", "AccountId"),
              ("AccountIntegrationId", "AccountIntegrationID"),
              ("AccountLocation", "AccountLocation"),
              ("AccountName", "BillingAccount"),
              ("ActivityIntegrationId", "ActivityIntegrationID"),
              ("ContactExternalSystemId", "ContactExternalUniqueID"),
              ("ContactFirstName", "ContactFirstName"),
              ("ContactFullName", "Contact"),
              ("ContactId", "ContactId"),
              ("ContactIntegrationId", "ContactIntegrationID"),
              ("ContactLastName", "ContactLastName"),
              ("CreatedByAlias", "CreatedBy"),
              ("Currency", "Currency"),
              ("CustomBoolean0", "CustomerPONumRequired?"),
              ("CustomBoolean1", "ContractAttached"),
              ("CustomBoolean11", "TKExtend"),
              ("CustomBoolean2", "Validated"),
              ("CustomBoolean3", "PublicBid"),
              ("CustomBoolean4", "CRPAccount"),
              ("CustomBoolean5", "1stLevelApproval"),
              ("CustomBoolean6", "RiskofLoss"),
              ("CustomBoolean8", "ProjectCancellationDateRequired"),
              ("CustomBoolean9", "Tier1Notification"),
              ("CustomCurrency1", "CurrentYearlyContractPrice"),
              ("CustomDate25", "ContractStartDate"),
              ("CustomDate26", "AnniversaryDateofContract"),
              ("CustomDate27", "OriginalContractDate"),
              ("CustomDate28", "TKExtendStartDate"),
              ("CustomDate29", "TKExtendEndDate"),
              ("CustomDate30", "ApprovalDate"),
              ("CustomDate31", "TKExtendModifiedDate"),
              ("CustomDate32", "ProjectedCancellationDate"),
              ("CustomDate33", "CurrentTermStartDate"),
              ("CustomDate35", "StatusChangeDateStamp"),
              ("CustomDate36", "CDMContractStartDate"),
              ("CustomDate37", "TKExtendStatusChangeDate"),
              ("CustomInteger0", "OriginalContractNote"),
              ("CustomInteger1", "ContractTermYears"),
              ("CustomInteger2", "NumofUnits"),
              ("CustomInteger3", "RenewalTermMonths"),
              ("CustomInteger4", "NumberofRenewals"),
              ("CustomNote0", "ActiveUnitSerialNo"),
              ("CustomNumber20", "EscalationCapPct"),
              ("CustomNumber21", "DiscountPercentage"),
              ("CustomObject1ExternalSystemId", "IBUnitExternalUniqueID"),
              ("CustomObject1IntegrationId", "IBUnitIntegrationID"),
              ("CustomObject1Name", "IBUnit"),
              ("CustomObject1Type", "IBUnitType"),
              ("CustomObject2Name", "ExistingQuote"),
              ("CustomObject3ExternalSystemId", "ServiceContractExternalUniqueID"),
              ("CustomObject3IntegrationId", "ServiceContractIntegrationID"),
              ("CustomObject3Name", "ServiceContract"),
              ("CustomObject3Type", "ServiceContractType"),
              ("CustomPickList1", "CancellationTerms"),
              ("CustomPickList11", "RiskCategory"),
              ("CustomPickList12", "ReasonatRisk"),
              ("CustomPickList13", "ContractStatusValue"),
              ("CustomPickList2", "CancellationNotification"),
              ("CustomPickList3", "ContractPaper"),
              ("CustomPickList4", "RenewalNotification"),
              ("CustomPickList5", "Tier"),
              ("CustomPickList6", "ValidationStatus–TKExtend"),
              ("CustomPickList7", "QualityAudit"),
              ("CustomPickList8", "TKExtendStatus"),
              ("CustomText0", "ContractNotes"),
              ("CustomText1", "MessageNote"),
              ("CustomText11", "TKExtendData"),
              ("CustomText12", "TKExtendNotes"),
              ("CustomText13", "1stLevelApprover"),
              ("CustomText14", "ResolutionNotes"),
              ("CustomText15", "MasterContractLineNum"),
              ("CustomText2", "BilltoName"),
              ("CustomText3", "BilltoAddress"),
              ("CustomText32", "PriceEscalationPeriod"),
              ("CustomText33", "PriceEscalationType"),
              ("CustomText35", "MonthofEscalation"),
              ("CustomText36", "DateofRenewalNotifaction"),
              ("CustomText37", "BillingContactFirstName"),
              ("CustomText38", "BranchNumber"),
              ("CustomText39", "CustomerSpecificPricing"),
              ("CustomText4", "ShiptoName"),
              ("CustomText41", "TypeofBill"),
              ("CustomText42", "OriginalContractDateText"),
              ("CustomText43", "SpecialBilling?"),
              ("CustomText44", "NationalAccount"),
              ("CustomText45", "DiscountType"),
              ("CustomText46", "AlphaCode"),
              ("CustomText47", "CustomerNumber"),
              ("CustomText48", "SalesPersonOriginal"),
              ("CustomText49", "MonitoringBillRef"),
              ("CustomText5", "ShiptoAddress"),
              ("CustomText50", "ContractType"),
              ("CustomText52", "RouteNum"),
              ("CustomText53", "ServiceChargesPO"),
              ("CustomText54", "CleansedBy"),
              ("CustomText55", "CDMContractRepository"),
              ("CustomText56", "TKExtendStatusValue"),
              ("CustomText6", "BuildingName"),
              ("CustomText7", "Existing/LegacyContractNumber"),
              ("CustomText8", "CustomerPONum"),
              ("CustomText9", "BillingContactLastName"),
              ("ExternalSystemId", "ContractNumber"),
              ("IndexedCurrency0", "CurrentContractMonthlyPrice"),
              ("IndexedDate0", "ContractEndDate"),
              ("IndexedNumber0", "IndexedNumber"),
              ("IndexedPick0", "Branch"),
              ("IndexedPick1", "Region"),
              ("IntegrationId", "IntegrationID"),
              ("Name", "CONTRACT_NUMBER"),
              ("OpportunityExternalSystemId", "OpportunityExternalUniqueID"),
              ("OpportunityId", "OpportunityId"),
              ("OpportunityIntegrationId", "OpportunityIntegrationID"),
              ("OpportunityName", "Opportunity"),
              ("OpportunitySalesStage", "OpportunitySalesStage"),
              ("OptimizedCustomPickList0", "EBSRelationship"),
              ("Owner", "SalesPerson"),
              ("OwnerExternalSystemId", "OwnerExternalUniqueID"),
              ("OwnerFullName", "OwnerFullName"),
              ("OwnerId", "PrimaryOwnerId"),
              ("QuickSearch1", "ContractStatus"),
              ("QuickSearch2", "QuickSearch2"),
              ("ServiceRequestId", "CaseId"),
              ("ServiceRequestIntegrationId", "CaseIntegrationID"),
              ("ServiceRequestNumber", "Case"),
              ("UpdatedByAlias", "ModifiedBy"),
          ]],
          to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
          to_timestamp(col("CreatedDate")).alias("CreatedDate"),
      )
  )

  # One row per ServiceContractKey: the most recently modified version wins.
  window = Window.partitionBy("ServiceContractKey").orderBy(ServiceContract["ModifiedDate"].desc())

  service_contract_update = (
      ServiceContract
      .dropDuplicates()
      .withColumn("RowNumber", row_number().over(window))
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Upsert into the curated table: insert new service contracts, overwrite changed ones.
  # (This import repeats the one at the top of the cell.)
  from delta.tables import *
  service_contract_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/service_contract_g")

  (
      service_contract_g.alias("t")
      .merge(
          service_contract_update.alias("s"),
          "t.ServiceContractKey = s.ServiceContractKey",
      )
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

# process full copy in csv for view migration

# Write the whole curated service-contract table out as a single CSV part file with a header row.
# NOTE(review): mode is "append", so each run adds another full copy to the target
# folder - confirm this is what the downstream consumer expects.
(
    spark.read.format('delta')
    .load('/mnt/datalake_curated/view_migration/service_contract_g')
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/service_contract")
)

In [0]:
"""
Contact Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year , lit
from pyspark.sql import Window
from delta.tables import *


if contact_list:
  # Only runs when the lookup at the top of the notebook found recent contact files.

  # (raw Oracle CRM header, curated column name), in output order.
  # AccountAddress* columns come from the Primary* fields and ContactAddress*
  # columns from the Alternate* fields.
  contact_column_map = [
    ("Id", "ContactKey"),
    ("ContactEmail", "Email"),
    ("ContactFirstName", "FirstName"),
    ("ContactLastName", "LastName"),
    ("Owner", "SalesRep"),
    ("PrimaryCity", "AccountAddressCity"),
    ("PrimaryCountry", "AccountAddressCountry"),
    ("PrimaryAddressLine1", "AccountAddressAddress1"),
    ("PrimaryStreetAddress2", "AccountAddressAddress2"),
    ("PrimaryStreetAddress3", "AccountAddressAddress3"),
    ("PrimaryCounty", "AccountAddressCounty"),
    ("PrimaryZipCode", "AccountAddressZipCode"),
    ("PrimaryStateProvince", "AccountAddressUSState"),
    ("AlternateCity", "ContactAddressCity"),
    ("AlternateCountry", "ContactAddressCountry"),
    ("AlternateAddress1", "ContactAddressAddress1"),
    ("AlternateAddress2", "ContactAddressAddress2"),
    ("AlternateAddress3", "ContactAddressAddress3"),
    ("AlternateCounty", "ContactAddressCounty"),
    ("AlternateZipCode", "ContactAddressZipCode"),
    ("AlternateStateProvince", "ContactAddressUSState"),
    ("AccountName", "Account"),
    ("AccountLocation", "AccountLocation"),
    ("LastAssessmentDate", "LastAssessmentDate"),
    ("AccountId", "AccountId"),
    ("CustomObject3Name", "ServiceContract"),
    ("CustomObject3IntegrationId", "ServiceContractIntegrationId"),
    ("WorkPhone", "WorkPhoneNumber"),
    ("JobTitle", "JobTitle"),
    ("CellularPhone", "CellularPhoneNumber"),
    ("HomePhone", "HomePhoneNumber"),
    ("ContactType", "ContactType"),
    ("LeadSource", "LeadSource"),
    ("ClientStatus", "Status"),
    ("Description", "Description"),
    ("CustomPickList1", "JobRole"),
    ("IndexedPick0", "Branch"),
    ("IndexedPick1", "Region"),
    ("IndexedPick2", "FinancialReportingArea"),
    ("ExternalSystemId", "ExternalUniqueID"),
  ]

  contact_columns = [col(raw_name).alias(curated_name) for raw_name, curated_name in contact_column_map]
  contact_columns += [
    lit("").alias("GoLive"),  # always written as an empty string
    to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
    to_timestamp(col("CreatedDate")).alias("CreatedDate"),
  ]

  # Pipe-delimited, UTF-16 encoded CSV extracts with a header row; multiLine allows
  # quoted values that contain line breaks.
  Contact = (
    spark.read.format('csv')
    .option("delimiter", "|")
    .option("header", "true")
    .option("multiLine", "true")
    .option("encoding", "UTF-16")
    # NOTE(review): no schema/inferSchema is set, so columns load as strings and this
    # option looks like it has no effect; "SS" is also the fraction-of-second pattern
    # rather than seconds ("ss"). Left unchanged -- confirm before relying on it.
    .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
    .load(contact_list)
    .select(*contact_columns)
  )

  # Rank the versions of each contact in this batch, newest ModifiedDate first.
  window = Window.partitionBy("ContactKey").orderBy(Contact["ModifiedDate"].desc())

  # Keep the newest version per ContactKey, drop rows whose ModifiedDate did not
  # parse, and add CreatedYear derived from CreatedDate.
  contact_update = (
    Contact.dropDuplicates()
    .withColumn("RowNumber", row_number().over(window))
    .filter("RowNumber == 1")
    .filter("ModifiedDate is not null")
    .drop("RowNumber")
    .withColumn("CreatedYear", year("CreatedDate"))
    .coalesce(1)
  )

  # Merge contacts: update changed contacts and insert newly created contacts.
  # DeltaTable comes from the `from delta.tables import *` at the top of this cell.
  contact_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/contact_g")

  (contact_g.alias("t")
    .merge(contact_update.alias("s"), "t.ContactKey = s.ContactKey")
    .whenNotMatchedInsertAll()
    .whenMatchedUpdateAll()
    .execute())

  # Remove deleted contacts: delete every curated row whose key is listed in
  # deleted_items_g with type 'Contact'. `type` is qualified with the source alias so
  # the predicate stays unambiguous if the target table ever gains a column of that name.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")

  (contact_g.alias("t")
    .merge(deleted_items_update.alias("s"),
           "s.Objectkey = t.ContactKey and s.type = 'Contact'")
    .whenMatchedDelete()
    .execute())

# Export a full snapshot of the curated Contact Delta table as a single CSV for
# view migration.
# The write mode is "overwrite": every run exports the whole table, so appending
# would leave one extra complete copy in the folder per run and anyone reading the
# folder would see each row duplicated once per run.
(spark.read.format('delta')
  .load('/mnt/datalake_curated/view_migration/contact_g')
  .coalesce(1)  # collapse to one partition so a single part file is written
  .write
  .format('csv')
  .option("header", "true")
  .mode("overwrite")
  .save("/mnt/datalake_curated/view_migration/contact"))

In [0]:
"""
Opportunity Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year , regexp_replace
from pyspark.sql import Window
from delta.tables import *

if opportunity_list:
  # Only runs when recent opportunity files were found.

  # (raw Oracle CRM header, curated column name), in output order. The list is split
  # in two because MechanicEmployeeID, which sits between them, needs a transformation.
  opportunity_fields_head = [
    ("Id", "OpportunityKey"),
    ("AccountId", "AccountId"),
    ("AccountName", "Account"),
    ("CloseDate", "AwardClosedDate"),
    ("CreatedBy", "Created"),
    ("CreatedByAlias", "CreatedBy"),
    ("CustomBoolean0", "COVID19"),
    ("CustomBoolean10", "PublicBid"),
    ("CustomBoolean14", "Safety"),
    ("CustomBoolean15", "OpportunityClosedYesNo"),
    ("CustomBoolean17", "QuickQuote"),
    ("CustomBoolean21", "StateCompliance"),
    ("CustomBoolean28", "EagleQualified"),
    ("CustomBoolean29", "EagleExempt"),
    ("CustomBoolean31", "TKENationalAccount"),
    ("CustomCurrency0", "ValidatedDownPayment"),
    ("CustomCurrency1", "RegionalAssistanceAmount"),
    ("CustomCurrency13", "ExpectedEagleRevenue"),
    ("CustomCurrency2", "CompetitorPrice"),
    ("CustomCurrency3", "ExistingOldContractValue"),
    ("CustomCurrency4", "TKTripAmount"),
    ("CustomCurrency5", "DownPaymentAmount"),
    ("CustomCurrency7", "MarginDollars"),
    ("CustomCurrency8", "RepairSellingPrice"),
    ("CustomDate1", "StatusUpdateDate"),
    ("CustomDate25", "FinalAcceptanceDate"),
    ("CustomDate26", "BidDueDate"),
    ("CustomDate28", "ContractStartDate"),
    ("CustomDate29", "ContractEndDate"),
    ("CustomDate31", "ExistingContractEndDate"),
    ("CustomDate32", "ValidationDateforDeposit"),
    ("CustomDate33", "DownPaymentReceivedDate"),
    ("CustomDate34", "ExistingOldContractEndDate"),
    ("CustomDate35", "JobCompletionDate"),
    ("CustomDate36", "FollowUpDate"),
    ("CustomDate39", "ContractExecutionDate"),
    ("CustomDate40", "ContractBookedDate"),
    ("CustomDate41", "ValidationDateforIC"),
    ("CustomDate47", "RepairStatusDate"),
  ]

  opportunity_fields_tail = [
    ("CustomInteger3", "NumberOfUnits"),
    ("CustomNumber0", "PriceEscalationCapPercent"),
    ("CustomNumber1", "DaystoReceiveDownPayment"),
    ("CustomNumber10", "PowerSponsorEmail"),
    ("CustomNumber11", "NegotiationPlan"),
    ("CustomNumber12", "PainChain"),
    ("CustomNumber13", "CollaborationPlan"),
    ("CustomNumber18", "BilltoCustomerNumber"),
    ("CustomNumber2", "TotalLaborHours"),
    ("CustomNumber22", "RepairGrossMarginPercent"),
    ("CustomNumber35", "EagleManagerAlertcheck"),
    ("CustomNumber37", "NumberOfMAXPlusunits"),
    ("CustomNumber38", "NumberOfMAXPremiumunits"),
    ("CustomNumber39", "NumberOfMAXProunits"),
    ("CustomNumber5", "EBSBilledAmount"),
    ("CustomNumber6", "EBSReceiptAmount"),
    ("CustomNumber8", "StrengthofSale"),
    ("CustomNumber9", "SponsorEmail"),
    ("CustomObject1IntegrationId", "IBUnitIntegrationID"),
    ("CustomObject1Name", "IBUnit"),
    ("CustomObject2IntegrationId", "ExistingQuoteIntegrationID"),
    ("CustomObject2Name", "BaseBid"),
    ("CustomObject3Name", "ServiceContract"),
    ("CustomPickList1", "PrimaryCompetitor"),
    ("CustomPickList10", "PriceEscalationCapType"),
    ("CustomPickList13", "BasisofDesignProduct"),
    ("CustomPickList14", "CompetitorProduct"),
    ("CustomPickList16", "CompetitorServiceLevel"),
    ("CustomPickList17", "LeadSubSource"),
    ("CustomPickList2", "ExistingServiceContract"),
    ("CustomPickList21", "AccountType"),
    ("CustomPickList22", "BuildingType"),
    ("CustomPickList23", "NIMBranch"),
    ("CustomPickList24", "NIMConversionProbability"),
    ("CustomPickList25", "DDincludedinSpecification"),
    ("CustomPickList26", "TypeofContractPaper"),
    ("CustomPickList27", "RegionalAssistance"),
    ("CustomPickList29", "NumberOfBuildingswithMAXactivationfee"),
    ("CustomPickList3", "BasisofDesign"),
    ("CustomPickList4", "CancellationTerms"),
    ("CustomPickList5", "CancellationNoticeinDays"),
    ("CustomPickList7", "BillingFrequency"),
    ("CustomPickList8", "BranchID"),
    ("CustomText0", "JobLocation"),
    ("CustomText30", "OraclePSNumber"),
    ("CustomText32", "ExistingTKEServiceContractNumber"),
    ("CustomText33", "MechanicEmail"),
    ("CustomText34", "ValidatedTKEJobNumber"),
    ("CustomText37", "ValidatedDepositby"),
    ("CustomText39", "IndependentCompetitor"),
    ("CustomText4", "OpsManagerEmailIds"),
    ("CustomText42", "ExternalSource"),
    ("CustomText48", "RepairType"),
    ("CustomText49", "TKTripID"),
    ("CustomText5", "LeadSourceDescription"),
    ("CustomText50", "Mechanic"),
    ("CustomText52", "ValidatedBy"),
    ("CustomText54", "eSignatureStatus"),
    ("CustomText56", "Route"),
    ("CustomText57", "RepairStatus"),
    ("CustomText58", "RepairNumber"),
    ("CustomText6", "CustomerContact"),
    ("CustomText60", "EagleIdentify&QualifyLeadDate"),
    ("CustomText61", "EagleEngageKeyStakeholdersDate"),
    ("CustomText62", "EagleAnalyzeNeedsDate"),
    ("CustomText63", "EagleDesignSolutionApproachDate"),
    ("CustomText64", "EaglePresentOfferDate"),
    ("CustomText8", "MWTimestamp"),
    ("Description", "Description"),
    ("ExpectedRevenue", "ExpectedRevenue"),
    ("Id", "OpportunityId"),
    ("IndexedBoolean0", "ValidateforIC"),
    ("IndexedDate0", "ProposedDate"),
    ("IndexedPick0", "Branch"),
    ("IndexedPick1", "MarketSegment"),
    ("IndexedPick2", "LineofBusiness"),
    ("IndexedPick4", "Region"),
    ("IndexedShortText0", "AddendumType"),
    ("IntegrationId", "IntegrationID"),
    ("KeyContactId", "PrimaryContactID"),
    ("LeadSource", "LeadSource"),
    ("NextStep", "NextStep"),
    ("OpportunityName", "BuildingProject"),
    ("OpportunityType", "TypeofSale"),
    ("OptimizedCustomPickList0", "EagleSalesStage"),
    ("Owner", "SalesRep"),
    ("OwnerId", "PrimaryOwnerId"),
    ("Priority", "Tier"),
    ("Probability", "ProbabilityPercent"),
    ("ReasonWonLost", "ReasonWonLost"),
    ("Revenue", "Revenue"),
    ("SalesProcess", "SalesMethodTranslation"),
    ("SalesStage", "SalesStage"),
    ("SalesStageId", "SalesStageId"),
    ("SourceCampaign", "SourceCampaign"),
    ("StageStatus", "StageStatus"),
    ("Status", "Status"),
  ]

  opportunity_columns = [col(raw_name).alias(curated_name) for raw_name, curated_name in opportunity_fields_head]
  # Strip commas from the raw CustomInteger1 value before exposing it as MechanicEmployeeID.
  opportunity_columns.append(regexp_replace(col("CustomInteger1"), ',', '').alias("MechanicEmployeeID"))
  opportunity_columns += [col(raw_name).alias(curated_name) for raw_name, curated_name in opportunity_fields_tail]
  opportunity_columns += [
    to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
    to_timestamp(col("CreatedDate")).alias("CreatedDate"),
  ]

  # Pipe-delimited, UTF-16 encoded CSV extracts with a header row; multiLine allows
  # quoted values that contain line breaks.
  Opportunity = (
    spark.read.format('csv')
    .option("delimiter", "|")
    .option("header", "true")
    .option("multiLine", "true")
    .option("encoding", "UTF-16")
    # NOTE(review): no schema/inferSchema is set, so columns load as strings and this
    # option looks like it has no effect; "SS" is also the fraction-of-second pattern
    # rather than seconds ("ss"). Left unchanged -- confirm before relying on it.
    .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
    .load(opportunity_list)
    .select(*opportunity_columns)
  )

  # Rank the versions of each opportunity in this batch, newest ModifiedDate first.
  window = Window.partitionBy("OpportunityKey").orderBy(Opportunity["ModifiedDate"].desc())

  # Keep the newest version per OpportunityKey, drop rows whose ModifiedDate did not
  # parse, and add CreatedYear derived from CreatedDate.
  opportunity_update = (
    Opportunity.dropDuplicates()
    .withColumn("RowNumber", row_number().over(window))
    .filter("RowNumber == 1")
    .filter("ModifiedDate is not null")
    .drop("RowNumber")
    .withColumn("CreatedYear", year("CreatedDate"))
    .coalesce(1)
  )

  # Merge opportunities: update changed opportunities and insert newly created ones.
  opportunity_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/opportunity_g")

  (opportunity_g.alias("t")
    .merge(opportunity_update.alias("s"), "t.OpportunityKey = s.OpportunityKey")
    .whenNotMatchedInsertAll()
    .whenMatchedUpdateAll()
    .execute())

  # Remove deleted opportunities: delete every curated row whose key is listed in
  # deleted_items_g with type 'Opportunity'. `type` is qualified with the source alias
  # so the predicate stays unambiguous if the target table ever gains a column of that name.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")

  (opportunity_g.alias("t")
    .merge(deleted_items_update.alias("s"),
           "s.Objectkey = t.OpportunityKey and s.type = 'Opportunity'")
    .whenMatchedDelete()
    .execute())

# Export a full snapshot of the curated Opportunity Delta table as a single CSV for
# view migration.
# The write mode is "overwrite": every run exports the whole table, so appending
# would leave one extra complete copy in the folder per run and anyone reading the
# folder would see each row duplicated once per run.
(spark.read.format('delta')
  .load('/mnt/datalake_curated/view_migration/opportunity_g')
  .coalesce(1)  # collapse to one partition so a single part file is written
  .write
  .format('csv')
  .option("header", "true")
  .mode("overwrite")
  .save("/mnt/datalake_curated/view_migration/opportunity"))




In [0]:
"""
Opportunity Product Revenue Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if opportunity_product_list:
  # Only runs when recent opportunity product revenue files were found.

  # (field name under the "ProductRevenueData." header prefix, curated column name),
  # in output order. The misspelled curated names (e.g. "GPMarginPercint") are kept
  # exactly as they are because they are the column names written to the table.
  product_revenue_fields = [
    ("Id", "OpportunityProductRevenueKey"),
    ("AccountExternalSystemId", "AccountExternalUniqueId"),
    ("AccountId", "AccountId"),
    ("AccountLocation", "Location"),
    ("AccountName", "Account"),
    ("ContactFullName", "Contact"),
    ("ContactId", "ContactId"),
    ("Contract", "Contract"),
    ("CreatedByAlias", "CreatedBy"),
    ("CreatedByUserSignInId", "CreatedByUserSignInID"),
    ("CurrencyCode", "Currency"),
    ("CustomBoolean3", "OpportunityRollupCheck"),
    ("CustomBoolean4", "Proposed"),
    ("CustomBoolean6", "SustainabilityProducts"),
    ("CustomBoolean9", "SPOTCreated"),
    ("CustomCurrency0", "EstimatedRate"),
    ("CustomCurrency1", "EstimatedExpense"),
    ("CustomCurrency2", "EstimatedMechanicRate"),
    ("CustomCurrency3", "TKTripAmount"),
    ("CustomCurrency4", "LaborCost"),
    ("CustomCurrency5", "DiscountAmount"),
    ("CustomCurrency6", "FactoryMaterialCostByUnit"),
    ("CustomCurrency7", "PurchMaterialCostByUnit"),
    ("CustomInteger1", "NumberOfFrontOpenings"),
    ("CustomInteger18", "NumberOfMonths"),
    ("CustomInteger2", "NumberOfRearOpenings"),
    ("CustomNumber0", "Z_EstimatedHours"),
    ("CustomNumber1", "TotalLaborHours"),
    ("CustomNumber2", "Rise"),
    ("CustomNumber20", "GPMarginPercint"),
    ("CustomNumber21", "BranchMargin"),
    ("CustomNumber22", "Variancetostandard"),
    ("CustomNumber23", "Unitssimilaritytobase"),
    ("CustomNumber3", "Hoursoutsideofregular"),
    ("CustomNumber4", "TBMscore"),
    ("CustomPickList0", "ExistingManufacturer"),
    ("CustomPickList1", "ExistingMachineModel"),
    ("CustomPickList10", "HoistwayDoorHangersTracksAndRollers"),
    ("CustomPickList11", "Installation"),
    ("CustomPickList15", "ExistingControllerModel"),
    ("CustomPickList16", "ExistingControllerManufacturer"),
    ("CustomPickList19", "MachineModel"),
    ("CustomPickList2", "AGILEDestinationControls"),
    ("CustomPickList20", "ControllerModel"),
    ("CustomPickList22", "DoorOperatorManufacturer"),
    ("CustomPickList23", "FixtureManufacturer"),
    ("CustomPickList3", "StepWidth"),
    ("CustomPickList4", "TKEFactoryProductCode"),
    ("CustomPickList7", "ControllerType"),
    ("CustomPickList8", "ExistingControllerType"),
    ("CustomPickList9", "BalustradeType"),
    ("CustomText2", "LeadSourceDescription"),
    ("CustomText3", "EquipmentConditions"),
    ("CustomText31", "UnitDesignation"),
    ("CustomText32", "GeneralRepairType"),
    ("CustomText34", "BrandofRepairMaterial"),
    ("CustomText35", "NewOpportunityId"),
    ("CustomText36", "AssetId"),
    ("CustomText39", "Mechanic"),
    ("CustomText40", "MechanicEmail"),
    ("CustomText41", "Category"),
    ("CustomText42", "EnvironmentalConditions"),
    ("CustomText43", "Usage"),
    ("CustomText44", "SpecialBillingRate"),
    ("CustomText46", "HelperZone"),
    ("CustomText47", "MechanicZone"),
    ("CustomText52", "Voltage"),
    ("Description", "Description"),
    ("ExpectedRevenue", "ExpectedRevenue"),
    ("ExternalSystemId", "ExternalUniqueID"),
    ("Forecast", "Forecast"),
    ("IndexedCurrency0", "Existing_OldContractValue"),
    ("IndexedNumber0", "NumOfStops"),
    ("IndexedPick0", "UnitClassification"),
    ("IndexedPick1", "Capacity"),
    ("IndexedPick2", "Speed"),
    ("IndexedPick3", "ControllerManufacturer"),
    ("IndexedPick4", "MachineManufacturer"),
    ("IndexedPick5", "RepairType"),
    ("OpportunityId", "OpportunityId"),
    ("OpportunityName", "Opportunity"),
    ("OpportunitySalesStage", "SalesStage"),
    ("Owner", "SalesRep"),
    ("OwnerId", "PrimaryOwnerId"),
    ("Probability", "ProbabilityPercint"),
    ("ProductCategory", "ProductCategory"),
    ("ProductCategoryId", "ProductCategoryID"),
    ("ProductExternalSystemId", "ProductExternalID"),
    ("ProductId", "ProductId"),
    ("ProductName", "Product"),
    ("ProductPartNumber", "PartNumber"),
    ("ProductStatus", "ProductStatus"),
    ("PurchaseDate", "PurchaseDate"),
    ("PurchasePrice", "PricePerUnit"),
    ("Quantity", "NumOfUnits"),
    ("Revenue", "Revenue"),
    ("SerialNumber", "OracleSerialNumber"),
    ("ShipDate", "ShipDate"),
    ("StartCloseDate", "RevenueStartDate"),
    ("Status", "Status"),
    ("Type", "LineofBusiness"),
    ("UpdatedByAlias", "ModifiedBy"),
    ("UpdatedByUserSignInId", "ModifiedByUserSignInID"),
  ]

  # The raw headers contain a literal dot, so each name is wrapped in backticks to be
  # read as one column name rather than as a struct field access.
  raw_header_template = "`ProductRevenueData.{}`"

  opportunity_product_columns = [
    col(raw_header_template.format(field_name)).alias(curated_name)
    for field_name, curated_name in product_revenue_fields
  ]
  opportunity_product_columns += [
    to_timestamp(col(raw_header_template.format("ModifiedDate"))).alias("ModifiedDate"),
    to_timestamp(col(raw_header_template.format("CreatedDate"))).alias("CreatedDate"),
  ]

  # Pipe-delimited, UTF-16 encoded CSV extracts with a header row; multiLine allows
  # quoted values that contain line breaks.
  OpportunityProduct = (
    spark.read.format('csv')
    .option("delimiter", "|")
    .option("header", "true")
    .option("multiLine", "true")
    .option("encoding", "UTF-16")
    # NOTE(review): no schema/inferSchema is set, so columns load as strings and this
    # option looks like it has no effect; "SS" is also the fraction-of-second pattern
    # rather than seconds ("ss"). Left unchanged -- confirm before relying on it.
    .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
    .load(opportunity_product_list)
    .select(*opportunity_product_columns)
  )

  # Rank the versions of each product revenue line in this batch, newest first.
  window = Window.partitionBy("OpportunityProductRevenueKey").orderBy(OpportunityProduct["ModifiedDate"].desc())

  # Keep the newest version per key, drop rows whose ModifiedDate did not parse, and
  # add CreatedYear derived from CreatedDate.
  opportunity_product_update = (
    OpportunityProduct.dropDuplicates()
    .withColumn("RowNumber", row_number().over(window))
    .filter("RowNumber == 1")
    .filter("ModifiedDate is not null")
    .drop("RowNumber")
    .withColumn("CreatedYear", year("CreatedDate"))
    .coalesce(1)
  )

  # Merge product revenue lines: update changed lines and insert newly created ones.
  opportunity_product_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/opportunity_product_g")

  (opportunity_product_g.alias("t")
    .merge(opportunity_product_update.alias("s"),
           "t.OpportunityProductRevenueKey = s.OpportunityProductRevenueKey")
    .whenNotMatchedInsertAll()
    .whenMatchedUpdateAll()
    .execute())

  # Remove deleted rows in two passes, both driven by deleted_items_g. `type` is
  # qualified with the source alias so the predicates stay unambiguous if the target
  # table ever gains a column of that name.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")

  # Pass 1: revenue lines that were deleted themselves (type 'Revenue').
  (opportunity_product_g.alias("t")
    .merge(deleted_items_update.alias("s"),
           "s.Objectkey = t.OpportunityProductRevenueKey and s.type = 'Revenue'")
    .whenMatchedDelete()
    .execute())

  # Pass 2: revenue lines whose parent opportunity was deleted (type 'Opportunity').
  (opportunity_product_g.alias("t")
    .merge(deleted_items_update.alias("s"),
           "s.Objectkey = t.OpportunityId and s.type = 'Opportunity'")
    .whenMatchedDelete()
    .execute())


# Export a full snapshot of the curated Opportunity Product Revenue Delta table as a
# single CSV for view migration.
# The write mode is "overwrite": every run exports the whole table, so appending
# would leave one extra complete copy in the folder per run and anyone reading the
# folder would see each row duplicated once per run.
(spark.read.format('delta')
  .load('/mnt/datalake_curated/view_migration/opportunity_product_g')
  .coalesce(1)  # collapse to one partition so a single part file is written
  .write
  .format('csv')
  .option("header", "true")
  .mode("overwrite")
  .save("/mnt/datalake_curated/view_migration/opportunity_product"))


In [0]:
"""
Existing Quote Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if existing_quote_list:
  # Only runs when recent existing-quote files were found.

  # (raw Oracle CRM header, curated column name), in output order.
  existing_quote_column_map = [
    ("Id", "ExistingQuoteKey"),
    ("OpportunityId", "OpportunityId"),
    ("QuickSearch1", "QuoteDescription"),
    ("Name", "QuoteNumber"),
    ("IndexedCurrency0", "Revenue"),
    ("ExternalSystemId", "ExternalUniqueID"),
    ("stStatus", "Status"),
  ]

  existing_quote_columns = [col(raw_name).alias(curated_name) for raw_name, curated_name in existing_quote_column_map]
  existing_quote_columns += [
    to_timestamp(col("ModifiedDate")).alias("ModifiedDate"),
    to_timestamp(col("CreatedDate")).alias("CreatedDate"),
  ]

  # Pipe-delimited, UTF-16 encoded CSV extracts with a header row; multiLine allows
  # quoted values that contain line breaks.
  ExistingQuote = (
    spark.read.format('csv')
    .option("delimiter", "|")
    .option("header", "true")
    .option("multiLine", "true")
    .option("encoding", "UTF-16")
    # NOTE(review): no schema/inferSchema is set, so columns load as strings and this
    # option looks like it has no effect; "SS" is also the fraction-of-second pattern
    # rather than seconds ("ss"). Left unchanged -- confirm before relying on it.
    .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
    .load(existing_quote_list)
    .select(*existing_quote_columns)
  )

  # Rank the versions of each quote in this batch, newest ModifiedDate first.
  window = Window.partitionBy("ExistingQuoteKey").orderBy(ExistingQuote["ModifiedDate"].desc())

  # Keep the newest version per ExistingQuoteKey, drop rows whose ModifiedDate did not
  # parse, and add CreatedYear derived from CreatedDate.
  existing_quote_update = (
    ExistingQuote.dropDuplicates()
    .withColumn("RowNumber", row_number().over(window))
    .filter("RowNumber == 1")
    .filter("ModifiedDate is not null")
    .drop("RowNumber")
    .withColumn("CreatedYear", year("CreatedDate"))
    .coalesce(1)
  )

  # Merge existing quotes: update changed quotes and insert newly created ones.
  existing_quote_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/existing_quote_g")

  (existing_quote_g.alias("t")
    .merge(existing_quote_update.alias("s"), "t.ExistingQuoteKey = s.ExistingQuoteKey")
    .whenNotMatchedInsertAll()
    .whenMatchedUpdateAll()
    .execute())

# Export a full snapshot of the curated Existing Quote Delta table as a single CSV
# for view migration.
# The write mode is "overwrite": every run exports the whole table, so appending
# would leave one extra complete copy in the folder per run and anyone reading the
# folder would see each row duplicated once per run.
(spark.read.format('delta')
  .load('/mnt/datalake_curated/view_migration/existing_quote_g')
  .coalesce(1)  # collapse to one partition so a single part file is written
  .write
  .format('csv')
  .option("header", "true")
  .mode("overwrite")
  .save("/mnt/datalake_curated/view_migration/existing_quote"))


In [0]:

"""
Opportunity IB Unit Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if opportunity_ib_unit_list:

  # Reader settings for the pipe-delimited, UTF-16 encoded CSV input files.
  opportunity_ib_reader_options = {
      "delimiter": "|",
      "header": "true",
      # "multiLine": "true",   # left disabled for this input, as in the original reader
      "encoding": "UTF-16",
      "timestampFormat": "MM/dd/yyyy hh:mm:SS a",
  }

  # (source column, curated column) pairs, in output order.
  opportunity_ib_renames = [
      ("`CustomObject1Data.Id`", "OpportunityIBUnitKey"),
      ("`ExternalSystemId`", "OpportunityExternalUniqueId"),
      ("`IntegrationId`", "OpportunityIntegrationId"),
      ("`CustomObject1Data.Name`", "IBUnit"),
      ("`Id`", "OpportunityId"),
      ("`CustomObject1Data.Id`", "ChildObjectId"),
  ]
  # Columns that are converted with to_timestamp; they come last in the projection.
  opportunity_ib_timestamps = [
      ("`CustomObject1Data.ModifiedDate`", "ModifiedDate"),
      ("`CustomObject1Data.CreatedDate`", "CreatedDate"),
  ]

  opportunity_ib_raw = (
      spark.read.format('csv')
      .options(**opportunity_ib_reader_options)
      .load(opportunity_ib_unit_list)
  )
  OpportunityIB = opportunity_ib_raw.select(
      *[col(source).alias(target) for source, target in opportunity_ib_renames],
      *[to_timestamp(col(source)).alias(target) for source, target in opportunity_ib_timestamps]
  ).filter("OpportunityIBUnitKey is not null")

  # Keep one row per key (the one with the latest ModifiedDate); keys whose
  # surviving row has a null ModifiedDate are dropped entirely.
  window = Window.partitionBy("OpportunityIBUnitKey").orderBy(OpportunityIB["ModifiedDate"].desc())
  opportunity_ib_ranked = OpportunityIB.dropDuplicates().withColumn("RowNumber", row_number().over(window))
  opportunity_ib_update = (
      opportunity_ib_ranked
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Upsert into the curated Delta table: update matching keys, insert new ones.
  opportunity_ib_unit_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/opportunity_ib_unit_g")
  (
      opportunity_ib_unit_g.alias("t")
      .merge(
          opportunity_ib_update.alias("s"),
          "t.OpportunityIBUnitKey = s.OpportunityIBUnitKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

  # Delete target rows whose OpportunityId matches a deleted_items_g entry of Type 'Opportunity'.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")
  (
      opportunity_ib_unit_g.alias("t")
      .merge(
          deleted_items_update.alias("s"),
          "s.ObjectKey = t.OpportunityId and Type ='Opportunity'")
      .whenMatchedDelete()
      .execute()
  )

# process full copy in csv for view migration

# Read the whole opportunity_ib_unit_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
opportunity_ib_unit_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/opportunity_ib_unit_g')
(
    opportunity_ib_unit_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/opportunity_ib_unit")
)

In [0]:
"""
Opportunity Partner Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if opportunity_partner_list:

  # Reader settings for the pipe-delimited, UTF-16 encoded, multi-line CSV input files.
  opportunity_partner_reader_options = {
      "delimiter": "|",
      "header": "true",
      "multiLine": "true",
      "encoding": "UTF-16",
      "timestampFormat": "MM/dd/yyyy hh:mm:SS a",
  }

  # (source column, curated column) pairs, in output order.
  opportunity_partner_renames = [
      ("`PartnerData.Id`", "OpportunityPartnerKey"),
      ("`PartnerData.RelationshipRole`", "AccountType"),
      ("`PartnerData.PartnerId`", "PartnerId"),
      ("`PartnerData.PartnerName`", "AccountName"),
      ("`PartnerData.OpportunityId`", "OpportunityId"),
  ]
  # Columns that are converted with to_timestamp; they come last in the projection.
  opportunity_partner_timestamps = [
      ("`PartnerData.ModifiedDate`", "ModifiedDate"),
      ("`PartnerData.CreatedDate`", "CreatedDate"),
  ]

  opportunity_partner_raw = (
      spark.read.format('csv')
      .options(**opportunity_partner_reader_options)
      .load(opportunity_partner_list)
  )
  OpportunityPartner = opportunity_partner_raw.select(
      *[col(source).alias(target) for source, target in opportunity_partner_renames],
      *[to_timestamp(col(source)).alias(target) for source, target in opportunity_partner_timestamps]
  )

  # Keep one row per key (the one with the latest ModifiedDate); keys whose
  # surviving row has a null ModifiedDate are dropped entirely.
  window = Window.partitionBy("OpportunityPartnerKey").orderBy(OpportunityPartner["ModifiedDate"].desc())
  opportunity_partner_ranked = OpportunityPartner.dropDuplicates().withColumn("RowNumber", row_number().over(window))
  opportunity_partner_update = (
      opportunity_partner_ranked
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Upsert into the curated Delta table: update matching keys, insert new ones.
  opportunity_partner_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/opportunity_partner_g")
  (
      opportunity_partner_g.alias("t")
      .merge(
          opportunity_partner_update.alias("s"),
          "t.OpportunityPartnerKey = s.OpportunityPartnerKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

  # Delete target rows whose OpportunityId matches a deleted_items_g entry of type 'Opportunity'.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")
  (
      opportunity_partner_g.alias("t")
      .merge(
          deleted_items_update.alias("s"),
          "s.Objectkey = t.OpportunityId and type ='Opportunity'")
      .whenMatchedDelete()
      .execute()
  )

# process full copy in csv for view migration

# Read the whole opportunity_partner_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
opportunity_partner_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/opportunity_partner_g')
(
    opportunity_partner_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/opportunity_partner")
)

In [0]:
"""
Opportunity Contact Role Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year , concat_ws
from pyspark.sql import Window
from delta.tables import *

if opportunity_contact_role_list:

  # Read the pipe-delimited, UTF-16 encoded, multi-line CSV input files and
  # project them onto the curated column names.
  opportunityContactRole = (spark.read.format('csv')
              .option("delimiter", "|")
              .option("header", "true")
              .option("multiLine", "true")
              .option("encoding", "UTF-16")
              .option("timestampFormat", "MM/dd/yyyy hh:mm:SS a")
              .load(opportunity_contact_role_list)
              .select(
                  col("`ContactRoleData.Id`").alias("opportunityContactRoleKey"),
                  col("`ContactRoleData.ContactId`").alias("ContactId"),
                  # Full name is "<first> <last>". The previous version passed the
                  # first-name column twice, producing "<first> <first>".
                  concat_ws(' ',
                            col("`ContactRoleData.ContactFirstName`"),
                            col("`ContactRoleData.ContactLastName`")).alias("ContactName"),
                  col("`ContactRoleData.Primary`").alias("Primary"),
                  col("`ContactRoleData.BuyingRole`").alias("EagleRoles"),
                  col("`ContactRoleData.ContactFirstName`").alias("FirstName"),
                  col("`ContactRoleData.ContactLastName`").alias("LastName"),
                  col("`ContactRoleData.OpportunityId`").alias("OpportunityId"),
                  to_timestamp(col("`ContactRoleData.ModifiedDate`")).alias("ModifiedDate"),
                  to_timestamp(col("`ContactRoleData.CreatedDate`")).alias("CreatedDate")
              )
        )

  # Keep one row per key (the one with the latest ModifiedDate); keys whose
  # surviving row has a null ModifiedDate are dropped entirely.
  window = Window.partitionBy("opportunityContactRoleKey").orderBy(opportunityContactRole["ModifiedDate"].desc())
  opportunity_contact_role_update = (opportunityContactRole.dropDuplicates().withColumn("RowNumber", row_number().over(window))
     .filter("RowNumber == 1")
     .filter("ModifiedDate is not null")
     .drop("RowNumber")
     .withColumn("CreatedYear", year("CreatedDate"))
     .coalesce(1))

  # Merge opportunity contact roles: update changed rows and insert newly created ones.
  opportunity_contact_role_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/opportunity_contact_role_g")

  opportunity_contact_role_g.alias("t").merge(
      opportunity_contact_role_update.alias("s"),
      "t.opportunityContactRoleKey = s.opportunityContactRoleKey") \
    .whenNotMatchedInsertAll() \
    .whenMatchedUpdateAll() \
    .execute()

  # Delete target rows whose OpportunityId matches a deleted_items_g entry of type 'Opportunity'.
  deleted_items_update = spark.read.format('delta').load("/mnt/datalake_curated/view_migration/deleted_items_g")

  opportunity_contact_role_g.alias("t") \
    .merge( \
      deleted_items_update.alias("s"), \
      "s.Objectkey = t.OpportunityId and type ='Opportunity'" ) \
    .whenMatchedDelete() \
    .execute()

# process full copy in csv for view migration

# Read the whole opportunity_contact_role_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
opportunity_contact_role_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/opportunity_contact_role_g')
(
    opportunity_contact_role_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/opportunity_contact_role")
)

In [0]:
"""
Activity Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year
from pyspark.sql import Window
from delta.tables import *

if activity_list:

  # Reader settings for the pipe-delimited, UTF-16 encoded, multi-line CSV input files.
  activity_reader_options = {
      "delimiter": "|",
      "header": "true",
      "multiLine": "true",
      "encoding": "UTF-16",
      "timestampFormat": "MM/dd/yyyy hh:mm:SS a",
  }

  # (source column, curated column) pairs, in output order.
  activity_renames = [
      ("Id", "ActivityKey"),
      ("AccountId", "AccountId"),
      ("AccountName", "Account"),
      ("Activity", "Activity"),
      ("Alias", "Alias"),
      ("CompletedDatetime", "CompletedDate"),
      ("CustomPickList1", "ActivitySubType"),
      ("DueDate", "DueDate"),
      ("IndexedPick0", "Branch"),
      ("IndexedPick1", "Region"),
      ("IntegrationId", "IntegrationID"),
      ("OpportunityId", "OpportunityId"),
      ("OwnerExternalSystemId", "OwnerExternalUniqueId"),
      ("OwnerFullName", "OwnerFullName"),
      ("OwnerId", "OwnerId"),
      ("OwnerIntegrationId", "OwnerIntegrationId"),
      ("PrimaryOwnerId", "PrimaryOwnerId"),
      ("ServiceRequestNumber", "Case"),
      ("Status", "Status"),
      ("Type", "Type"),
      ("Description", "Description"),
      ("Completed", "Completed"),
      ("CustomBoolean0", "ActivityCompleted"),
  ]
  # Columns that are converted with to_timestamp; they come last in the projection.
  activity_timestamps = ["ModifiedDate", "CreatedDate"]

  activity_raw = (
      spark.read.format('csv')
      .options(**activity_reader_options)
      .load(activity_list)
  )
  Activity = activity_raw.select(
      *[col(source).alias(target) for source, target in activity_renames],
      *[to_timestamp(col(name)).alias(name) for name in activity_timestamps]
  )

  # Keep one row per ActivityKey (the one with the latest ModifiedDate); keys whose
  # surviving row has a null ModifiedDate are dropped entirely.
  window = Window.partitionBy("ActivityKey").orderBy(Activity["ModifiedDate"].desc())
  activity_ranked = Activity.dropDuplicates().withColumn("RowNumber", row_number().over(window))
  activity_update = (
      activity_ranked
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Merge activities: update changed activities and insert newly created ones.
  activity_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/activity_g")
  (
      activity_g.alias("t")
      .merge(
          activity_update.alias("s"),
          "t.ActivityKey = s.ActivityKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

# process full copy in csv for view migration

# Read the whole activity_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
activity_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/activity_g')
(
    activity_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/activity")
)


In [0]:
"""
IB Unit Curated  
"""

from pyspark.sql.functions import col , row_number , year , to_timestamp , year , lit
from pyspark.sql import Window
from delta.tables import *

if ib_unit_list:

  # Reader settings for the pipe-delimited, UTF-16 encoded, multi-line CSV input files.
  ib_unit_reader_options = {
      "delimiter": "|",
      "header": "true",
      "multiLine": "true",
      "encoding": "UTF-16",
      "timestampFormat": "MM/dd/yyyy hh:mm:SS a",
  }

  # (source column, curated column) pairs, in output order.
  ib_unit_renames = [
      ("Id", "IBUnitKey"),
      ("AccountLocation", "AccountLocation"),
      ("Owner", "OwnerAlias"),
      ("AccountName", "SiteLocation"),
      ("AccountId", "AccountId"),
      ("CustomObject1Name", "IBUnit"),
      ("CustomObject3Name", "ServiceContract"),
      ("OpportunityName", "Opportunity"),
      ("OpportunityId", "OpportunityId"),
      ("ServiceRequestNumber", "Case"),
      ("ServiceRequestId", "CaseId"),
      # Raw (unconverted) CreatedDate; the to_timestamp version is added at the end as "CreatedDate".
      ("CreatedDate", "Created:Date"),
      ("OwnerFullName", "OwnerFullName"),
      ("QuickSearch1", "BillingSiteNumber"),
      ("QuickSearch2", "EquipmentType"),
      ("Type", "ProductType"),
      ("IndexedNumber0", "Latitude"),
      ("Name", "OracleSerialNumber"),
      ("AccountIntegrationId", "AccountIntegrationID"),
      ("AccountExternalSystemId", "AccountExternalUniqueID"),
      ("OpportunityExternalSystemId", "OpportunityExternalUniqueID"),
      ("OpportunityIntegrationId", "OpportunityIntegrationID"),
      ("ServiceRequestExternalSystemId", "CaseExternalUniqueID"),
      ("ServiceRequestIntegrationId", "CaseIntegrationID"),
      ("UpdatedByAlias", "ModifiedBy"),
      ("CreatedByAlias", "CreatedBy"),
      ("CustomObject1Type", "IBUnitType"),
      ("CustomObject1ExternalSystemId", "IBUnitExternalUniqueID"),
      ("CustomObject1IntegrationId", "IBUnitIntegrationID"),
      ("CustomObject3IntegrationId", "ServiceContractIntegrationID"),
      ("CustomObject3ExternalSystemId", "ServiceContractExternalUniqueID"),
      ("Description", "Description"),
      ("bAnnual_Test_Covered", "FirstSafetyTestCoveredinContract"),
      ("bAnnual_Two_Men_Required", "FirstSafetyTestTwoMenRequired"),
      ("bCritical_Unit", "CriticalUnit"),
      ("bFireman_Operation", "FiremanOperation"),
      ("bFive_Yr_Test_Rqd_By_Contract", "AccountContractStatusFlag"),
      ("bHigh_Callback_Unit", "HighCallbackUnit"),
      # ("bIB_Account_Linkage_Flag", "IBAccountLinkageFlag"),   # disabled in the original select
      ("bMAX_Device_Flag", "MAXDeviceFlag"),
      ("bPeriodic_Two_Men_Required", "SecondSafetyTestTwoMenRequired"),
      ("bPeriodic_test_covered", "SecondSafetyTestCoveredinContract"),
      ("coptimizedBilling_Amount", "BillingAmount"),
      ("dContract_End_Date_1", "ContractEndDate"),
      ("dExpiration_Date", "ExpirationDate"),
      ("dFactory_Warranty_End_Date", "FactoryWarrantyEndDate"),
      ("dFinal_Acceptance_Date", "FinalAcceptanceDate"),
      ("dInstall_Date_Old", "TKExtendEndDate"),
      ("dLast_Annual_Inspection", "FirstLastSafetyTest"),
      ("dLast_Periodic_Inspection_Date", "SecondLastSafetyTest"),
      ("dNext_Annual_Safety_Inspection_Date", "FirststNextSafetyInspection"),
      ("dNext_Periodic_Safety_Inspection_Date", "SecondNextSafetyInspection"),
      ("dtoptimizedContract_Start_Date", "ContractStartDate"),
      ("dtoptimizedInstall_Date", "InstallDate"),
      ("ltContract_Line_Number", "ContractLineNumber"),
      ("ltCurrently_Maintained_by", "CurrentlyMaintainedby"),
      ("ltCustomers_Unit_nickname", "UnitName"),
      ("ltEquipment_Condition", "EquipmentCondition"),
      ("ltExternal_reference", "IBExternalreference"),
      ("ltHandrail_Type", "Wrap"),
      ("ltInstalled_Location", "InstalledLocation"),
      ("ltJack_Type", "JackType"),
      ("ltLegal_Identification", "LegalIdentificationNumber"),
      ("ltMachine_Room_Location", "MachineRoomLocation"),
      ("ltRearSide_Door_Complexity", "Rear_SideDoorComplexity"),
      ("ltSequence", "Sequence2"),
      ("ltService_Contract_Number", "AllAssociatedServiceContracts"),
      ("ltTax_Exempt_Building_Classification", "TaxExemptBuildingClassification"),
      ("ltType_of_Buffer", "BufferType"),
      ("ltType_of_Controller", "ControllerType"),
      ("ltType_of_Leveling_Devices", "TypeofLevelingDevices"),
      ("ltType_of_Valve", "HydraulicValveInfo"),
      ("ltUnit_Telephone_Number", "UnitTelephoneNumber2"),
      ("nCar_Door_Height_in", "CarDoorHeight_in"),
      ("nCar_Door_Width_in", "CarDoorWidth_in"),
      ("nCategory_1_Safety_Frequency_in_Years", "1stSafetyFrequency_years"),
      ("nHoistwayRope_Diameter_in_Inches", "RopeSize"),
      ("nHoistwayRope_Length_in_feet", "RopeLength"),
      ("nNumber_of_Front_Openings", "FrontOpenings"),
      ("nNumber_of_Landings", "NumberofLandings"),
      ("nNumber_of_Rear_Openings", "RearSideOpenings"),
      ("nNumber_of_Ropes", "NumberOfRopes"),
      ("nNumber_of_Steps", "NumberofSteps"),
      ("nPeriodic_Inspection_Frequency", "SecondSafetyFrequency_years"),
      ("nSequence", "Sequence"),
      ("noptimizedLongitude", "Longitude"),
      ("noptimized_of_Openings", "NumberofOpenings"),
      ("plEBS_Relationship", "ContractStatusValue"),
      ("plJack_Lift_Type", "JackLiftType"),
      ("plJack_Orientation", "JackOrientation"),
      ("plMachine_Manufacturer", "MachineManufacturer"),
      ("plMachine_Model", "MachineModel"),
      ("plUsage", "Usage"),
      ("ploptimizedEBS_Relationship", "EBSRelationship"),
      ("stBalustrade_Type_US", "BalustradeType"),
      ("stBilling_Site_Location", "CRMBillingAccountRowID"),
      ("stBranch", "Branch"),
      ("stBranch_", "EBSBranchNumber"),
      ("stBuilding_Type", "BuildingType"),
      ("stCapacity", "Capacity"),
      ("stCategory_1_Test_Duration", "1stSafetyTestDuration"),
      ("stCategory_2_Safety_Inspection_Type", "2ndSafetyInspectionType"),
      ("stContract_Line_Status", "ContractLineStatus"),
      ("stController_Manufacturer", "ControllerManufacturer"),
      ("stController_Model", "ControllerModel"),
      ("stDistrict", "Region"),
      ("stDoor_Opening_Type", "DoorOpeningType"),
      ("stDoor_Type", "ApplicationofUnit"),
      ("stDoors_Complexity", "FrontDoorComplexity"),
      ("stDrive_Configuration", "DriveType"),
      ("stDrive__Motor_Starter_Make", "DriveMotorStarterMake"),
      ("stElevator_ID", "ElevatorID"),
      ("stEmergency_phone_number", "UnitTelephoneNumber"),
      ("stEnvironment", "Environment"),
      ("stEquipment_SubType", "EquipmentSubType"),
      ("stJack_OEM", "JackOEM"),
      ("stLength_of_Handrail", "LengthofHandrail"),
      ("stMAX_Device_ID", "MAXDeviceID"),
      ("stMachine_Room__Seismic_Devices", "TKExtendStatus"),
      ("stMarket_Segment", "MarketSegment"),
      ("stMechanic_Employee_", "MechanicEmployeeNumber"),
      ("stMechanic_Person_ID", "MechanicPersonID"),
      ("stMechanic_Supervisor_Employee_", "MechanicSupervisorEmployeeNumber"),
      ("stMechanic_Supervisor_Name", "MechanicSupervisorName"),
      ("stMechanic_Supervisor_Person_ID", "MechanicSupervisorPersonID"),
      ("stModel_Name", "Model"),
      ("stMotor_Current", "MotorCurrent"),
      ("stNumber_in_Group", "NumberinGroup"),
      ("stOEM_Serial_Number", "FactorySerialNumber"),
      ("stOEM_Unit", "OEMUnit"),
      ("stOld_Unit_RowID", "OldUnitCRMRowID"),
      ("stOther_Capacity", "ContractStatus"),
      ("stPeriodic_Safety_Test_Duration", "2ndSafetyTestDuration"),
      ("stPrevious_Site_Id", "PreviousSiteId"),
      ("stPump_Motor_OEM", "PumpMotorOEM"),
      ("stPump_Motor_Type", "HydraulicPumpUnitType"),
      ("stRegion", "FinancialReportingArea"),
      ("stRise__Travel", "Rise_Travel"),
      ("stRoute_", "RouteNumber"),
      ("stRoute_Mechanic", "RouteMechanic"),
      ("stSafety_Inspection_Type", "1stSafetyInspectionType"),
      ("stShipping_Site_Number", "SiteNumber"),
      ("stSpeed", "Speed"),
      ("stStatus", "UnitStatus"),
      ("stStep_Width", "StepWidth"),
      ("stSuspension_Roping", "HoistwaySuspensionRoping"),
      ("stUnit_Group_Name", "GroupName"),
      ("stYear_of_Modernization", "YearofModernization"),
      ("stoptimizedOptimized_Account_Id", "OptimizedAccountId"),
      ("dLast_Safety_Survey_Date", "LastSafetySurveyDate"),
  ]
  # Columns that are converted with to_timestamp; they come last in the projection.
  ib_unit_timestamps = ["ModifiedDate", "CreatedDate"]

  ib_unit_raw = (
      spark.read.format('csv')
      .options(**ib_unit_reader_options)
      .load(ib_unit_list)
  )
  IBUnit = ib_unit_raw.select(
      *[col(source).alias(target) for source, target in ib_unit_renames],
      *[to_timestamp(col(name)).alias(name) for name in ib_unit_timestamps]
  )

  # Keep one row per IBUnitKey (the one with the latest ModifiedDate); keys whose
  # surviving row has a null ModifiedDate are dropped entirely.
  window = Window.partitionBy("IBUnitKey").orderBy(IBUnit["ModifiedDate"].desc())
  ib_unit_ranked = IBUnit.dropDuplicates().withColumn("RowNumber", row_number().over(window))
  ib_unit_update = (
      ib_unit_ranked
      .filter("RowNumber == 1")
      .filter("ModifiedDate is not null")
      .drop("RowNumber")
      .withColumn("CreatedYear", year("CreatedDate"))
      .coalesce(1)
  )

  # Merge IB units: update changed IB units and insert newly created ones.
  ib_unit_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/ib_unit_g")
  (
      ib_unit_g.alias("t")
      .merge(
          ib_unit_update.alias("s"),
          "t.IBUnitKey = s.IBUnitKey")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

# process full copy in csv for view migration

# Read the whole ib_unit_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
ib_unit_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/ib_unit_g')
(
    ib_unit_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/ib_unit")
)


In [0]:
"""
Quote Header
"""


from pyspark.sql.types import StringType , TimestampType , DoubleType , StructType , StructField , LongType
from pyspark.sql.functions import col  , year , to_timestamp , to_date , row_number , lit
from pyspark.sql import Window
from delta.tables import *  
if quote_header_v2_list:

  # Plain string attributes; they lead both the JSON schema and the final projection, in this order.
  quote_header_text_fields = [
      "crmOpportunityId_quote",
      "quoteNumber_quote",
      "transactionID_quote",
      "crmBranch_quote",
      "_newBillTo_company_name",
      "_newBillTo_last_name",
      "_newBillTo_first_name",
      "_newBillTo_address",
      "_newBillTo_address_2",
      "_newBillTo_city",
      "_newBillTo_state",
      "_newBillTo_zip",
      "_newBillTo_company_name_2",
      "_newBillTo_country",
      "_shippingAddress_company_name",
      "_shippingAddress_last_name",
      "_shippingAddress_first_name",
      "_shippingAddress_address",
      "_shippingAddress_address_2",
      "_shippingAddress_city",
      "_shippingAddress_state",
      "_shippingAddress_zip",
      "_shippingAddress_company_name_2",
      "_shippingAddress_country",
      "siteAccountID_quote",
      "crmLineOfBusiness_quote",
      "sublineOfBusiness_quote",
  ]

  # Attributes that arrive as {value, currency} structs; only .value is kept in the curated table.
  # The labor-rate entries carry "added 03/08/2022 BS" markers in the original schema.
  quote_header_money_fields = [
      "totalMaterialCost_quote",
      "mechanicLaborRate_quote",
      "mechanicLaborOvertimeRate_quote",
      "adjusterLaborRate_quote",
      "adjusterLaborOvertimeRate_quote",
      "teamLaborRate_quote",
      "teamLaborOvertimeRate_quote",
      "totalLaborCost_quote",
      "total_quote",
      "totalCost_quote",
  ]

  quoteSchema = StructType(
      [StructField(name, StringType(), True) for name in quote_header_text_fields]
      + [StructField("createdDate_quote", TimestampType(), True)]
      + [
          StructField(
              name,
              StructType([
                  StructField("value", DoubleType(), True),
                  StructField("currency", StringType(), True),
              ]),
          )
          for name in quote_header_money_fields
      ]
      + [
          StructField("totalMarginPercent_quote", StringType(), True),
          StructField("desiredContractType_quote", StringType(), True),
          StructField("addendumType_quote", StringType(), True),
          StructField("_date_modified", TimestampType(), True),
          StructField("_date_added", TimestampType(), True),
      ]
  )

  # Parse the multi-line JSON files with the explicit schema above.
  Quote_Header = spark.read.schema(quoteSchema).option("multiline", "true").json(quote_header_v2_list)

  # Final column list: text fields, flattened money values, then the remaining attributes.
  quote_header_projection = (
      list(quote_header_text_fields)
      + [col(name + ".value").alias(name) for name in quote_header_money_fields]
      + [
          col("totalMarginPercent_quote"),
          "createdDate_quote",
          "desiredContractType_quote",
          "addendumType_quote",
          "_date_modified",
          "_date_added",
      ]
  )

  # Keep one row per transactionID_quote: the one with the latest _date_modified.
  window = Window.partitionBy("transactionID_quote").orderBy(Quote_Header["_date_modified"].desc())
  quote_header_ranked = Quote_Header.dropDuplicates().withColumn("RowNumber", row_number().over(window))
  quote_header_update = (
      quote_header_ranked
      .filter("RowNumber == 1")
      .drop("RowNumber")
      .select(*quote_header_projection)
      .withColumn("CreatedYear", year(col("createdDate_quote").cast("date")))
  )

  # Merge quotes: update matching transaction ids and insert new ones.
  quote_header_g = DeltaTable.forPath(spark, "/mnt/datalake_curated/view_migration/quote_header_g")
  (
      quote_header_g.alias("t")
      .merge(
          quote_header_update.alias("s"),
          "t.transactionID_quote = s.transactionID_quote")
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )


# Read the whole quote_header_g Delta table and write it out as CSV (one partition, with header).
# NOTE(review): mode "append" adds another full copy on every run - confirm the target folder is cleared between runs.
quote_header_full_copy = spark.read.format('delta').load('/mnt/datalake_curated/view_migration/quote_header_g')
(
    quote_header_full_copy
    .coalesce(1)
    .write
    .format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/quote_header")
)

In [0]:
"""
Quote Line
"""


from pyspark.sql.types import StringType , TimestampType , DoubleType , StructType , StructField
from pyspark.sql.functions import col  , year , to_timestamp , to_date , row_number , lit
from pyspark.sql import Window
from delta.tables import *  
if quote_line_v2_list:
  # Explicit read schema for the quote-line JSON files; only built when
  # quote_line_v2_list is non-empty. Amount attributes are declared as
  # {value, currency} structs and lineType_line as a {value} struct; all other
  # attributes are flat strings or timestamps.
  _text_wrapper = StructType([StructField("value", StringType(), True)])
  _amount = StructType([
      StructField("value", DoubleType(), True),
      StructField("currency", StringType(), True),
  ])

  quoteLineSchema = (
      StructType()
      # Identifiers (declared non-nullable)
      .add("_bs_id", StringType(), False)
      .add("_id", StringType(), False)
      .add("_sequence_number", StringType(), False)
      .add("lineDocNum_line", StringType(), False)
      .add("_document_number", StringType(), False)
      .add("_parent_doc_number", StringType(), False)
      # Descriptive attributes
      .add("buildingName_line", StringType(), True)
      .add("lineType_line", _text_wrapper)
      .add("itemDescription_line", StringType(), True)
      .add("unitDesignation_line", StringType(), True)
      .add("_part_number", StringType(), True)
      .add("oracleSerialNumber_line", StringType(), True)
      .add("oemSerialNumber_line", StringType(), True)
      # Audit timestamps
      .add("_date_modified", TimestampType(), True)
      .add("_date_added", TimestampType(), True)
      # Pricing, cost and labor figures
      .add("numOfUnitsOnTheEstimate_line", StringType(), True)
      .add("proposalPricePerUnit_line", _amount)
      .add("proposalPrice_line", _amount)
      .add("totalCost_line", _amount)
      .add("totalLaborHours_line", StringType(), True)
      .add("totalTeamLaborHours_line", StringType(), True)
      .add("unitLaborCost_line", _amount)
      .add("unitMaterialCost_line", _amount)
      .add("unitTotalLaborHours_line", StringType(), True)
      .add("extraExpenses_line", _amount)
      .add("grossMarginWithoutOverhead_line", StringType(), True)
      .add("marginAmount_line", _amount)
      .add("marginPercentage_line", StringType(), True)
      .add("mechanicHours_line", StringType(), True)
      .add("helperHours_line", StringType(), True)
      .add("useTax_line", StringType(), True)
      .add("contractType_line", StringType(), True)
      # Disabled: .add("_config_attr_info", StructType([StructField("value", StringType(), True)]), True)
  )

  # Load the new quote-line JSON files with the explicit schema and flatten
  # them: struct-typed attributes are replaced by their inner "value" field
  # (keeping the attribute name), all other columns pass through unchanged.
  # _document_number is part of the schema but is not projected here.
  # NOTE(review): in Spark datetime patterns "SS" is fraction-of-second (seconds
  # is "ss") and "HH" is the 24-hour field although an AM/PM marker "a" follows
  # - confirm this timestampFormat against the actual file contents.
  _struct_valued = (
      "lineType_line",
      "proposalPricePerUnit_line",
      "proposalPrice_line",
      "totalCost_line",
      "unitLaborCost_line",
      "unitMaterialCost_line",
      "extraExpenses_line",
      "marginAmount_line",
  )
  _quote_line_columns = [
      "_bs_id",
      "_id",
      "_sequence_number",
      "lineDocNum_line",
      "_parent_doc_number",
      "buildingName_line",
      "lineType_line",
      "itemDescription_line",
      "unitDesignation_line",
      "_part_number",
      "oracleSerialNumber_line",
      "oemSerialNumber_line",
      "numOfUnitsOnTheEstimate_line",
      "proposalPricePerUnit_line",
      "proposalPrice_line",
      "totalCost_line",
      "unitLaborCost_line",
      "unitMaterialCost_line",
      "extraExpenses_line",
      "marginAmount_line",
      "totalLaborHours_line",
      "totalTeamLaborHours_line",
      "unitTotalLaborHours_line",
      "grossMarginWithoutOverhead_line",
      "marginPercentage_line",
      "mechanicHours_line",
      "helperHours_line",
      "useTax_line",
      "contractType_line",
      "_date_modified",
      "_date_added",
  ]

  _raw_quote_lines = (
      spark.read
      .schema(quoteLineSchema)
      .option("multiline", "true")
      .option("timestampFormat", "MM/dd/yyyy HH:mm:SS a")
      .json(quote_line_v2_list)
  )
  Quote_Line = _raw_quote_lines.select(*[
      col(name + ".value").alias(name) if name in _struct_valued else name
      for name in _quote_line_columns
  ])


  # Keep one row per _id - the one with the latest _date_modified - after
  # removing exact duplicate rows, then derive CreatedYear from _date_added.
  # NOTE(review): rows are ranked per _id only, while the merge below matches
  # on (_bs_id, _id) - confirm _id is unique across _bs_id values.
  window = Window.partitionBy("_id").orderBy(Quote_Line["_date_modified"].desc())

  _ranked_quote_lines = Quote_Line.dropDuplicates().withColumn(
      "RowNumber", row_number().over(window)
  )
  _latest_quote_lines = _ranked_quote_lines.filter("RowNumber == 1").drop("RowNumber")
  quote_line_update = _latest_quote_lines.withColumn(
      "CreatedYear", year(col("_date_added").cast("date"))
  )


  # Merge quote lines: update quote lines that changed and insert newly
  # created ones. Rows are matched on (_bs_id, _id).

  quote_line_g = DeltaTable.forPath(
      spark, "/mnt/datalake_curated/view_migration/quote_line_g"
  )

  (
      quote_line_g.alias("t")
      .merge(
          quote_line_update.alias("s"),
          "t._bs_id = s._bs_id and t._id = s._id ",
      )
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

# Flag deletes 

#   deletes = (quote_line_g.toDF().alias('t')
#              .join(quote_line_update
#              , ['_bs_id','SourceSystem'] ,how = 'leftsemi')
#              .join(quote_line_update ,
#               ['_bs_id','_id','SourceSystem'] , how ='leftanti'))

#   quote_line_g.alias("t").merge(
#         deletes.alias("s"),
#         "t._bs_id = s._bs_id and t._id = s._id and t.SourceSystem = s.SourceSystem") \
#       .whenMatchedUpdate(set = {'isDeleted':lit(1)}) \
#       .execute()

# Export the whole quote_line_g Delta table as a single-partition CSV (with a
# header row) into the quote_line folder. Runs at cell level, outside the
# `if quote_line_v2_list:` block, so it executes even when nothing was merged.
# NOTE(review): mode "append" adds another full copy of the table on every run
# instead of replacing the previous export - confirm consumers expect that.
(
    spark.read.format('delta')
    .load('/mnt/datalake_curated/view_migration/quote_line_g')
    .coalesce(1)
    .write.format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/quote_line")
)

In [0]:
"""
Quote Line Config & Material Summary
"""

from pyspark.sql.types import StringType , StructField , StructType , TimestampType
from pyspark.sql.functions import split , col , lit , explode, from_csv , first , row_number ,  size , arrays_zip , year ,to_timestamp , to_date 
from pyspark.sql.window import Window
# Switch Spark SQL column resolution to case-sensitive for this cell (the first
# notebook cell sets it to False).
# NOTE(review): presumably needed because pivoted attribute names may differ
# only by case - confirm. The setting is not restored to False afterwards in
# the visible part of the notebook.
spark.conf.set('spark.sql.caseSensitive', True)
from delta.tables import *  

if quote_line_v2_list:
  # Narrow read schema for the same quote-line JSON files: only the line
  # identifiers, the audit timestamps and the packed configuration string
  # (_config_attr_info.value) are needed in this cell.
  quoteLineSchema = (
      StructType()
      .add("_bs_id", StringType(), False)
      .add("_id", StringType(), False)
      .add("_sequence_number", StringType(), True)
      .add("lineDocNum_line", StringType(), True)
      .add("_document_number", StringType(), False)
      .add("_date_modified", TimestampType(), False)
      .add("_date_added", TimestampType(), False)
      .add("_config_attr_info", StructType([StructField("value", StringType(), True)]), True)
  )



  # Load the quote-line JSON files with the narrow schema. Selecting
  # "_config_attr_info.value" surfaces the packed configuration string as a
  # column named "value", which the following steps filter and split.
  # NOTE(review): in Spark datetime patterns "SS" is fraction-of-second (seconds
  # is "ss") and "HH" is the 24-hour field although an AM/PM marker "a" follows
  # - confirm this timestampFormat against the actual file contents.
  _config_source_columns = [
      "_bs_id",
      "_id",
      "_sequence_number",
      "lineDocNum_line",
      "_document_number",
      "_date_modified",
      "_date_added",
      "_config_attr_info.value",
  ]
  _raw_config_lines = (
      spark.read
      .schema(quoteLineSchema)
      .option("multiline", "true")
      .option("timestampFormat", "MM/dd/yyyy HH:mm:SS a")
      .json(quote_line_v2_list)
  )
  Quote_Line_Config = _raw_config_lines.select(*_config_source_columns)

  # Drop exact duplicates and retrieve the most recent row per _id (latest
  # _date_modified). The null check runs after that pick, so a line whose
  # latest row has no configuration string is dropped entirely.
  window = Window.partitionBy("_id").orderBy(Quote_Line_Config["_date_modified"].desc())

  _ranked_config_lines = Quote_Line_Config.dropDuplicates().withColumn(
      "RowNumber", row_number().over(window)
  )
  Quote_Line_Config = (
      _ranked_config_lines
      .filter("RowNumber == 1")
      .filter("value is not null")
      .drop("RowNumber")
  )

  # Unpack the configuration string into one column per attribute:
  #   1. split the string on the literal "|^|" separator into an array,
  #   2. explode the array to one row per entry (default output column "col"),
  #   3. split each entry on "~", taking part 0 as the attribute name and
  #      part 2 as its value,
  #   4. pivot the attribute names into columns, keeping the first value seen
  #      per line.
  _line_keys = [
      "_bs_id",
      "_id",
      "_document_number",
      "lineDocNum_line",
      "_date_modified",
      "_date_added",
  ]
  _config_entries = (
      Quote_Line_Config
      .select(split(col("value"), '\\|\\^\\|').alias("ColumnArray"), *_line_keys)
      .select(*_line_keys, explode("ColumnArray"))
  )
  _entry_parts = split("col", "~")
  quote_line_config = (
      _config_entries
      .select(*_line_keys, _entry_parts[0].alias("colName"), _entry_parts[2].alias("value"))
      .groupBy(*_line_keys)
      .pivot("colName")
      .agg(first("value"))
  )
  # Fixed, ordered list of columns that make up the quote-line-config merge
  # source: the six grouping keys of the pivot followed by the configuration
  # attribute names that are kept.
  cols = [
      # Grouping keys carried through the pivot
      "_bs_id",
      "_id",
      "_document_number",
      "lineDocNum_line",
      "_date_modified",
      "_date_added",
      # Configuration attributes
      "numberOfCarsInGroupDropdown",
      "existingEquipment",
      "equipmentType",
      "equipmentTypeVIEW",
      "equipmentClass",
      "upspeedOfCar",
      "unitCapacityForRepair",
      "numberOfCarsInGroup",
      "repairNumberOfStops",
      "numberOfFrontOpenings",
      "numberOfRearOpenings",
      "totalOpenings",
      "boardName",
      "controllerBrand",
      "specificControllerModels",
      "controllerType",
      "directReplacement",
      "applicationOfUnit",
      "boardType",
      "purification",
      # Added on 9/3
      "bACNet_material",
      "cabHeightSelection",
      "compensationType",
      "controllerLocation",
      "counterweightSafety",
      "flooringThickness",
      "pitDepthInFeet",
      "interimMaintenanceTotalPrice",
      "interimMaintenanceMonths",
      "freeServiceTotalPrice",
      "freeServiceMonths",
      "appliedSRTDiscounts",
      "mAXFactoryLeadTime",
      "batch1LeadTime",
      "batch2LeadTime",
      "batch3LeadTime",
      "equipmentClassification",
      "rearDoorTypeAndHand",
      "driveType",
      "machineRoomLocation",
      "jackType",
      "capacity",
      "frontDoorTypeAndHand",
      "frontOpenings",
      "unitNumberOfStops",
      "rearOpenings",
      "futureSpeed",
      "travelInFeet",
      "totalFactoryMaterialCost",
      "otherMaterialCosts",
      "miscellaneousLaborHours",
      "adjustingInspectionNIMHours",
      "jHAHours",
      "totalCabHours",
      "totalCarDoorEquipmentHours",
      "totalCarFixturesHours",
      "totalCarHours",
      "totalControllerTaskHours",
      "totalGovernorHours",
      "totalHallFixturesHours",
      "totalHoistwayDoorEquipmentHours",
      "totalHoistwayEquipmentHours",
      "totalJackHours",
      "totalMachineHours",
      "totalPitEquipmentHours",
      "totalPowerUnitHours",
      "totalStandardJobTasksHours",
      "totalWorkByOthersHours",
      "aGILEDestinationControls",
      "roping",
      "selectedRoping",
      "ropeSize",
      "finalRopeSize",
      "finalRopeQuantity",
      "existingCarWeight",
      "existingMotorHP",
      "existingMotorRPM",
      "mainlineVoltage",
      "overheadInFeet",
      "futureOverheadInFeet",
      "orderType",
      "existingControllerManufacturer",
      "existingControllerModel",
      "existingControllerType",
      # End of the 9/3 additions
      "nickname_ServiceUnitsSet",
      "legalID_ServiceUnitsSet",
      "numberOfStops_ServiceUnitsSet",
      "frontOpenings_ServiceUnitsSet",
      "customerNumber_ServiceUnitsSet",
      "branch_ServiceUnitsSet",
      "routenumber_ServiceUnitsSet",
      "siteAddressLine1_ServiceUnitsSet",
      "siteAddressLine2_ServiceUnitsSet",
      "city_ServiceUnitsSet",
      "state_ServiceUnitsSet",
      "zipCode_ServiceUnitsSet",
      "rearOpenings_ServiceUnitsSet",
      "productType_ServiceUnitsSet",
      "applicationOfUnit_ServiceUnitsSet",
      "manufacturer_ServiceUnitsSet",
      "oEMSerialNumber_ServiceUnitsSet",
      "controllerManufacturer_ServiceUnitsSet",
      "controllerModel__ServiceUnitsSet",
      "controllerType_ServiceUnitsSet",
      "criticalUnit_ServiceUnitsSet",
      "pumpMotorOEM_ServiceUnitsSet",
      "driveConfiguration_ServiceUnitsSet",
      "speed_ServiceUnitsSet",
      "periodicTestCovered_ServiceUnitsSet",
      "annualTestCovered_ServiceUnitsSet",
      "lastAnnualInspection_ServiceUnitsSet",
      "lastPeriodicInspectionDate_ServiceUnitsSet",
      "nextSafetyInspectionDate_ServiceUnitsSet",
      "lastCategory5InspectionDate_ServiceUnitsSet",
      "capacity_ServiceUnitsSet",
      "machineRoomLocation_ServiceUnitsSet",
      "pumpMotorType_ServiceUnitsSet",
      "jackType_ServiceUnitsSet",
      "accountName_ServiceUnitsSet",
      "doorComplexity_ServiceUnitsSet",
      "equipmentType_ServiceUnitsSet",
      "motorStarter_ServiceUnitsSet",
      "equipmentCondition_ServiceUnitsSet",
      "oracleSerialNumber_ServiceUnitsSet",
      "equipmentTypeVIEW_ServiceUnitsSet",
      "equipmentClass_ServiceUnitsSet",
      "equipmentManufacturer_ServiceUnitsSet",
      "equipmentModel_ServiceUnitsSet",
      "machineManufacturer_ServiceUnitsSet",
      "customerNumber_UnitsSet",
      "unitNickName_UnitsSet",
      "unitNickName_UnitsSet_baseTab",
      "factoryJob_UnitsSet",
      "equipmentTypeVIEW_UnitsSet",
      "siteAddressLine1_UnitsSet",
      "siteAddressLine2_UnitsSet",
      # Disabled: "siteAddressLine3_UnitsSet",
      "oEMSerialNumber_UnitsSet",
      "city_UnitsSet",
      "zipCode",
      "legalID_UnitsSet",
      "oracleSerialNumber_UnitsSet",
      "state_UnitsSet",
      "buildingTypeName_UnitsSet",
      # Disabled: "addingOrRemovingOpenings_UnitsSet",
      # Disabled: "qDU",
      "frontOpenings_UnitsSet",
      "rearOpenings_UnitsSet",
      "unitNicknameReadonly_UnitsSet",
      "accountName_UnitsSet",
      "selectUnit_UnitsSet",
      # Added on 11/10/2021
      "additionalOfLandings",
      "additionalFrontOpenings",
      "additionalRearOpenings",
      "futureCapacity",
      "carUpspeed",
      "additionalTravelInFeet",
      # Added on 12/07/21
      "addALanding",
      # Added 03/08/2022 BS
      "tMMechanicBillingRate",
      "tMTeamBillingRate",
      "tMOvertimeMechanicBillingRate",
      "tMOvertimeTeamBillingRate",
      "tMPremiumMechanicBillingRate",
      "tMPremiumTeamBillingRate",
  ]
  # The pivot above was called without an explicit value list, so it only
  # produces a column for attribute names present in this batch. Every
  # expected column that is absent is added as an empty-string column so the
  # merge source always has the same set of columns.
  # All missing columns are appended in ONE projection: calling withColumn in
  # a loop adds a projection per call and can produce very large query plans.
  _present_columns = set(quote_line_config.columns)
  _missing_columns = []
  for column in cols:
    if column not in _present_columns:
      _missing_columns.append(lit("").alias(column))
      # Track the addition so a repeated name in cols is only added once.
      _present_columns.add(column)
  quote_line_config = quote_line_config.select("*", *_missing_columns)

  # Project to exactly the expected columns, in the order given by cols.
  quote_line_config_details_update = quote_line_config.select(*cols)


# Merge quote line config & material summary: update changed quote line configs and insert newly created ones

  # Upsert into the curated quote_line_config_g Delta table, matching rows on
  # (_bs_id, _id): matches are overwritten with the source row, non-matches
  # are inserted.
  quote_line_config_details_g = DeltaTable.forPath(
      spark, "/mnt/datalake_curated/view_migration/quote_line_config_g"
  )

  (
      quote_line_config_details_g.alias("t")
      .merge(
          quote_line_config_details_update.alias("s"),
          "t._bs_id = s._bs_id and t._id = s._id",
      )
      .whenNotMatchedInsertAll()
      .whenMatchedUpdateAll()
      .execute()
  )

  

# Save the quote line config CSV: export the whole quote_line_config_g Delta
# table as a single-partition CSV (with a header row). Runs at cell level,
# outside the `if quote_line_v2_list:` block, so it executes even when nothing
# was merged.
# NOTE(review): mode "append" adds another full copy of the table on every run
# instead of replacing the previous export - confirm consumers expect that.
(
    spark.read.format('delta')
    .load('/mnt/datalake_curated/view_migration/quote_line_config_g')
    .coalesce(1)
    .write.format('csv')
    .option("header", "true")
    .mode("append")
    .save("/mnt/datalake_curated/view_migration/quote_line_config")
)

In [0]:
# End the notebook run and pass this status string back as the exit value.
# NOTE(review): "Successfuly" is misspelled; left unchanged because a calling
# job may compare the exit value verbatim - confirm before correcting.
dbutils.notebook.exit("Job Completed Successfuly!")

Job Completed Successfuly!