### DELTA COLUMN MAPPING
- The column mapping feature allows Delta table columns and the underlying Parquet file columns to use different names. This enables Delta schema evolution operations such as RENAME COLUMN and DROP COLUMNS on a Delta table without the need to rewrite the underlying Parquet files.
- Column mapping requires the following Delta protocols:

**Reader version 2 or above.**

**Writer version 5 or above.**

For a Delta table with the required protocol versions, you can enable column mapping by setting the table property **`delta.columnMapping.mode`** to **`name`**.

In [0]:
%sql
-- Demo table for the walkthrough; no column-mapping properties are set at creation time.
CREATE TABLE my_db.persons (
  empid     INT,
  empname   STRING,
  empcity   STRING,
  empsalary INT
)
USING DELTA;

In [0]:
%sql
-- Seed the table with four sample employees.
INSERT INTO my_db.persons (empid, empname, empcity, empsalary)
VALUES
  (1, 'A', 'Hyderabad', 1000),
  (2, 'B', 'Chennai',   2000),
  (3, 'C', 'Mumbai',    3000),
  (4, 'D', 'Bengaluru', 4000);

num_affected_rows,num_inserted_rows
4,4


* Try to rename a column before enabling column mapping (this is expected to fail)

In [0]:
%sql
-- Intentionally run before column mapping is enabled: this demonstrates the AnalysisException.
ALTER TABLE my_db.persons
RENAME COLUMN empsalary TO salary;

---------------------------------------------------------------------------
AnalysisException                         Traceback (most recent call last)
File <command-4262887905780457>:7
      5     display(df)
      6     return df
----> 7   _sqldf = ____databricks_percent_sql()
      8 finally:
      9   del ____databricks_percent_sql

File <command-4262887905780457>:4, in ____databricks_percent_sql()
      2 def ____databricks_percent_sql():
      3   import base64
----> 4   df = spark.sql(base64.standard_b64decode("

* Now try to drop a column before enabling column mapping (this is also expected to fail)

In [0]:
%sql
-- Intentionally run before column mapping is enabled: this demonstrates the AnalysisException.
ALTER TABLE my_db.persons
DROP COLUMN empsalary;

---------------------------------------------------------------------------
AnalysisException                         Traceback (most recent call last)
File <command-4262887905780459>:7
      5     display(df)
      6     return df
----> 7   _sqldf = ____databricks_percent_sql()
      8 finally:
      9   del ____databricks_percent_sql

File <command-4262887905780459>:4, in ____databricks_percent_sql()
      2 def ____databricks_percent_sql():
      3   import base64
----> 4   df = spark.sql(base64.standard_b64decode("

* In both scenarios above, column mapping is not enabled on the Delta table, so we are unable to rename or drop columns.

- Enable column mapping for the persons table

In [0]:
%sql
-- Set the reader/writer protocol versions that column mapping requires and switch the mode to 'name'.
ALTER TABLE my_db.persons
SET TBLPROPERTIES (
  'delta.minReaderVersion'   = '2',
  'delta.minWriterVersion'   = '5',
  'delta.columnMapping.mode' = 'name'
)

##### NOTE
* After enabling column mapping, renaming or dropping columns on the table does not rename or drop
those columns in the underlying raw data files.

In [0]:
%sql
-- Schema before any rename/drop: empsalary is still present.
DESCRIBE TABLE my_db.persons

col_name,data_type,comment
empid,int,
empname,string,
empcity,string,
empsalary,int,


In [0]:
%sql
-- Same rename as the earlier failing cell, now run with column mapping enabled.
ALTER TABLE my_db.persons
RENAME COLUMN empsalary TO salary;

In [0]:
%sql
-- Confirm the table schema now exposes the column as salary.
DESCRIBE TABLE my_db.persons

col_name,data_type,comment
empid,int,
empname,string,
empcity,string,
salary,int,


In [0]:
%sql
-- Drop the renamed column from the table schema.
ALTER TABLE my_db.persons
DROP COLUMN salary;

In [0]:
%sql
-- Confirm the salary column no longer appears in the table schema.
DESCRIBE TABLE my_db.persons

col_name,data_type,comment
empid,int,
empname,string,
empcity,string,


In [0]:
%sql
-- Read the table through Delta: only the remaining columns are returned.
SELECT *
FROM my_db.persons

empid,empname,empcity
1,A,Hyderabad
2,B,Chennai
3,C,Mumbai
4,D,Bengaluru


* Checking the underlying data files

In [0]:
%sql
-- Table-level metadata: location, file count, properties and protocol versions.
DESCRIBE DETAIL my_db.persons

format,id,name,description,location,createdAt,lastModified,partitionColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics
delta,89f7496c-fa2c-43e1-937f-681deea1e845,spark_catalog.my_db.persons,,dbfs:/user/hive/warehouse/my_db.db/persons,2025-01-29T12:39:45.112+0000,2025-01-29T12:46:23.000+0000,List(),1,1364,"Map(delta.columnMapping.mode -> name, delta.columnMapping.maxColumnId -> 4)",2,5,"List(appendOnly, changeDataFeed, checkConstraints, columnMapping, generatedColumns, invariants)",Map()


In [0]:
%fs cp /user/hive/warehouse/my_db.db/persons/part-00000-9efb8d4f-bbdb-42d7-b8e1-575188df8e32-c000.snappy.parquet /FileStore/part-00000-9efb8d4f-bbdb-42d7-b8e1-575188df8e32-c000.snappy.parquet

In [0]:
%sql
-- Read the copied Parquet file directly, bypassing the Delta log, to inspect its physical columns.
SELECT *
FROM parquet.`/FileStore/part-00000-9efb8d4f-bbdb-42d7-b8e1-575188df8e32-c000.snappy.parquet`

empid,empname,empcity,empsalary
1,A,Hyderabad,1000
2,B,Chennai,2000
3,C,Mumbai,3000
4,D,Bengaluru,4000


**We can see that the underlying Parquet file is unaffected: it still contains the original column names and data, including `empsalary`.**

- If we also want the underlying data files rewritten so that dropped columns and their data are physically removed, we can use the REORG command.

In [0]:
%sql
-- Rewrite the data files with the PURGE option.
REORG TABLE my_db.persons APPLY (PURGE)

path,metrics
dbfs:/user/hive/warehouse/my_db.db/persons,"List(1, 1, List(1243, 1243, 1243.0, 1, 1243), List(1364, 1364, 1364.0, 1, 1364), 0, null, 1, 1, 0, true, 0, 0, 1738155498452, 1738155507998, 8, 1, null, List(0, 0), 3, 3, 548)"


In [0]:
%fs ls dbfs:/user/hive/warehouse/my_db.db/persons

path,name,size,modificationTime
dbfs:/user/hive/warehouse/my_db.db/persons/PE/,PE/,0,0
dbfs:/user/hive/warehouse/my_db.db/persons/_delta_log/,_delta_log/,0,0
dbfs:/user/hive/warehouse/my_db.db/persons/part-00000-9efb8d4f-bbdb-42d7-b8e1-575188df8e32-c000.snappy.parquet,part-00000-9efb8d4f-bbdb-42d7-b8e1-575188df8e32-c000.snappy.parquet,1364,1738154455000


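* REORG created a new subfolder (here `PE/`) under the persons directory and wrote the rewritten data file, without the dropped column, into it. The original Parquet file is still listed because REORG only removes it from the Delta log; it is physically deleted later by VACUUM once the retention period has passed.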

In [0]:
%sql
-- Show the table's commit history, one row per operation.
DESCRIBE HISTORY my_db.persons

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
5,2025-01-29T12:58:25.000+0000,6821501072026142,rockyrams1998@gmail.com,REORG,"Map(predicate -> [], applyPurge -> true, batchId -> 0)",,List(4262887905780450),0129-120610-n6g9lwij,4.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1364, p25FileSize -> 1243, numDeletionVectorsRemoved -> 0, minFileSize -> 1243, numAddedFiles -> 1, maxFileSize -> 1243, p75FileSize -> 1243, p50FileSize -> 1243, numAddedBytes -> 1243)",,Databricks-Runtime/12.2.x-scala2.12
4,2025-01-29T12:46:23.000+0000,6821501072026142,rockyrams1998@gmail.com,DROP COLUMNS,"Map(columns -> [""salary""])",,List(4262887905780450),0129-120610-n6g9lwij,3.0,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12
3,2025-01-29T12:45:58.000+0000,6821501072026142,rockyrams1998@gmail.com,RENAME COLUMN,"Map(oldColumnPath -> empsalary, newColumnPath -> salary)",,List(4262887905780450),0129-120610-n6g9lwij,2.0,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12
2,2025-01-29T12:44:14.000+0000,6821501072026142,rockyrams1998@gmail.com,SET TBLPROPERTIES,"Map(properties -> {""delta.minReaderVersion"":""2"",""delta.minWriterVersion"":""5"",""delta.columnMapping.mode"":""name""})",,List(4262887905780450),0129-120610-n6g9lwij,1.0,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12
1,2025-01-29T12:40:55.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(4262887905780450),0129-120610-n6g9lwij,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 4, numOutputBytes -> 1364)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-29T12:39:46.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(4262887905780450),0129-120610-n6g9lwij,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%fs ls dbfs:/user/hive/warehouse/my_db.db/persons/PE/

path,name,size,modificationTime
dbfs:/user/hive/warehouse/my_db.db/persons/PE/part-00000-a3df0c4f-e292-4dc9-b87a-a678c2804e79-c000.snappy.parquet,part-00000-a3df0c4f-e292-4dc9-b87a-a678c2804e79-c000.snappy.parquet,1243,1738155505000


In [0]:
%fs cp dbfs:/user/hive/warehouse/my_db.db/persons/PE/part-00000-a3df0c4f-e292-4dc9-b87a-a678c2804e79-c000.snappy.parquet /FileStore/data/part-00000-a3df0c4f-e292-4dc9-b87a-a678c2804e79-c000.snappy.parquet

In [0]:
%sql
-- Read the file written by REORG directly to confirm the dropped column is physically gone.
SELECT *
FROM parquet.`/FileStore/data/part-00000-a3df0c4f-e292-4dc9-b87a-a678c2804e79-c000.snappy.parquet`

empid,empname,empcity
1,A,Hyderabad
2,B,Chennai
3,C,Mumbai
4,D,Bengaluru
