## Overview

This notebook shows you how to create and query a table or DataFrame from a file that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) is the Databricks File System, which allows you to store data for querying inside of Databricks. This notebook assumes that you already have a file inside DBFS that you would like to read from.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [2]:
# Where the uploaded file lives in DBFS, and which Spark reader format to use for it.
file_location = "/FileStore/tables/policies.csv"
file_type = "csv"

# Reader settings that are meaningful for CSV input.
infer_schema = "true"          # passed as the reader's "inferSchema" option
first_row_is_header = "true"   # passed as the reader's "header" option
delimiter = ","                # passed as the reader's "sep" option

# Build the DataFrame in one parenthesised chain. The options below target CSV;
# for other file types they will be ignored.
df = (
    spark.read
    .format(file_type)
    .options(inferSchema=infer_schema, header=first_row_is_header, sep=delimiter)
    .load(file_location)
)

display(df)

_c0,agriculturestructureindicator,basefloodelevation,basementenclosurecrawlspacetype,cancellationdateoffloodpolicy,censustract,condominiumindicator,construction,countycode,crsdiscount,deductibleamountinbuildingcoverage,deductibleamountincontentscoverage,elevatedbuildingindicator,elevationcertificateindicator,elevationdifference,federalpolicyfee,floodzone,hfiaasurcharge,houseofworshipindicator,latitude,locationofcontents,longitude,lowestadjacentgrade,lowestfloorelevation,nonprofitindicator,numberoffloorsininsuredbuilding,obstructiontype,occupancytype,originalconstructiondate,originalnbdate,policycost,policycount,policyeffectivedate,policyterminationdate,policytermindicator,postfirmconstructionindicator,primaryresidenceindicator,propertystate,reportedzipcode,ratemethod,regularemergencyprogramindicator,reportedcity,smallbusinessindicatorbuilding,totalbuildinginsurancecoverage,totalcontentsinsurancecoverage,totalinsurancepremiumofthepolicy
0,,,0,,36103159510.0,N,N,36103,0.0,1,,N,,999,35,VE,0,,40.7,,-73.1,,,,1,,1,1925-01-01T00:00:00.000+0000,2004-04-29T00:00:00.000+0000,3290,1,2009-04-29T00:00:00.000+0000,2010-04-29T00:00:00.000+0000,1,N,N,NY,11782,1,R,CHERRY GROVE,,250000,0,3255
1,,,2,,36059405200.0,N,N,36059,0.0,0,0,N,,999,13,X,0,,40.7,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.7,,,,3,,1,1989-02-01T00:00:00.000+0000,2006-07-05T00:00:00.000+0000,388,1,2009-07-05T00:00:00.000+0000,2010-07-05T00:00:00.000+0000,1,Y,N,NY,11003,7,R,ELMONT,,250000,100000,375
2,,,0,,36055015000.0,N,N,36055,0.0,0,0,N,1.0,999,0,C,0,,43.1,Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace),-77.9,,,,2,,1,1950-07-01T00:00:00.000+0000,2007-02-04T00:00:00.000+0000,269,1,2009-01-02T00:00:00.000+0000,2009-02-04T00:00:00.000+0000,1,N,Y,NY,14428,7,R,CHURCHVILLE,,250000,100000,258
3,,,0,,36055015000.0,N,N,36055,0.0,0,0,N,1.0,999,13,C,0,,43.1,Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace),-77.9,,,,2,,1,1950-07-01T00:00:00.000+0000,2007-02-04T00:00:00.000+0000,348,1,2009-02-04T00:00:00.000+0000,2010-02-04T00:00:00.000+0000,1,N,Y,NY,14428,7,R,CHURCHVILLE,,250000,100000,335
4,,7.0,0,,36059416100.0,N,N,36059,0.0,0,0,N,3.0,3,35,A04,0,,40.7,,-73.5,,10.4,,2,,1,1991-06-01T00:00:00.000+0000,1991-09-20T00:00:00.000+0000,317,1,2009-09-20T00:00:00.000+0000,2010-09-20T00:00:00.000+0000,1,Y,Y,NY,11783,1,R,SEAFORD,,247500,0,282
5,,,1,,36059415401.0,N,N,36059,0.0,0,0,N,1.0,999,13,X,0,,40.7,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.5,,,,4,,1,1958-01-01T00:00:00.000+0000,2000-05-14T00:00:00.000+0000,388,1,2009-05-14T00:00:00.000+0000,2010-05-14T00:00:00.000+0000,1,N,Y,NY,11710,7,R,BELLMORE,,250000,100000,375
6,,,2,,36103190707.0,N,N,36103,0.0,0,0,N,,999,13,X,0,,41.0,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-72.3,,,,2,,1,1973-06-01T00:00:00.000+0000,2001-03-30T00:00:00.000+0000,144,1,2009-03-30T00:00:00.000+0000,2010-03-30T00:00:00.000+0000,1,N,Y,NY,11963,7,R,SAG HARBOR,,20000,8000,131
7,,,2,2010-03-02T00:00:00.000+0000,36047062800.0,N,N,36047,0.0,0,0,N,1.0,999,13,C,0,,40.6,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.9,,,,3,,1,1929-05-14T00:00:00.000+0000,2006-06-17T00:00:00.000+0000,388,1,2009-06-17T00:00:00.000+0000,2010-03-02T00:00:00.000+0000,1,N,Y,NY,11229,7,R,BROOKLYN,,250000,100000,375
8,,,1,2009-11-10T00:00:00.000+0000,36059413400.0,N,N,36059,0.0,1,1,N,1.0,999,13,X,0,,40.6,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.6,,,,4,,1,1962-01-01T00:00:00.000+0000,2009-10-10T00:00:00.000+0000,388,1,2009-10-10T00:00:00.000+0000,2009-11-10T00:00:00.000+0000,1,N,Y,NY,11572,7,R,OCEANSIDE,,250000,100000,375
9,,,1,,36119005902.0,N,N,36119,0.0,1,1,N,,999,35,A08,0,,40.9,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.8,,,,3,10.0,1,1966-07-19T00:00:00.000+0000,2001-03-28T00:00:00.000+0000,2081,1,2009-03-28T00:00:00.000+0000,2010-03-28T00:00:00.000+0000,1,N,Y,NY,10801,1,R,NEW ROCHELLE,,248400,1500,2046


In [3]:
# Register the DataFrame as a temporary view so it can be queried by name.
# createOrReplaceTempView replaces any existing temp view with the same name.

temp_table_name = "policies_csv"

df.createOrReplaceTempView(name=temp_table_name)

In [4]:
%sql

-- Query the created temp table in a SQL cell.

SELECT *
FROM `policies_csv`

_c0,agriculturestructureindicator,basefloodelevation,basementenclosurecrawlspacetype,cancellationdateoffloodpolicy,censustract,condominiumindicator,construction,countycode,crsdiscount,deductibleamountinbuildingcoverage,deductibleamountincontentscoverage,elevatedbuildingindicator,elevationcertificateindicator,elevationdifference,federalpolicyfee,floodzone,hfiaasurcharge,houseofworshipindicator,latitude,locationofcontents,longitude,lowestadjacentgrade,lowestfloorelevation,nonprofitindicator,numberoffloorsininsuredbuilding,obstructiontype,occupancytype,originalconstructiondate,originalnbdate,policycost,policycount,policyeffectivedate,policyterminationdate,policytermindicator,postfirmconstructionindicator,primaryresidenceindicator,propertystate,reportedzipcode,ratemethod,regularemergencyprogramindicator,reportedcity,smallbusinessindicatorbuilding,totalbuildinginsurancecoverage,totalcontentsinsurancecoverage,totalinsurancepremiumofthepolicy
0,,,0,,36103159510.0,N,N,36103,0.0,1,,N,,999,35,VE,0,,40.7,,-73.1,,,,1,,1,1925-01-01T00:00:00.000+0000,2004-04-29T00:00:00.000+0000,3290,1,2009-04-29T00:00:00.000+0000,2010-04-29T00:00:00.000+0000,1,N,N,NY,11782,1,R,CHERRY GROVE,,250000,0,3255
1,,,2,,36059405200.0,N,N,36059,0.0,0,0,N,,999,13,X,0,,40.7,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.7,,,,3,,1,1989-02-01T00:00:00.000+0000,2006-07-05T00:00:00.000+0000,388,1,2009-07-05T00:00:00.000+0000,2010-07-05T00:00:00.000+0000,1,Y,N,NY,11003,7,R,ELMONT,,250000,100000,375
2,,,0,,36055015000.0,N,N,36055,0.0,0,0,N,1.0,999,0,C,0,,43.1,Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace),-77.9,,,,2,,1,1950-07-01T00:00:00.000+0000,2007-02-04T00:00:00.000+0000,269,1,2009-01-02T00:00:00.000+0000,2009-02-04T00:00:00.000+0000,1,N,Y,NY,14428,7,R,CHURCHVILLE,,250000,100000,258
3,,,0,,36055015000.0,N,N,36055,0.0,0,0,N,1.0,999,13,C,0,,43.1,Lowest floor above ground level and higher floors (No basement/enclosure/crawlspace/subgrade crawlspace),-77.9,,,,2,,1,1950-07-01T00:00:00.000+0000,2007-02-04T00:00:00.000+0000,348,1,2009-02-04T00:00:00.000+0000,2010-02-04T00:00:00.000+0000,1,N,Y,NY,14428,7,R,CHURCHVILLE,,250000,100000,335
4,,7.0,0,,36059416100.0,N,N,36059,0.0,0,0,N,3.0,3,35,A04,0,,40.7,,-73.5,,10.4,,2,,1,1991-06-01T00:00:00.000+0000,1991-09-20T00:00:00.000+0000,317,1,2009-09-20T00:00:00.000+0000,2010-09-20T00:00:00.000+0000,1,Y,Y,NY,11783,1,R,SEAFORD,,247500,0,282
5,,,1,,36059415401.0,N,N,36059,0.0,0,0,N,1.0,999,13,X,0,,40.7,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.5,,,,4,,1,1958-01-01T00:00:00.000+0000,2000-05-14T00:00:00.000+0000,388,1,2009-05-14T00:00:00.000+0000,2010-05-14T00:00:00.000+0000,1,N,Y,NY,11710,7,R,BELLMORE,,250000,100000,375
6,,,2,,36103190707.0,N,N,36103,0.0,0,0,N,,999,13,X,0,,41.0,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-72.3,,,,2,,1,1973-06-01T00:00:00.000+0000,2001-03-30T00:00:00.000+0000,144,1,2009-03-30T00:00:00.000+0000,2010-03-30T00:00:00.000+0000,1,N,Y,NY,11963,7,R,SAG HARBOR,,20000,8000,131
7,,,2,2010-03-02T00:00:00.000+0000,36047062800.0,N,N,36047,0.0,0,0,N,1.0,999,13,C,0,,40.6,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.9,,,,3,,1,1929-05-14T00:00:00.000+0000,2006-06-17T00:00:00.000+0000,388,1,2009-06-17T00:00:00.000+0000,2010-03-02T00:00:00.000+0000,1,N,Y,NY,11229,7,R,BROOKLYN,,250000,100000,375
8,,,1,2009-11-10T00:00:00.000+0000,36059413400.0,N,N,36059,0.0,1,1,N,1.0,999,13,X,0,,40.6,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.6,,,,4,,1,1962-01-01T00:00:00.000+0000,2009-10-10T00:00:00.000+0000,388,1,2009-10-10T00:00:00.000+0000,2009-11-10T00:00:00.000+0000,1,N,Y,NY,11572,7,R,OCEANSIDE,,250000,100000,375
9,,,1,,36119005902.0,N,N,36119,0.0,1,1,N,,999,35,A08,0,,40.9,Basement/Enclosure/Crawlspace/Subgrade Crawlspace and above,-73.8,,,,3,10.0,1,1966-07-19T00:00:00.000+0000,2001-03-28T00:00:00.000+0000,2081,1,2009-03-28T00:00:00.000+0000,2010-03-28T00:00:00.000+0000,1,N,Y,NY,10801,1,R,NEW ROCHELLE,,248400,1500,2046


In [5]:
# A temp view is only available to this particular notebook. If you'd like other users
# to be able to query this data, save the DataFrame as a table instead: once saved, the
# table persists across cluster restarts and can be queried from other notebooks.
#
# NOTE: the write below is live, so running this cell creates the table.
# mode("overwrite") replaces any existing table of this name, which keeps the cell
# re-runnable; with the default save mode the write raises once the table already exists.
#
# NOTE(review): this name matches the temp view registered earlier in the notebook;
# confirm which of the two unqualified queries in this session are meant to hit.

permanent_table_name = "policies_csv"

df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name)