## Overview

This notebook shows you how to create and query a table or DataFrame from a file that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html), the Databricks File System, lets you store data so that it can be queried from within Databricks. This notebook assumes that the file you would like to read is already in DBFS.

This notebook is written in **Python**, so the default cell type is Python. However, you can use a different language in an individual cell by starting the cell with the `%LANGUAGE` syntax (for example, `%sql`). Python, Scala, SQL, and R are all supported.

In [2]:
# Where the uploaded file lives in DBFS, and the data source format used to read it
file_location = "/FileStore/tables/claims.csv"
file_type = "csv"

# Reader settings for CSV input
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# Gather the reader options in one mapping. They are meant for CSV files;
# for other file types, these will be ignored.
csv_options = {
    "inferSchema": infer_schema,
    "sep": delimiter,
    "header": first_row_is_header,
}

# Read the file into a DataFrame using the format and options chosen above
df = (
    spark.read.format(file_type)
    .options(**csv_options)
    .load(file_location)
)

display(df)

_c0,agriculturestructureindicator,asofdate,basefloodelevation,basementenclosurecrawlspacetype,reportedcity,condominiumindicator,policycount,countycode,crsdiscount,dateofloss,elevatedbuildingindicator,elevationcertificateindicator,elevationdifference,censustract,floodzone,houseworship,latitude,locationofcontents,longitude,lowestadjacentgrade,lowestfloorelevation,numberoffloorsintheinsuredbuilding,nonprofitindicator,obstructiontype,occupancytype,originalconstructiondate,originalnbdate,amountpaidonbuildingclaim,amountpaidoncontentsclaim,amountpaidonincreasedcostofcomplianceclaim,postfirmconstructionindicator,ratemethod,smallbusinessindicatorbuilding,state,totalbuildinginsurancecoverage,totalcontentsinsurancecoverage,yearofloss,reportedzipcode,primaryresidence
0,,2019-05-31T00:00:00.000+0000,,1,MOUNT KISCO,N,1,36119,0.0,2001-06-16T00:00:00.000+0000,N,,999,36119013000.0,X,,41.2,,-73.7,,,3,,10.0,1,1916-01-01T00:00:00.000+0000,2000-02-25T00:00:00.000+0000,0.0,0.0,,N,7,,NY,20000,5000,2001,10549,Y
1,,2019-05-31T00:00:00.000+0000,,0,ROCKY POINT,N,1,36103,0.0,2013-06-07T00:00:00.000+0000,N,,999,36103158407.0,X,,41.0,,-73.0,,,2,,,1,1912-01-01T00:00:00.000+0000,1998-09-03T00:00:00.000+0000,0.0,0.0,,N,7,,NY,250000,100000,2013,11778,Y
2,,2019-05-31T00:00:00.000+0000,7.0,0,"DAVIS PARK, FI",N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,4,36103159510.0,AE,,40.7,,-73.0,,11.0,1,,10.0,1,1965-01-01T00:00:00.000+0000,1990-12-09T00:00:00.000+0000,5503.73,0.0,0.0,N,1,,NY,250000,34300,2012,11772,N
3,,2019-05-31T00:00:00.000+0000,12.0,0,"DAVIS PARK, FI",N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,8,36103159510.0,VE,,40.7,,-73.0,,19.6,1,,10.0,1,1953-07-01T00:00:00.000+0000,1998-01-25T00:00:00.000+0000,250000.0,31100.0,0.0,N,A,,NY,250000,31100,2012,11772,N
4,,2019-05-31T00:00:00.000+0000,12.0,0,POINT O WOODS,N,1,36103,0.0,2011-08-28T00:00:00.000+0000,Y,,2,36103146002.0,VE,,40.8,,-73.2,,14.2,2,,10.0,1,1987-09-30T00:00:00.000+0000,1987-12-28T00:00:00.000+0000,6901.48,0.0,0.0,Y,A,,NY,250000,100000,2011,11706,N
5,,2019-05-31T00:00:00.000+0000,12.0,0,POINT O WOODS,N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,2,36103146002.0,VE,,40.8,,-73.2,,14.2,2,,10.0,1,1987-09-30T00:00:00.000+0000,1987-12-28T00:00:00.000+0000,64147.69,0.0,0.0,Y,A,,NY,250000,100000,2012,11706,N
6,,2019-05-31T00:00:00.000+0000,9.0,2,WESTHAMPTON BEA,N,1,36103,0.05,2005-10-14T00:00:00.000+0000,N,,-4,36103190502.0,AE,,40.8,,-72.6,9.2,4.9,3,,10.0,1,1978-01-01T00:00:00.000+0000,1994-04-24T00:00:00.000+0000,0.0,,,Y,2,,NY,50000,0,2005,11978,Y
7,,2019-05-31T00:00:00.000+0000,9.0,2,WESTHAMPTON BEA,N,1,36103,0.0,2011-09-01T00:00:00.000+0000,N,,-4,36103190502.0,AE,,40.8,,-72.6,9.2,4.9,3,,,1,1978-01-01T00:00:00.000+0000,1994-04-24T00:00:00.000+0000,0.0,,,Y,2,,NY,250000,0,2011,11978,Y
8,,2019-05-31T00:00:00.000+0000,,3,FREEPORT,N,1,36059,0.15,2011-08-28T00:00:00.000+0000,Y,,999,36059414501.0,AE,,40.6,,-73.6,,,3,,50.0,1,1951-01-01T00:00:00.000+0000,2009-11-30T00:00:00.000+0000,15998.04,,0.0,N,1,,NY,50000,0,2011,11520,Y
9,,2019-05-31T00:00:00.000+0000,,3,FREEPORT,N,1,36059,0.15,2012-10-29T00:00:00.000+0000,Y,,999,36059414501.0,AE,,40.6,,-73.6,,,3,,50.0,1,1951-01-01T00:00:00.000+0000,2009-11-30T00:00:00.000+0000,50000.0,,0.0,N,1,,NY,50000,0,2012,11520,Y


In [3]:
# Register the DataFrame as a temporary view so that it can be queried by name

temp_table_name = "claims_csv"

df.createOrReplaceTempView(name=temp_table_name)

In [4]:
%sql

/* Preview the first 10 rows of the temp view created above, from a SQL cell */

SELECT *
FROM `claims_csv`
LIMIT 10

_c0,agriculturestructureindicator,asofdate,basefloodelevation,basementenclosurecrawlspacetype,reportedcity,condominiumindicator,policycount,countycode,crsdiscount,dateofloss,elevatedbuildingindicator,elevationcertificateindicator,elevationdifference,censustract,floodzone,houseworship,latitude,locationofcontents,longitude,lowestadjacentgrade,lowestfloorelevation,numberoffloorsintheinsuredbuilding,nonprofitindicator,obstructiontype,occupancytype,originalconstructiondate,originalnbdate,amountpaidonbuildingclaim,amountpaidoncontentsclaim,amountpaidonincreasedcostofcomplianceclaim,postfirmconstructionindicator,ratemethod,smallbusinessindicatorbuilding,state,totalbuildinginsurancecoverage,totalcontentsinsurancecoverage,yearofloss,reportedzipcode,primaryresidence
0,,2019-05-31T00:00:00.000+0000,,1,MOUNT KISCO,N,1,36119,0.0,2001-06-16T00:00:00.000+0000,N,,999,36119013000,X,,41.2,,-73.7,,,3,,10.0,1,1916-01-01T00:00:00.000+0000,2000-02-25T00:00:00.000+0000,0.0,0.0,,N,7,,NY,20000,5000,2001,10549,Y
1,,2019-05-31T00:00:00.000+0000,,0,ROCKY POINT,N,1,36103,0.0,2013-06-07T00:00:00.000+0000,N,,999,36103158407,X,,41.0,,-73.0,,,2,,,1,1912-01-01T00:00:00.000+0000,1998-09-03T00:00:00.000+0000,0.0,0.0,,N,7,,NY,250000,100000,2013,11778,Y
2,,2019-05-31T00:00:00.000+0000,7.0,0,"DAVIS PARK, FI",N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,4,36103159510,AE,,40.7,,-73.0,,11.0,1,,10.0,1,1965-01-01T00:00:00.000+0000,1990-12-09T00:00:00.000+0000,5503.73,0.0,0.0,N,1,,NY,250000,34300,2012,11772,N
3,,2019-05-31T00:00:00.000+0000,12.0,0,"DAVIS PARK, FI",N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,8,36103159510,VE,,40.7,,-73.0,,19.6,1,,10.0,1,1953-07-01T00:00:00.000+0000,1998-01-25T00:00:00.000+0000,250000.0,31100.0,0.0,N,A,,NY,250000,31100,2012,11772,N
4,,2019-05-31T00:00:00.000+0000,12.0,0,POINT O WOODS,N,1,36103,0.0,2011-08-28T00:00:00.000+0000,Y,,2,36103146002,VE,,40.8,,-73.2,,14.2,2,,10.0,1,1987-09-30T00:00:00.000+0000,1987-12-28T00:00:00.000+0000,6901.48,0.0,0.0,Y,A,,NY,250000,100000,2011,11706,N
5,,2019-05-31T00:00:00.000+0000,12.0,0,POINT O WOODS,N,1,36103,0.0,2012-10-29T00:00:00.000+0000,Y,,2,36103146002,VE,,40.8,,-73.2,,14.2,2,,10.0,1,1987-09-30T00:00:00.000+0000,1987-12-28T00:00:00.000+0000,64147.69,0.0,0.0,Y,A,,NY,250000,100000,2012,11706,N
6,,2019-05-31T00:00:00.000+0000,9.0,2,WESTHAMPTON BEA,N,1,36103,0.05,2005-10-14T00:00:00.000+0000,N,,-4,36103190502,AE,,40.8,,-72.6,9.2,4.9,3,,10.0,1,1978-01-01T00:00:00.000+0000,1994-04-24T00:00:00.000+0000,0.0,,,Y,2,,NY,50000,0,2005,11978,Y
7,,2019-05-31T00:00:00.000+0000,9.0,2,WESTHAMPTON BEA,N,1,36103,0.0,2011-09-01T00:00:00.000+0000,N,,-4,36103190502,AE,,40.8,,-72.6,9.2,4.9,3,,,1,1978-01-01T00:00:00.000+0000,1994-04-24T00:00:00.000+0000,0.0,,,Y,2,,NY,250000,0,2011,11978,Y
8,,2019-05-31T00:00:00.000+0000,,3,FREEPORT,N,1,36059,0.15,2011-08-28T00:00:00.000+0000,Y,,999,36059414501,AE,,40.6,,-73.6,,,3,,50.0,1,1951-01-01T00:00:00.000+0000,2009-11-30T00:00:00.000+0000,15998.04,,0.0,N,1,,NY,50000,0,2011,11520,Y
9,,2019-05-31T00:00:00.000+0000,,3,FREEPORT,N,1,36059,0.15,2012-10-29T00:00:00.000+0000,Y,,999,36059414501,AE,,40.6,,-73.6,,,3,,50.0,1,1951-01-01T00:00:00.000+0000,2009-11-30T00:00:00.000+0000,50000.0,,0.0,N,1,,NY,50000,0,2012,11520,Y


In [5]:
# With the data registered only as a temp view, it is available just to this
# particular notebook. To let other users and other notebooks query it, and to
# have it persist across cluster restarts, save the DataFrame as a table.
#
# NOTE(review): the table name below is the same as the temp view name used
# earlier in this notebook. Spark resolves a temp view before a catalog table
# of the same name, so an unqualified `claims_csv` query in this session still
# reads the view - confirm this is intended, or qualify the table with its
# database name when querying it.

permanent_table_name = "claims_csv"

# mode("overwrite") replaces the table if it already exists, so re-running this
# cell (or the whole notebook) does not fail on the default error-if-exists
# save mode.
df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name)