In [1]:
#r "nuget: Microsoft.Spark,2.1.0"

Installed package Microsoft.Spark version 2.1.0

Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`

In [2]:
open Microsoft.Spark.Sql

In [3]:
// Spark session shared by every cell below. GetOrCreate is used so the cell
// can be re-run without asking the builder for a second session.
let sparkSession =
    SparkSession
        .Builder()
        .AppName("Street Counter F#")
        .GetOrCreate()

[2022-03-24T13:20:19.4678215Z] [ab20b024e23c] [Info] [ConfigurationService] Using port 5567 for connection.
[2022-03-24T13:20:19.4702832Z] [ab20b024e23c] [Info] [JvmBridge] JvMBridge port is 5567
[2022-03-24T13:20:19.4708189Z] [ab20b024e23c] [Info] [JvmBridge] The number of JVM backend thread is set to 10. The max number of concurrent sockets in JvmBridge is set to 7.


In [4]:
// Raw street records loaded from "streets.csv" (semicolon-delimited).
// Every one of the ten columns is declared as a string, so nothing is parsed
// or inferred at read time.
// NOTE(review): no "header" option is set here — if the file starts with a
// header line it will be read as an ordinary data row; confirm against the file.
let dfCsv =
    let schemaDdl =
        "WOJ string ,POW string ,GMI string ,RODZ_GMI string , " +
        "SYM string , SYM_UL string , " +
        "CECHA string , NAZWA_1 string ,NAZWA_2 string , " +
        "STAN_NA string"

    sparkSession
        .Read()
        .Option("delimiter", ";")
        .Schema(schemaDdl)
        .Csv("streets.csv")

In [5]:
// dfCsv plus a derived STREET column: CECHA, NAZWA_1 and NAZWA_2 joined with
// a single space as the separator.
let dataIn =
    let streetName =
        Functions.ConcatWs(" ", dfCsv.["CECHA"], dfCsv.["NAZWA_1"], dfCsv.["NAZWA_2"])

    dfCsv.WithColumn("STREET", streetName)

In [6]:
// Number of rows per distinct STREET value. The aggregate column produced by
// Count() is named "count"; it is renamed to "COUNT" to match the upper-case
// column naming used elsewhere in the notebook.
let dataGroup =
    let rowsByStreet = dataIn.Select("STREET").GroupBy("STREET")
    rowsByStreet.Count().WithColumnRenamed("count", "COUNT")

In [7]:
// Street counts sorted so that the most frequent street names come first.
let dataOut =
    let byCountDescending = dataGroup.["COUNT"].Desc()
    dataGroup.OrderBy(byCountDescending)

In [8]:
// Print the leading rows of the sorted result as a text table (sanity check
// before charting).
dataOut.Show()

+-------------+-----+
|       STREET|COUNT|
+-------------+-----+
|    ul. Leśna| 3347|
|    ul. Polna| 3311|
|ul. Słoneczna| 2831|
|   ul. Krótka| 2433|
| ul. Ogrodowa| 2328|
|  ul. Szkolna| 2313|
| ul. Brzozowa| 1870|
|   ul. Lipowa| 1857|
|  ul. Sosnowa| 1769|
| ul. Kwiatowa| 1706|
|   ul. Łąkowa| 1678|
| ul. Akacjowa| 1528|
|   ul. Dębowa| 1440|
| ul. Spokojna| 1408|
|ul. Spacerowa| 1399|
|ul. Kościelna| 1397|
|  ul. Zielona| 1394|
|  ul. Parkowa| 1367|
| ul. Sportowa| 1330|
|    ul. Cicha| 1314|
+-------------+-----+
only showing top 20 rows



In [9]:
#r "nuget:XPlot.Plotly,4.0.1"

Installed package XPlot.Plotly version 4.0.1

In [10]:
#r "nuget:XPlot.Plotly.Interactive,4.0.1"

Installed package XPlot.Plotly.Interactive version 4.0.1

Loading extensions from `XPlot.Plotly.Interactive.dll`

Configuring PowerShell Kernel for XPlot.Plotly integration.

Installed support for XPlot.Plotly.

In [11]:
// The ten most frequent streets, pulled from Spark once and kept in an array.
// Take returns an IEnumerable<Row>, and the cells below walk `first` more than
// once (street names, counts, and again when each chart is rendered). Copying
// the rows into an array here guarantees all of those passes read the same
// local snapshot, so labels and values cannot drift apart and no extra work is
// sent to the cluster.
// NOTE(review): Microsoft.Spark's collect path appears to be a deferred
// iterator that re-runs per enumeration — confirm against the library source.
let first = dataOut.Take(10) |> Seq.toArray

In [12]:
// Street names of the selected rows, in the same order as `first`
// (used as chart labels). Evaluated lazily, on enumeration.
let streets = seq { for row in first -> row.GetAs<string>("STREET") }

In [13]:
// Occurrence counts of the selected rows, in the same order as `first`
// (used as chart values). Evaluated lazily, on enumeration.
// NOTE(review): the value is read as a 32-bit int; Spark's count aggregate is
// presumably a 64-bit long, so this relies on GetAs converting — confirm.
let counts = seq { for row in first -> row.GetAs<int>("COUNT") }

In [14]:
open XPlot.Plotly

In [15]:
// Bar trace: one bar per street, bar height = number of occurrences.
let bar =
    Bar(
        x = streets,
        y = counts
    )

In [16]:
// Wrap the bar trace in a Plotly chart object that the notebook can render.
let chartBar = Chart.Plot(bar)

In [17]:
// Render the bar chart as this cell's output.
display(chartBar)

In [18]:
// Pie trace over the same data: one slice per street, sized by its count.
let pie =
    Pie(
        values = counts,
        labels = streets
    )

In [19]:
// Wrap the pie trace in a Plotly chart object that the notebook can render.
let chartPie = Chart.Plot(pie)

In [20]:
// Render the pie chart as this cell's output.
display(chartPie)