In [1]:
using IterTools
using DataFrames
using LibPQ
using BenchmarkTools
using CSV
using Tables

## Download Test Data

In [5]:
# Fetch the green-taxi trip CSV (the 2019-12 file named in the URL) and store it
# under a fixed local name; the cell evaluates to the path returned by `download`.
let source_url = "https://nyc-tlc.s3.amazonaws.com/trip+data/green_tripdata_2019-12.csv",
    local_path = "test_data.csv"

    download(source_url, local_path)
end

"test_data.csv"

## Create Connection

In [2]:
# Connection URI for the tutorial database. Set the `POSTGRES_CON_STR` environment
# variable to target another server or to keep real credentials out of the notebook;
# the literal below is only the fallback for the local tutorial instance.
# NOTE(review): the fallback embeds a password in source — rotate it or remove the
# default before sharing this notebook.
con_str = get(
    ENV,
    "POSTGRES_CON_STR",
    "postgres://postgres:python_tutorial_5432@192.168.1.24:15432/postgres",
)

"postgres://postgres:python_tutorial_5432@192.168.1.24:15432/postgres"

In [3]:
con = LibPQ.Connection(con_str)

PostgreSQL connection (CONNECTION_OK) with parameters:
  user = postgres
  password = ********************
  dbname = postgres
  host = 192.168.1.24
  port = 15432
  client_encoding = UTF8
  options = -c DateStyle=ISO,YMD -c IntervalStyle=iso_8601 -c TimeZone=UTC
  application_name = LibPQ.jl
  sslmode = prefer
  sslcompression = 0
  gssencmode = disable
  target_session_attrs = any

## Create Test Table

In [88]:
# Start from a clean slate. `if exists` keeps this cell re-runnable: a plain
# `drop table` raises an error when `test_df` has not been created yet.
execute(con, "drop table if exists test_df")

PostgreSQL result

In [90]:
# DDL for the target table. Note that `trip_reason` is declared first here while it
# is the last column of `df_sample`; both loaders used later name their columns
# explicitly, so the differing order does not matter.
sql = """
create table test_df (
    trip_reason varchar,
    lpep_pickup_datetime timestamp, 
    passenger_count int, 
    trip_distance numeric
);"""
execute(con, sql)

PostgreSQL result

## Create Test Data

In [4]:
# Parse the downloaded CSV and materialise it as a DataFrame (output suppressed).
df = DataFrame(CSV.File("test_data.csv"));

In [5]:
# Copy out the three source columns that have a counterpart in `test_df`.
df_sample = df[:, [:lpep_pickup_datetime, :passenger_count, :trip_distance]];
# Rows containing `missing` are kept: `dropmissing!` is left disabled, and
# `_prepare_field(::Missing)` turns such values into empty CSV fields.
#dropmissing!(df_sample);

In [6]:
# Add a constant text column. The value contains a comma, a semicolon and a dollar
# sign — presumably chosen to exercise field quoting in the CSV-format COPY.
df_sample[!, :trip_reason] .= "for, more; \$, and fun";

In [7]:
names(df_sample)

4-element Array{Symbol,1}:
 :lpep_pickup_datetime
 :passenger_count     
 :trip_distance       
 :trip_reason         

## Definition of Upload Function

In [8]:
# Render one cell value as a single field of a `COPY ... (FORMAT CSV)` input line.
#
# * Non-string values are returned unchanged and stringified later by `join`.
# * `missing` becomes an empty, unquoted field, which CSV-format COPY reads as NULL.
# * Strings are always wrapped in double quotes, with embedded double quotes doubled
#   (`"` -> `""`). CSV-format COPY does not treat backslash as an escape character,
#   so delimiters inside a quoted field need no escaping at all; backslash-escaping
#   them would store stray backslashes, and `\"` would terminate the field early.
_prepare_field(x:: Any) = x
_prepare_field(x:: Missing) = ""
_prepare_field(x:: AbstractString) = string('"', replace(x, "\"" => "\"\""), '"')

_prepare_field (generic function with 3 methods)

In [9]:
"""
    load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)


Only for illustration purposes. Does not support `,` inside columns.
"""
function load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
    row_names = join(string.(Tables.columnnames(table)), ",")
    row_strings = imap(Tables.eachrow(table)) do row
        join((_prepare_field(x) for x in row), ",")*"\n"
    end
    copyin = LibPQ.CopyIn("COPY $tablename ($row_names) FROM STDIN (FORMAT CSV);", row_strings)
    execute(con, copyin)
end      

load_by_copy!

## Test

In [10]:
execute(con, "delete from test_df;")

PostgreSQL result

In [13]:
@time load_by_copy!(df_sample, con, "test_df")

  6.831418 seconds (25.50 M allocations: 1007.936 MiB, 2.24% gc time)


PostgreSQL result

In [14]:
@time load_by_copy!(df_sample[!, reverse(names(df_sample))], con, "test_df")

  6.775907 seconds (25.50 M allocations: 1021.690 MiB, 2.62% gc time)


PostgreSQL result

In [15]:
execute(con, "select count(*) from test_df") |> DataFrame

Unnamed: 0_level_0,count
Unnamed: 0_level_1,Int64⍰
1,1802508


## Comparison to SQL Insert

In [16]:
# Baseline: one parameterised INSERT per row via `LibPQ.load!`, wrapped in a single
# explicit transaction and timed as a whole.
@time begin
    execute(con, "BEGIN;")
    try
        LibPQ.load!(df_sample, con,
            """INSERT INTO test_df (lpep_pickup_datetime, passenger_count, 
            trip_distance, trip_reason) VALUES (\$1, \$2, \$3, \$4);""")
        execute(con, "COMMIT;")
    catch
        # Do not leave the session stuck inside an open/aborted transaction; undo
        # the partial load and surface the original error.
        execute(con, "ROLLBACK;")
        rethrow()
    end
end

 97.046209 seconds (38.33 M allocations: 2.109 GiB, 0.94% gc time)


PostgreSQL result

In [17]:
execute(con, "select count(*) from test_df") |> DataFrame

Unnamed: 0_level_0,count
Unnamed: 0_level_1,Int64⍰
1,2253135


The `load_by_copy!` method is more than 10 times faster for inserting data into PostgreSQL!

## Close Connection

In [163]:
close(con)

## Scratch

In [140]:
?replace

search: replace replace! redisplay



```
replace(A, old_new::Pair...; [count::Integer])
```

Return a copy of collection `A` where, for each pair `old=>new` in `old_new`, all occurrences of `old` are replaced by `new`. Equality is determined using [`isequal`](@ref). If `count` is specified, then replace at most `count` occurrences in total.

The element type of the result is chosen using promotion (see [`promote_type`](@ref)) based on the element type of `A` and on the types of the `new` values in pairs. If `count` is omitted and the element type of `A` is a `Union`, the element type of the result will not include singleton types which are replaced with values of a different type: for example, `Union{T,Missing}` will become `T` if `missing` is replaced.

See also [`replace!`](@ref).

# Examples

```jldoctest
julia> replace([1, 2, 1, 3], 1=>0, 2=>4, count=2)
4-element Array{Int64,1}:
 0
 4
 1
 3

julia> replace([1, missing], missing=>0)
2-element Array{Int64,1}:
 1
 0
```

---

```
replace(new::Function, A; [count::Integer])
```

Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`. If `count` is specified, then replace at most `count` values in total (replacements being defined as `new(x) !== x`).

# Examples

```jldoctest
julia> replace(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
4-element Array{Int64,1}:
 2
 2
 6
 4

julia> replace(Dict(1=>2, 3=>4)) do kv
           first(kv) < 3 ? first(kv)=>3 : kv
       end
Dict{Int64,Int64} with 2 entries:
  3 => 4
  1 => 3
```

---

```
replace(s::AbstractString, pat=>r; [count::Integer])
```

Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. `pat` may be a single character, a vector or a set of characters, a string, or a regular expression. If `r` is a function, each occurrence is replaced with `r(s)` where `s` is the matched substring (when `pat` is a `Regex` or `AbstractString`) or character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`). If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`).

# Examples

```jldoctest
julia> replace("Python is a programming language.", "Python" => "Julia")
"Julia is a programming language."

julia> replace("The quick foxes run quickly.", "quick" => "slow", count=1)
"The slow foxes run quickly."

julia> replace("The quick foxes run quickly.", "quick" => "", count=1)
"The  foxes run quickly."

julia> replace("The quick foxes run quickly.", r"fox(es)?" => s"bus\1")
"The quick buses run quickly."
```
