## 0.Import and package installation

In [None]:
# Import libraries and modules
using DataFrames
using Statistics
using CSV

In [None]:
# Install third-party packages used later in this notebook.
# JSON3 is loaded in the export section, so it is installed here alongside JSON.
using Pkg
Pkg.add(["JSON", "JSON3"])

In [None]:
# Search Packages (kept as a comment: a bare URL is not valid Julia code)
# https://juliapackages.com/

## 1.Pandas Series and DataFrame vs Julia Vector and DataFrame comparison

In [None]:
# Pandas Series vs Julia vector: a one-dimensional array literal
s = [1, 2, 3]

In [None]:
# Get the first element of an array or Series (index 1 is the first element)
s[1]

In [None]:
# Pandas vs Julia DataFrame: columns `a` and `b` are built from the ranges
# 11:13 and 21:23
df = DataFrame(a=11:13, b=21:23)

In [None]:
# Create a random DataFrame from a 10x3 matrix of random numbers,
# with columns named a, b and c.
using Random
# Seed the global random number generator with a fixed value.
Random.seed!(1);
df = DataFrame(rand(10, 3), [:a, :b, :c])

## 2.Import Data Julia vs Pandas

In [None]:
# Read a CSV file; the second argument asks CSV.read for a DataFrame
df = CSV.read("file.csv", DataFrame)

In [None]:
# Read a JSON file and parse its contents
using JSON
JSON.parsefile("file.json")

In [None]:
# Read data from URL.
# `urldownload` comes from a package that this notebook never loads, so the
# file is fetched with the Downloads standard library (which returns the path
# of the downloaded file) and parsed with CSV, which is already imported.
using Downloads
A = CSV.File(Downloads.download("https://example.com/file.csv"))
A |> DataFrame

In [None]:
# Read a delimited file: tab-separated columns, newline-terminated rows,
# every field parsed as Int.
# `readdlm` is provided by the DelimitedFiles standard library, which must be
# loaded before the call.
using DelimitedFiles
readdlm("delim_file.txt", '\t', Int, '\n')

## 3.Data export - Pandas vs Julia

In [None]:
# Write the DataFrame `df` to a CSV file
CSV.write("file.csv", df)

In [None]:
# Write the DataFrame to a file in JSON format.
using JSON3
# Uses `df`, the data frame used throughout this notebook (`df1` was never
# defined). Each row is converted to a NamedTuple so that JSON3 writes the
# table as an array of objects, one object per row.
open("file.json", "w") do io
    JSON3.write(io, NamedTuple.(eachrow(df)))
end

## 4.Statistics, samples and summary of the data

In [None]:
# First n rows (here n = 6)
first(df, 6)

In [None]:
# Last n rows (here n = 6)
last(df, 6)

In [None]:
# Summary statistics for every column
describe(df)

In [None]:
# Summary statistics restricted to the selected column(s), here only `a`
describe(df[!, [:a]])

In [None]:
# Statistical functions: arithmetic mean of column `a`.
# Column names are case-sensitive; the data frames in this notebook use
# lowercase names, so the column is `a`, not `A`.
using Statistics
mean(df.a)

## 5.Select data by index, by label, get subset

In [None]:
# Select the first N rows (here rows 1 to 3) - all columns
df[1:3, :]

In [None]:
# Select rows by an explicit list of row indices
df[[1, 2, 3], :]

In [None]:
# Select columns by name (copy): all rows of columns a and b
df[:, [:a, :b]]

In [None]:
# Select columns by name (reference): `!` as the row selector returns the
# chosen columns without copying them.
# Column names are case-sensitive, so the lowercase `:a` used elsewhere in
# this notebook is required here.
df[!, [:a]]

In [None]:
# Subset rows and columns: rows 1 to 3, columns in the order b, a
df[1:3, [:b, :a]]

In [None]:
# Reverse selection: rows 3 and 1 (in that order) of column c
df[[3, 1], [:c]]

In [None]:
# Locate missing values (`missing` is tested here, not floating-point NaN):
# returns the indices of the entries of column "a" that are `missing`
findall(ismissing, df[:, "a"])

In [None]:
# Select the non-missing values of column "a"
filter(!ismissing, df[:, "a"])

## 6.Add new columns and rows

In [None]:
# Add new column "d" computed from column "a" (values multiplied by 100)
df[!, "d"] = df[!, "a"] * 100

In [None]:
# Add new column "e" filled with a single value (false)
df[!, "e"] .= false

In [None]:
# Add the new row [0, 0, 0] at the end of the DataFrame (modifies df)
push!(df,[0, 0, 0])

In [None]:
# Add the rows of DataFrame df2 to the existing DataFrame df (modifies df)
append!(df,df2)

## 7.Drop data from DataFrame

In [None]:
# (Series) Drop a value from a vector by index (row axis).
# deleteat! removes the element at the given position; filtering with
# `e -> e ≠ 1` would instead remove every element whose *value* is 1.
deleteat!(a, 1)

In [None]:
# (Series) Drop several values from a vector by index (row axis).
# The indices must be sorted and unique.
deleteat!(a, [1, 2])

In [None]:
# Drop column `b` by name (column axis), modifying df in place.
# `Not(:b)` selects every column except `b`. The previous
# `dropmissing!(df[:, ["b"]])` only removed missing rows from a temporary
# copy of column b and left df unchanged.
select!(df, Not(:b))

In [None]:
# Drop all rows that contain missing values (modifies df)
dropmissing!(df)

In [None]:
# Select only the rows in which no value is missing
# (indexing builds a new result instead of modifying df)
df[all.(!ismissing, eachrow(df)), :]

In [None]:
# Select only the columns in which no value is missing
df[:, all.(!ismissing, eachcol(df))]

## 8.Sorting and rank values in Pandas vs Julia

In [None]:
# Sort an array of values (returns a sorted copy)
sort([2,3,1])

In [None]:
# Sort in reverse order
sort([2,3,1], rev=true)

In [None]:
# Sort DataFrame by column a
sort(df, [:a])

In [None]:
# Sort DataFrame by multiple columns: a in reverse order first, then b
sort(df, [order(:a, rev=true), :b])

## 9.Filter data based on multiple criteria

In [None]:
# Find columns with missing values: for each column, true if any entry is missing
mapcols(x -> any(ismissing, x), df)

In [None]:
# Values greater than X: keep the rows whose column a exceeds 100
filter(row -> row.a > 100, df)

In [None]:
# Filter on multiple conditions inside a row predicate: && is "and", || is "or".
# The comparison uses the String "a": a Char literal ('a') never compares
# equal to a String, so that predicate would be false for every string row.
filter(row -> row.a == "a" && row.b >= 5, df)

In [None]:
# Filter by string value: keep the rows where column a equals "test"
df[ ( df.a .== "test" ) , :]

In [None]:
# Combine conditions element-wise with .& (both comparisons must hold)
df[ ( df.a .== "test" ) .& ( df.b .== "a2" ), :]

## 10.Group by and summarize data

In [None]:
# Group by a single column (a)
groupby(df, [:a])

In [None]:
# Group by multiple columns (a, b) and sum the third column (c) per group
gdf = groupby(df, [:a, :b])
combine(gdf, :c => sum)

In [None]:
# Group by column a and count the rows in each group; the result column is
# named `count`. Uses `:a`, the column name used by the other group-by
# examples in this notebook, instead of `:x1`, which appears nowhere else.
combine(groupby(df, [:a]), nrow => :count)

## 11.Convert to date, string, numeric

In [None]:
# Replace missing values in column a with 0
# (non-mutating `replace`: the result is returned, df.a is not reassigned)
replace(df.a,missing => 0)

In [None]:
# Convert the placeholder string ".." to missing, element-wise over the DataFrame
ifelse.(df .== "..", missing, df)

In [None]:
# Convert column a from strings to Int64 and store the result back in df
df[!, :a] = parse.(Int64, df[!, :a])

In [None]:
# Convert a column of strings to dates (day-month-year).
using Dates
# The dateformat"..." literal builds the DateFormat object once, at macro
# expansion time. Passing the format as a plain String inside a broadcast
# would construct a DateFormat again for every element.
df.Date = Date.(df.Date, dateformat"dd-mm-yyyy")