# This is the file for Julia tutorials

In [1]:
# Two values to compare.
x, y = 2, 1

# Exactly one branch of an if / elseif / else chain runs: the first one whose
# condition is true, or the `else` branch when no condition is true.
if x > y
    println("$x is greater than $y.")
elseif x < y
    println("$x is less than $y.")
else
    println("$x is equal to $y.")
end

2 is greater than 1.


Now let us try a more compact way of writing the same kind of test in Julia.

The trick is the ternary operator `condition ? a : b`: it evaluates `a` when the condition is true and `b` otherwise.

In [3]:
x, y = 2, 3

# `cond ? a : b` evaluates to `a` when `cond` is true and to `b` otherwise,
# so the ternary picks the message and a single println prints it.
println(x > y ? "$x is greater than $y." : "$x is less or equal to $y.")

2 is less or equal to 3.


Let us try the while loop. 

In [7]:
i = 1

# Repeat the body for as long as the condition holds.
while i <= 10
    println(i)
    # `global` makes the update target the top-level `i`. Notebooks and the
    # REPL accept a bare `i += 1`, but in a script file that assignment would
    # create a new loop-local `i` and the loop would fail with UndefVarError.
    global i += 1
end

1
2
3
4
5
6
7
8
9
10


In [1]:
i = 1

# `while true` never ends on its own; `break` leaves the loop once i reaches 5.
while true
    println(i)
    if i >= 5
        break
    end
    # `global` so the update targets the top-level `i` in script files too,
    # not only under the notebook/REPL soft-scope rule.
    global i += 1
end

1
2
3
4
5


Now let us move on to the `for` loop.

In [2]:
# `1:10` is a range with step 1; the loop variable takes each value in turn.
for n in 1:10
    println(n)
end

1
2
3
4
5
6
7
8
9
10


In [3]:
# `start:step:stop` — count from 5 up to 11 in steps of 2.
for n in 5:2:11
    println(n)
end

5
7
9
11


In [6]:
# A negative step counts down. The range stops at 5, because the next value (0)
# would fall below the stop value 1.
for n in 20:-5:1
    println(n)
end

20
15
10
5


In [8]:
# Ranges also work on characters: this walks every code point from 'α' to 'ω'.
for letter in 'α':'ω'
    println(letter)
end

α
β
γ
δ
ε
ζ
η
θ
ι
κ
λ
μ
ν
ξ
ο
π
ρ
ς
σ
τ
υ
φ
χ
ψ
ω


In [9]:
# The values have different types (an integer, the constant π, a string),
# so the dictionary's value type is `Any`.
d1 = Dict("A" => 1, "B" => π, "C" => "doggo")

Dict{String, Any} with 3 entries:
  "B" => π
  "A" => 1
  "C" => "doggo"

In [10]:
# Iterating a Dict yields key/value pairs, destructured here into `key` and
# `value`. A Dict is unordered, so entries need not appear in insertion order.
for (key, value) in pairs(d1)
    println("key = $key\t value = $value.")
end

key = B	 value = π.
key = A	 value = 1.
key = C	 value = doggo.


In [11]:
# `Any[]` is what the bare literal `[]` creates: an empty vector whose elements
# may be of any type. (`Int[]` would instead give a concretely typed vector.)
a2 = Any[]
for n in 1:10
    push!(a2, n)  # append n to the end of a2
end

a2

10-element Vector{Any}:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10

Here is an illustration of how to work with nested (inner and outer) loops.

In [12]:
x, y = 5, 5
# x-by-y integer matrix with every entry set to 0.
A = zeros(Int, x, y)

5×5 Matrix{Int64}:
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0

In [13]:
# Fill A so that each entry holds the sum of its row index and its column index.
for i in 1:x          # outer loop: first (row) index
    for j in 1:y      # inner loop: second (column) index, runs fully for each i
        A[i,j] = i+j
    end
end

In the code above, `j` controls the inner loop (here running from 1 to 5) and `i` controls the outer loop (also from 1 to 5).

In [14]:
A

5×5 Matrix{Int64}:
 2  3  4  5   6
 3  4  5  6   7
 4  5  6  7   8
 5  6  7  8   9
 6  7  8  9  10

We can also use **syntactic sugar**: syntax designed to make code easier to read or to express. It makes the language "sweeter" for human use. For example, two nested `for` loops can be written as a single `for` statement with comma-separated ranges.

In [15]:
x, y = 6, 6
B = zeros(Int, x, y)

# One `for` with comma-separated ranges is shorthand for nested loops:
# the first range (i) is the outer loop and the last range (j) the inner one.
for i in 1:x, j in 1:y
    B[i, j] = i + j
end

B 

6×6 Matrix{Int64}:
 2  3  4   5   6   7
 3  4  5   6   7   8
 4  5  6   7   8   9
 5  6  7   8   9  10
 6  7  8   9  10  11
 7  8  9  10  11  12

## Here is how to create functions in Julia  

In [1]:
"""
    f(x, y)

Return the sum of `x` and `y`.
"""
function f(x, y)
    return x + y
end

f (generic function with 1 method)

In [2]:
x,y = 4,5
f(x,y)

9

In [3]:
# Compact one-line ("assignment form") function definition.
g(x, y) = x + y

g (generic function with 1 method)

In [4]:
g(x,y)

9

### Functions with control flow

In [6]:
"""
    myconditional(x, y)

Print how `x` compares with `y` and return `nothing`.

The printed symbol is `≥` when `x > y`, `≤` when `x < y`, and `=` otherwise.
"""
function myconditional(x, y)
    if x > y
        println("$x ≥ $y.")
    elseif x < y
        println("$x ≤ $y.")
    else
        println("$x = $y.")
    end
    return nothing
end

myconditional (generic function with 1 method)

In [7]:
myconditional(3,2)

3 ≥ 2.


In [8]:
myconditional(π,3.1415926)

π ≥ 3.1415926.




# DataFrames with Julia

In [13]:
using DataFrames,CSV

In [2]:
# Raw inputs, one entry per company. The trailing `;` suppresses cell output.
brand = ["Company A", "Company B", "Company C"];
tier = ["Premium", "Economy", "Standard"];
quantity = [10, 100, 50];
price = [100, 10, 50];
cost = [30, 7, 25];

Step 1: create a DataFrame to hold this basic information.

In [3]:
# Assemble the vectors into a table; each keyword becomes a column name.
df = DataFrame(; Brand=brand, Tier=tier, Quantity=quantity, Price=price, Cost=cost)

Unnamed: 0_level_0,Brand,Tier,Quantity,Price,Cost
Unnamed: 0_level_1,String,String,Int64,Int64,Int64
1,Company A,Premium,10,100,30
2,Company B,Economy,100,10,7
3,Company C,Standard,50,50,25


Step 2: calculate revenue, profit, and margin 

In [5]:
df.Revenue = df.Quantity.*df.Price;

In [6]:
# Element-wise: profit = revenue minus the Cost column.
# NOTE(review): Cost looks like a per-unit figure (same scale as Price), while
# Revenue is Quantity .* Price. If so, this should probably be
# `df.Revenue .- df.Quantity .* df.Cost` — confirm before relying on Profit/Margin.
df.Profit = df.Revenue .- df.Cost;

In [8]:
df.Margin = df.Profit./df.Revenue;

In [9]:
df

Unnamed: 0_level_0,Brand,Tier,Quantity,Price,Cost,Revenue,Profit,Margin
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Float64
1,Company A,Premium,10,100,30,1000,970,0.97
2,Company B,Economy,100,10,7,1000,993,0.993
3,Company C,Standard,50,50,25,2500,2475,0.99


Step 3: calculate total quantity, total revenue, total profit, and total margin

In [10]:
# Column totals across all companies.
dftotalquantity = sum(df.Quantity)
dftotalrevenue = sum(df.Revenue)
dftotalprofit = sum(df.Profit)

# Overall margin: total profit over total revenue (not the mean of row margins).
dftotalmargin = dftotalprofit / dftotalrevenue

0.9862222222222222

In [11]:
# One-row summary table built from the scalar totals.
dftotal = DataFrame(;
    Quantity=dftotalquantity,
    Revenue=dftotalrevenue,
    Profit=dftotalprofit,
    Margin=dftotalmargin,
)

Unnamed: 0_level_0,Quantity,Revenue,Profit,Margin
Unnamed: 0_level_1,Int64,Int64,Int64,Float64
1,160,4500,4438,0.986222


In [14]:
CSV.write("mydf.csv",df)

"mydf.csv"

In [16]:
describe(df)

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Union…,Any,Union…,Any,Int64,DataType
1,Brand,,Company A,,Company C,0,String
2,Tier,,Economy,,Standard,0,String
3,Quantity,53.3333,10,50.0,100,0,Int64
4,Price,53.3333,10,50.0,100,0,Int64
5,Cost,20.6667,7,25.0,30,0,Int64
6,Revenue,1500.0,1000,1000.0,2500,0,Int64
7,Profit,1479.33,970,993.0,2475,0,Int64
8,Margin,0.984333,0.97,0.99,0.993,0,Float64


In [17]:
nrow(df)

3

In [18]:
ncol(df)

8

In [19]:
size(df)

(3, 8)

In [20]:
names(df)

8-element Vector{String}:
 "Brand"
 "Tier"
 "Quantity"
 "Price"
 "Cost"
 "Revenue"
 "Profit"
 "Margin"

In [21]:
propertynames(df)

8-element Vector{Symbol}:
 :Brand
 :Tier
 :Quantity
 :Price
 :Cost
 :Revenue
 :Profit
 :Margin

In [23]:
rename(df, :Brand => :Company)

Unnamed: 0_level_0,Company,Tier,Quantity,Price,Cost,Revenue,Profit,Margin
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Float64
1,Company A,Premium,10,100,30,1000,970,0.97
2,Company B,Economy,100,10,7,1000,993,0.993
3,Company C,Standard,50,50,25,2500,2475,0.99


In [24]:
df

Unnamed: 0_level_0,Brand,Tier,Quantity,Price,Cost,Revenue,Profit,Margin
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Float64
1,Company A,Premium,10,100,30,1000,970,0.97
2,Company B,Economy,100,10,7,1000,993,0.993
3,Company C,Standard,50,50,25,2500,2475,0.99


The `rename()` function returns a renamed copy; it does not change the column names of the original data frame.

To change a name permanently (in place), we need the `rename!()` function.

In [25]:
rename!(df, :Profit => :Profits)

Unnamed: 0_level_0,Brand,Tier,Quantity,Price,Cost,Revenue,Profits,Margin
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Float64
1,Company A,Premium,10,100,30,1000,970,0.97
2,Company B,Economy,100,10,7,1000,993,0.993
3,Company C,Standard,50,50,25,2500,2475,0.99


In [26]:
df

Unnamed: 0_level_0,Brand,Tier,Quantity,Price,Cost,Revenue,Profits,Margin
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Float64
1,Company A,Premium,10,100,30,1000,970,0.97
2,Company B,Economy,100,10,7,1000,993,0.993
3,Company C,Standard,50,50,25,2500,2475,0.99


## Data Analysis with DataFrame (Examples)

First, we download the world population data from Wikipedia.
We then use CSV and DataFrames to load it into a data frame for further analysis.

In [27]:
df = DataFrame(CSV.File("population.csv"))

Unnamed: 0_level_0,Rank,Country / Dependency,Region,Population,Percentage of the world
Unnamed: 0_level_1,String3,String,String15?,String15,String15
1,–,World,missing,7950786000,100%
2,1,China,Asia,1412600000,17.8%
3,2,India,Asia,1376380925,17.3%
4,3,United States,Americas,332684062,4.18%
5,4,Indonesia,Asia[b],272248500,3.42%
6,5,Pakistan,Asia,225199937,2.83%
7,6,Brazil,Americas,214642119,2.70%
8,7,Nigeria,Africa,211400708,2.66%
9,8,Bangladesh,Asia,172732070,2.17%
10,9,Russia,Europe[c],145478097,1.83%


Second, we start "data wrangling". 

In [28]:
# Add a running row number as a new column (it is appended as the last column) ...
df.id = 1:nrow(df)
# ... then reorder in place so `id` comes first; `:` keeps all remaining columns.
# `select!` returns the same data frame, so rebinding `df` is unnecessary.
select!(df, :id, :)

Unnamed: 0_level_0,id,Rank,Country / Dependency,Region,Population,Percentage of the world
Unnamed: 0_level_1,Int64,String3,String,String15?,String15,String15
1,1,–,World,missing,7950786000,100%
2,2,1,China,Asia,1412600000,17.8%
3,3,2,India,Asia,1376380925,17.3%
4,4,3,United States,Americas,332684062,4.18%
5,5,4,Indonesia,Asia[b],272248500,3.42%
6,6,5,Pakistan,Asia,225199937,2.83%
7,7,6,Brazil,Americas,214642119,2.70%
8,8,7,Nigeria,Africa,211400708,2.66%
9,9,8,Bangladesh,Asia,172732070,2.17%
10,10,9,Russia,Europe[c],145478097,1.83%


In [29]:
names(df)

9-element Vector{String}:
 "id"
 "Rank"
 "Country / Dependency"
 "Region"
 "Population"
 "Percentage of the world"
 "Date"
 "Source (official or from the United Nations)"
 "Notes"

We need to remove the commas from the `Population` strings in the original data so that they can be parsed as numbers.

In [30]:
df.Pop = replace.(df.Population,"," => "")

242-element Vector{String}:
 "7950786000"
 "1412600000"
 "1376380925"
 "332684062"
 "272248500"
 "225199937"
 "214642119"
 "211400708"
 "172732070"
 "145478097"
 ⋮
 "5000"
 "4000"
 "1966"
 "1734"
 "1549"
 "1501"
 "825"
 "573"
 "40"

Convert the strings to integers.

In [32]:
df.Pop64 = parse.(Int,df.Pop)

242-element Vector{Int64}:
 7950786000
 1412600000
 1376380925
  332684062
  272248500
  225199937
  214642119
  211400708
  172732070
  145478097
          ⋮
       5000
       4000
       1966
       1734
       1549
       1501
        825
        573
         40

In [33]:
df

Unnamed: 0_level_0,id,Rank,Country / Dependency,Region,Population,Percentage of the world
Unnamed: 0_level_1,Int64,String3,String,String15?,String15,String15
1,1,–,World,missing,7950786000,100%
2,2,1,China,Asia,1412600000,17.8%
3,3,2,India,Asia,1376380925,17.3%
4,4,3,United States,Americas,332684062,4.18%
5,5,4,Indonesia,Asia[b],272248500,3.42%
6,6,5,Pakistan,Asia,225199937,2.83%
7,7,6,Brazil,Americas,214642119,2.70%
8,8,7,Nigeria,Africa,211400708,2.66%
9,9,8,Bangladesh,Asia,172732070,2.17%
10,10,9,Russia,Europe[c],145478097,1.83%


In [34]:
# Population in millions, rounded to three decimal places.
df.Poprev = round.(df.Pop64 ./ 1_000_000; digits=3)

242-element Vector{Float64}:
 7950.786
 1412.6
 1376.381
  332.684
  272.248
  225.2
  214.642
  211.401
  172.732
  145.478
    ⋮
    0.005
    0.004
    0.002
    0.002
    0.002
    0.002
    0.001
    0.001
    0.0