<a href="https://colab.research.google.com/github/prabhmeharbedi/R-Programming/blob/main/R_Programming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---
# **R Programming**
---
 - Basic Examples to Understand R
 - Reading and Writing to File
 - Simple plotting
 - Multi-criteria decision making (MCDM) using Topsis
 - Model Evaluation Parameters for Regression and Classification using R

### **Self learning resource**
- Tutorial on R **<a href="https://www.w3schools.com/r/" target="_blank"> Click Here</a>**




---
# **1. Simple Examples**
---
## **1.1 Addition of two numbers**
---

In [1]:
2 + 2                 # Addition of two numeric literals

---
## **1.2 Addition of two numbers using variables**
---

In [2]:
# Store two numbers in variables, then add them
a <- 10
b <- 20
c <- a + b  # the sum is kept in `c`, which the next cells print
c

In [3]:
# Print the value with cat(), which writes it without an index prefix
cat(c)

30

In [4]:
# Print the value with print(), which shows the [1] index prefix
print(c)

[1] 30


---
## **1.3 Use of function**
---

In [5]:
# Exponential function: e raised to the power -200
exp(-200)


In [6]:
# Log function: logarithm of 100 to base 10
log(100, base = 10)

---
## **1.4 Generate Random Number**
---

In [7]:
# Generate one random number between 0 and 1
runif(1)

In [8]:
# Generate five random numbers between 0 and 1, rounded to 3 decimal places
round(runif(5),3)

In [9]:
# Generate five random numbers between 1 and 5 (min and max set the range)
runif(5, min = 1, max = 5)

In [10]:
# Get the type of a variable: draw five random numbers, show them, then ask for their class
N <- runif(5)
N
class(N)
# Reminder: in R, indexing starts from 1 (N[1] is the first element)

---
## **1.5 Basic operations**
---

In [11]:
# Summary statistics of N; each label starts with "\n" so every value prints on its own line
cat("N   : ",N)
cat("\nMin : ",min(N))          # Min
cat("\nMax : ",max(N))          # Max
cat("\nSum : ",sum(N))          # Sum
cat("\nMean: ",mean(N))         # Mean
cat("\nSD  : ",sd(N))           # Standard Deviation

N   :  0.4236605 0.1851277 0.9539444 0.8218573 0.8345672
Min :  0.1851277
Max :  0.9539444
Sum :  3.219157
Mean:  0.6438314
SD  :  0.3252414

---
## **1.6 Loop**
---

In [12]:
# Print the numbers 1 to 10, one per line
for (i in seq_len(10)) {
  cat(i, "\n")
}

1 
2 
3 
4 
5 
6 
7 
8 
9 
10 


In [13]:
# Print 10 random numbers between 1 and 100, drawing one per loop iteration
for (i in seq_len(10)) {
  cat(i, "-->", round(runif(1, min = 1, max = 100)), "\n")
}

1 --> 68 
2 --> 37 
3 --> 19 
4 --> 79 
5 --> 52 
6 --> 56 
7 --> 95 
8 --> 99 
9 --> 16 
10 --> 87 


In [14]:
# Print 10 random numbers between 1 and 100, drawn in a single vectorized call (no loop)
N <- round(runif(10, min = 1, max = 100))
cat(N)

10 11 84 21 100 82 17 84 73 77

---
## **1.7 If-Else**
---

In [15]:
# If-else: compare i against 20 and report the outcome
i <- 20
if (i == 20) {
  cat("Yes")
} else {
  cat("No")
}

Yes

In [16]:
# Error demo: if-else with `else` on its own line (this cell is expected to fail)
i=20
if (i==20){
  cat("Yes")
}                         # the if statement ends here; else should be on the same line as }
else{
  cat("NO")
}

ERROR: ignored

In [None]:
# If-else: report which of two numbers is greater
a <- 10
b <- 20
if (a > b) {
  cat(a, " is Greater")
} else {
  cat(b, " is Greater")
}

---
## **1.8 Some More Examples**
---

In [None]:
# Actual: 100 random values between 1 and 5, rounded to 2 decimal places
Actual <- round(runif(100, min = 1, max = 5), digits = 2)
Actual
class(Actual)

In [None]:
# Use of head: first 6 elements by default, or the first n when n is given
head(Actual)
head(Actual,10)
head(Actual,20)


In [None]:
# Use of tail: last 6 elements by default, or the last n when n is given
tail(Actual)
tail(Actual,10)
tail(Actual,20)

In [None]:
# Predicted: 100 random values between 1 and 5, rounded to 2 decimal places
Predicted <- round(runif(100, min = 1, max = 5), digits = 2)
head(Predicted)

---
## **1.9 Basic Operations (Difference, SOD, MAE, Correlation)**
---

In [None]:
# Difference: element-wise Actual minus Predicted
D <- Actual - Predicted
cat(D)

In [None]:
# Sum of Difference (SOD): total of the signed differences
SOD <- sum(D)
cat(SOD)

In [None]:
# Correlation between the actual and predicted values
COR <- cor(Actual, Predicted)
cat(COR)

In [None]:
# Mean Absolute Error (MAE): average size of the prediction error, ignoring sign
MAE <- mean(abs(Actual - Predicted))
cat(MAE)

In [None]:
# Accuracy with 0.1 acceptable error:
# a prediction counts as correct when its absolute error is within 0.1
Accuracy <- abs(Actual - Predicted) <= 0.1
cat(Accuracy)

# Percentage of predictions that fall within the tolerance
Accuracy <- mean(Accuracy) * 100
Accuracy

---
## **1.10 Writing to the file**
---

In [None]:
# Get the Current Working Directory, Similar to pwd in linux
getwd()
# On macOS, running pwd in Terminal.app shows the shell's present working directory

In [None]:
# Write Actual and Predicted as two columns of a CSV file, without a row-name column
write.csv(data.frame(Actual,Predicted), file="SampleData.csv", row.names=FALSE)
# The relative file name is resolved against the working directory; go to that path and check for the file

In [None]:
# Show Actual and Predicted side by side as a data frame
data.frame(Actual,Predicted)

---
## **1.11 Read Actual/Predicted file and calculate the model evaluation parameters**
---

In [None]:
# Get the Current Working Directory (the relative file name below is resolved against it)
getwd()

In [None]:
# Read the data file into a data frame and show its first rows
dataset <- read.csv("SampleData.csv")
print(head(dataset))

In [None]:
# Get the column data from the data file with $ and show the first values of each column
cat ("Actual-->", head(dataset$Actual))
cat ("\nPredicted-->", head(dataset$Predicted))


In [None]:
# Calculate the model evaluation parameters and write them to a file.
# The metrics use the Actual/Predicted columns of `dataset` (read from
# SampleData.csv above), so they describe the data that was loaded.

# Correlation
COR <- round(cor(dataset$Actual, dataset$Predicted), 2)

# R_Square: square the unrounded correlation so rounding happens only once
R_Square <- round(cor(dataset$Actual, dataset$Predicted)^2, 2)

# Mean Absolute Error (MAE)
MAE <- round(mean(abs(dataset$Actual - dataset$Predicted)), 2)

# Accuracy with 0.1 acceptable error: percentage of predictions whose
# absolute error is within the tolerance
Accuracy <- round(mean(abs(dataset$Actual - dataset$Predicted) <= 0.1), 2) * 100

# Writing the model evaluation parameters to a file
write.csv(data.frame(COR, R_Square, MAE, Accuracy), file = "ResultFile.csv", row.names = FALSE)
# Go to the path and check for file

---
# **2. Basics of Data frame**
---
## **2.1 Generate random matrices**
---

In [None]:
# Generate a 100 x 3 matrix of random values between 1 and 10

NRows <- 100
NCols <- 3

m1 <- matrix(runif(NRows * NCols, min = 1, max = 10), ncol = NCols)
class(m1)
head(m1)

In [None]:
# Generate two more matrices with the same number of rows:
# m2 has 4 columns of values between 10 and 50
NCols <- 4
m2 <- matrix(runif(NRows * NCols, min = 10, max = 50), ncol = NCols)
class(m2)
print(head(m2))

# m3 has 3 columns of values between 100 and 200
NCols <- 3
m3 <- matrix(runif(NRows * NCols, min = 100, max = 200), ncol = NCols)
class(m3)
print(head(m3))

---
## **2.2 Merging matrices**
---

In [None]:
# Merging matrices: combine m1, m2 and m3 column-wise into one data frame, rounded to 2 decimals
dataset <- round(data.frame(m1, m2, m3),2)
print(head(dataset))

---
## **2.3 Get the column names**
---

In [None]:
# Column names of the merged data frame
names(dataset)


---
## **2.4 Get the total number of columns**
---

In [None]:
# Total number of columns, obtained by adding up the columns of the three matrices
NCols <- ncol(m1) + ncol(m2) + ncol(m3)
NCols

In [None]:
# Total number of columns in dataset, asked of the data frame directly
NCols <- ncol(dataset)
NCols

---
## **2.5 Rename of columns**
---

In [None]:
# Generate the sequence of numbers 1, 2, ..., NCols
c(1:NCols)

In [None]:
# Concatenate "F" with the sequence to build the labels F1, F2, ...
# paste0() joins its arguments without a separator
paste0("F", seq_len(NCols))

In [None]:
# Rename the columns to F1, F2, ... (one label per column)
names(dataset) <- paste0("F", seq_len(NCols))
print(head(dataset))

---
## **2.6 Writing dataset to the file**
---

In [None]:
# Writing the renamed dataset to a CSV file, without a row-name column
write.csv(dataset, file="NewDataset.csv", row.names=FALSE)
# Go to the path and check for file

---
## **2.7 Plotting the dataset**
---

In [None]:
# Plot the dataset: plotting a whole data frame draws a scatter plot for every pair of columns
plot(dataset)

---
## **2.8 Summary of the dataset**
---

In [None]:
# Per-column summary statistics of the dataset
summary(dataset)

---
# **3. Data selection in dataframe**
---
## **3.1 Read and head the dataset**
---

In [None]:
# Read the dataset file written earlier and show its first rows
dataset <- read.csv("NewDataset.csv")
head(dataset)

---
### **3.2 Select top 10 rows of F4, F6, F7 columns**
---

In [None]:
dataset[1:10,c("F4","F6","F7")]  # rows 1 to 10 of the three named columns; returns a data.frame

---
### **3.3 Select bottom 10 rows of F4, F6, F7 columns**
---

In [None]:
# Row numbers of the last n rows: from (number of rows - n + 1) up to the number of rows
n <- 10
s <- (nrow(dataset) - n + 1):nrow(dataset)
s

In [None]:
dataset[s,c("F4","F6","F7")]  # the rows indexed by s, three named columns; returns a data.frame

---
### **3.4 Select random columns in dataframe**
---

In [None]:
# sample() on a data frame picks columns: keep 3 random columns and show their first rows
head(sample(dataset,3))
# Run the code again 2-3 times to see different columns

In [None]:
# To return 5 random rows: draw 5 distinct row numbers, then index with them
index <- sample(seq_len(nrow(dataset)), 5)
index
dataset[index, ]

# Run the code again 2-3 times

---
### **3.5 Shuffle the data row wise.**
---

In [None]:
# Shuffle rows: index the data frame with a random permutation of its row numbers
dataset <- dataset[sample(nrow(dataset)),]
head(dataset)
# Run the code again 2-3 times

---
### **3.6 Divide the data into train and test**
---

In [None]:
# Reload the dataset and set the split size
dataset <- read.csv("NewDataset.csv")
totalDataset <- nrow(dataset)  # number of rows available
totalDataset
training <- 70                 # percentage of rows used for training
training

In [None]:
# Training set: the first `training` percent of the rows
trainDataset <- dataset[seq_len(floor(totalDataset * training / 100)), ]
nrow(trainDataset)
head(trainDataset)

In [None]:
# Test set: the rows after the training rows. Start one row past the last
# training row so that row does not appear in both sets.
testDataset <- dataset[(floor(totalDataset * training / 100) + 1):totalDataset, ]
nrow(testDataset)
head(testDataset)

---
# **4. Plotting**
---
## **4.1 Basics of Plotting**

In [None]:
# Generate random numbers: 100 values between 1 and 5, rounded to 2 decimal places
x <- round(runif(100, min = 1, max = 5), digits = 2)
x

In [None]:
# Plot x against its index (1, 2, ..., length(x))
plot(x)

In [None]:
# Title in Plot: main sets the heading shown above the plot
plot(x, main = "Plot of x against index 1, 2, ..., length(x)")


In [None]:
# X,Y Labels: xlab and ylab set the axis labels
plot(x, main = "Plot of x against index 1, 2, ..., length(x)", xlab = "X Axis", ylab="Y Axis")

---
## **4.2 Bivariate Plot**
---

In [None]:
# Bivariate 'scatter plot' of y vs x, where y is x squared plus uniform noise between 0 and 1
y <- x^2 + runif(100)
plot(x, y, main = "Bivariate 'scatter plot' of y vs x")

---
## **4.3 Saving the Plot**
---

In [None]:
# Saving the Plot to a file
getwd()   # Get the Current Working Directory, Similar to pwd in linux
png(filename="MyPlot.png")   # open a PNG device; the following plot is drawn into the file
plot(x, y, main = "Bivariate 'scatter plot' of y vs x",xlab = "X Axis", ylab="Y Axis")
dev.off()   # close the device so the file is written
# Go to the path and check for file

---
## **4.4 Other Plots**
---

In [None]:
# Bundle x and y into a named list
z1 <- list(x = x, y = y)
z1
class(z1)

In [None]:
# Plot the list: its x and y components are used as the coordinates
plot(z1)

In [None]:
# Variation in Plot: connect the points in their current (unsorted) order
plot(z1, type = "l")  # l is for lines


---
## **4.5 Increasing Order Plotting**
---

In [None]:
# Generate ordering: order(x) gives the indices that sort x in increasing order
print(x)
ord <- order(x)
print(ord)

In [None]:
# Merge x, y: reorder both with the same indices so pairs stay matched and x is increasing
z <- list(x = x[ord], y = y[ord])
z

In [None]:
# lines: with x sorted, the line runs left to right
plot(z, type = "l", main= "X vs Y", xlab = "X Axis", ylab="Y Axis")

In [None]:
# Points and lines overlaid
plot(z, type = "o")

In [None]:
# steps: stair-step line between consecutive points
plot(z, type = "s")

In [None]:
# histogram-like: one vertical line per point
plot(z, type = "h")

---
## **4.6 Grid and a curve plotting**
---

In [None]:
# Start from an empty frame: type = "n" sets up the axes without drawing the data
plot(z, type = "n")

# Each call below adds one layer to that plot; run them one at a time to see each step
grid(lty = 1, lwd = 2)
curve(x^2, col = "blue", add = TRUE)
curve(x^2 + 1, col = "blue", add = TRUE)
points(z, pch = 20)

---
## **4.7 Graphical Parameters**
---

In [None]:
# col sets the color, pch the point symbol, cex the symbol size
plot(z, type = "o", col = 'red', pch = 16, cex = 2)

In [None]:
# A vector of colors is recycled across the points; pch can also be a character
plot(z, col = c('red', 'blue'), pch = "+", cex = 2)

---
## **4.8 Colors Plotting**
---

In [None]:
# List of the color names R knows
colors()

In [None]:
# Current palette: the colors used when col is given as a number
palette()

In [None]:
# Change palette colors: replace the palette with 10 randomly chosen named colors
palette(sample(colors(), 10))
# Points are colored by palette index, 5 consecutive points per color
plot(runif(50), col = rep(1:10, each = 5), pch = 16, cex = 2)

# Re-run the code to get a different random palette

---
## **4.9 Histogram Plotting - Frequency Plotting**
---

In [None]:
# create a grouping variable of length 100 with five levels
a <- factor(sample(1:5, 100, replace = TRUE), levels = 1:5)
a
# relabel the levels 1..5 as A..E
levels(a) <- LETTERS[1:5]
a

In [None]:
# Frequency plot: plot() on a factor draws one bar per level showing its count
plot(a)

In [None]:
# Box Plot: distribution of y within each level of a
plot(y ~ a)

---
## **4.10 Function Plotting**
---

In [None]:
# Plot a function directly: sin over the interval [-2*pi, 2*pi]
plot(sin, from = -2 * pi, to = 2 * pi)

In [None]:
# cos over the interval [-2*pi, 2*pi]
plot(cos, from = -2 * pi, to = 2 * pi)

In [None]:
 # Damped sine function: sin(5x) scaled by the decaying envelope exp(-x^2)
 damped_sin <- function(x) sin(5 * x) * exp(-x^2)

In [None]:
# A user-defined function is an ordinary object with its own class
class(damped_sin)

In [None]:
# Plot the user-defined function over [-pi, pi]
plot(damped_sin, from = -pi, to = pi)

---
# **5. Working with iris Dataset using dplyr**
---
## **5.1 Import the iris**

In [None]:
# Load the built-in iris dataset into the workspace
data(iris)

In [None]:
# Plot iris: scatter plots for every pair of columns
plot(iris)

In [None]:
# color by Species: plot the first four columns, using the species code (1, 2, 3) as the color index
plot(iris[1:4], col = as.numeric(iris$Species))

---
## **5.2 Library inclusion**
---

In [None]:
# Uncomment the next line to install dplyr if library() reports that it is missing
# install.packages("dplyr")
library(dplyr)

# Re-run this cell after installing

---
## **5.3 Working with Select()**
---

In [None]:
# To select the following columns by name
selected <- select(iris, Sepal.Length, Sepal.Width, Petal.Length)
head(selected)

In [None]:
# To select all columns from Sepal.Length to Petal.Length
selected1 <- select(iris, Sepal.Length:Petal.Length)
head(selected1)

In [None]:
# To select columns by position (columns 3 to 5)
selected1 <- select(iris,c(3:5))
head(selected1)

In [None]:
# We use (-) to drop a particular column; here both Sepal columns are excluded
selected <- select(iris, -Sepal.Length, -Sepal.Width)
head(selected)

---
## **5.4 Working with filter()**
---

In [None]:
# To select the first 3 rows with Species as setosa:
# filter() keeps the matching rows, head(, 3) then shows the first three
filtered <- filter(iris, Species == "setosa")
head(filtered, 3)

In [None]:
# To select the last 5 rows with Species as versicolor and Sepal width more than 3:
# both conditions must hold for a row to be kept, tail(, 5) then shows the last five
filtered1 <- filter(iris, Species == "versicolor", Sepal.Width > 3)
tail(filtered1, 5)

---
## **5.5 Working with Mutate()**
---

In [None]:
# To create a column "Greater.Half" which stores TRUE when Sepal.Width is more than half of Sepal.Length
col1 <- mutate(iris, Greater.Half = Sepal.Width > 0.5 * Sepal.Length)
tail(col1)

In [None]:
# To check how many flowers satisfy this condition: counts of FALSE and TRUE
table(col1$Greater.Half)

---
## **5.6 Working with Arrange()**
---

In [None]:
# To arrange the rows by Sepal.Width in ascending order
arranged <- arrange(col1, Sepal.Width)
head(arranged)

In [None]:
# To arrange the rows by Sepal.Width in descending order (desc() reverses the sort)
arranged <- arrange(col1, desc(Sepal.Width))
head(arranged)

---
## **5.7 Working with Arrange()**
---

In [None]:
# To arrange the rows by Sepal.Width in ascending order
arranged <- arrange(col1, Sepal.Width)
head(arranged)

---
# **6. Dataset Manipulation**
---
## **6.1 Merging Datasets (cbind, Column wise)**

In [None]:
# Two 3 x 3 matrices filled column by column with 1..9 and 10..18.
# The dimensions are given explicitly through nrow and ncol.
m1 <- matrix(1:9, nrow = 3, ncol = 3)
m2 <- matrix(10:18, nrow = 3, ncol = 3)
# cbind() places the matrices side by side (column wise), giving a 3 x 6 result
cbind(m1, m2)

---
## **6.2 Merging Datasets (rbind, Row wise)**
---

In [None]:
# rbind() stacks the matrices on top of each other (row wise)
rbind(m1,m2)

---
## **6.3 Merge Datasets (merge, Row wise)**
---

In [None]:
# Give both matrices the same column names, then merge on those columns.
# The vector is called col_names so it does not mask base R's names().
col_names <- c("v1", "v2", "v3")
colnames(m1) <- col_names
colnames(m2) <- col_names
# all = TRUE keeps the rows from both inputs, including those without a match in the other
merge(m1, m2, by = col_names, all = TRUE)

---
## **6.4 Apply Datasets (apply())**
---

In [None]:
# Sum the row wise
m1
apply(m1, 1, sum)

In [None]:
# Sum the column wise
m1
apply(m1, 2, sum)

In [None]:
# Mean the column wise
m1
apply(m1, 2, mean)

In [None]:
# Square every value: define a helper that multiplies its input by itself,
# then apply it to each column of m1
square <- function(x) {
  x * x
}

apply(m1, MARGIN = 2, FUN = square)

---
## **7. Model Evaluation Parameters for Regression and Classification using R**
---
- For Notebook **<a href="https://github.com/psrana/Model-Evaluation-Parameters-for-Regression-and-Classification" target="_blank"> Click Here</a>**


---
## **8. Multi-criteria decision making (MCDM) using Topsis**
---
- For Notebook **<a href="https://github.com/psrana/Topsis" target="_blank"> Click Here</a>**
