# CREATE DATA

In [1]:
# Three-element integer vector 1, 2, 3
x1 <- seq_len(3)
x1
# Nine-element integer vector 1 through 9
y <- seq_len(9)
y

In [2]:
# Bind x1 and y into one data frame; the shorter x1 is recycled
# (repeated three times) to match the length of y
df1 <- cbind.data.frame(x1, y)
df1
typeof(df1[["x1"]])  # storage type of the x1 column
str(df1)

x1,y
<int>,<int>
1,1
2,2
3,3
1,4
2,5
3,6
1,7
2,8
3,9


'data.frame':	9 obs. of  2 variables:
 $ x1: int  1 2 3 1 2 3 1 2 3
 $ y : int  1 2 3 4 5 6 7 8 9


# AS.FACTOR

In [3]:
# Create the variable as a factor from the start
x2 <- as.factor(1:3)
x2
# Combine the factor with y and inspect the result
df2 <- cbind.data.frame(x2, y)
df2
typeof(df2[["x2"]])  # storage type underlying the factor column
str(df2)

x2,y
<fct>,<int>
1,1
2,2
3,3
1,4
2,5
3,6
1,7
2,8
3,9


'data.frame':	9 obs. of  2 variables:
 $ x2: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3
 $ y : int  1 2 3 4 5 6 7 8 9


## DEFINE EXISTING VARIABLE AS FACTOR

In [4]:
# Start from a plain integer column, then convert it to a factor in place
x3 <- 1:3
df3 <- cbind.data.frame(x3, y)
df3$x3 <- factor(df3$x3, levels = c(1, 2, 3))
df3$x3
typeof(df3[["x3"]])
str(df3)

'data.frame':	9 obs. of  2 variables:
 $ x3: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3
 $ y : int  1 2 3 4 5 6 7 8 9


## LABELS FOR FACTOR

In [5]:
# Turn the numeric codes 1, 2, 3 into a factor with readable labels,
# so the variable can be used as a category in later analyses
x4 <- 1:3
df4 <- cbind.data.frame(x4, y)
df4$x4 <- factor(
  df4$x4,
  levels = c(1, 2, 3),
  labels = c("macOS", "Windows", "Linux")
)
df4
typeof(df4[["x4"]])
str(df4)

x4,y
<fct>,<int>
macOS,1
Windows,2
Linux,3
macOS,4
Windows,5
Linux,6
macOS,7
Windows,8
Linux,9


'data.frame':	9 obs. of  2 variables:
 $ x4: Factor w/ 3 levels "macOS","Windows",..: 1 2 3 1 2 3 1 2 3
 $ y : int  1 2 3 4 5 6 7 8 9


## ORDERED FACTORS AND LABELS

In [6]:
# Build an ordered factor. The order of `levels` sets the ranking and
# `labels` are matched to it position by position:
# 3 -> "No", 1 -> "Maybe", 2 -> "Yes", giving No < Maybe < Yes
x5 <- 1:3
df5 <- cbind.data.frame(x5, y)
df5$x5 <- factor(
  df5$x5,
  levels = c(3, 1, 2),
  labels = c("No", "Maybe", "Yes"),
  ordered = TRUE
)
df5$x5
df5
typeof(df5[["x5"]])
str(df5)

x5,y
<ord>,<int>
Maybe,1
Yes,2
No,3
Maybe,4
Yes,5
No,6
Maybe,7
Yes,8
No,9


'data.frame':	9 obs. of  2 variables:
 $ x5: Ord.factor w/ 3 levels "No"<"Maybe"<"Yes": 2 3 1 2 3 1 2 3 1
 $ y : int  1 2 3 4 5 6 7 8 9


# ACCESS DATA

## Assignment operator

In [11]:
# Colon operator: assign the integers 0 through 10 to x1 and show them
(x1 <- 0:10)
# Same range in descending order, from 10 down to 0
(x2 <- 10:0)

## SEQ

In [10]:
# seq(10) counts up from 1 to 10, the same values as 1:10
x3 <- seq(10)
x3

# Step down from 30 to 0 in increments of 3
x4 <- seq(from = 30, to = 0, by = -3)
x4

## ENTER MULTIPLE VALUES WITH C

In [12]:
# c() combines individually entered values into one vector
(x5 <- c(5, 4, 1, 6, 7, 2, 2, 3, 2, 8))

## SCAN

In [13]:
# scan() with no arguments reads values typed interactively
x6 <- scan()  # After running this command, go to the console
# Hit return after each number in the R console
# Hit return twice to stop
x6

## REP

In [17]:
# rep() replicates (repeats) its first argument
# A single value repeated five times
(x7 <- rep(TRUE, times = 5))
# The whole set repeated five times: TRUE FALSE TRUE FALSE ...
(x8 <- rep(c(TRUE, FALSE), times = 5))
# Each item in the set repeated five times: five TRUE, then five FALSE
(x9 <- rep(c(TRUE, FALSE), each = 5))

# IMPORTING DATA

- Built-in functions for importing data in CSV, TXT, XLSX, and JSON formats

In [1]:
# Attach the packages used in this section. library() stops with an
# error if a package is not installed, whereas require() only warns and
# returns FALSE, which would postpone the failure to the first import().
library(datasets)  # Load base packages manually
library(rio)       # Provides import() for the CSV, TXT and XLSX files below

Loading required package: rio



## IMPORTING WITH RIO

In [3]:
# CSV: read the file with rio and preview the first six rows
rio_csv <- rio::import("../CourseFiles/ImportingData_Datasets/mbb.csv")
head(rio_csv, n = 6L)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


In [4]:
# TXT: read the text file with rio and preview the first six rows
rio_txt <- rio::import("../CourseFiles/ImportingData_Datasets/mbb.txt")
head(rio_txt, n = 6L)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


In [5]:
# Excel XLSX: read the workbook with rio and preview the first six rows
rio_xlsx <- rio::import("../CourseFiles/ImportingData_Datasets/mbb.xlsx")
head(rio_xlsx, n = 6L)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


## DATA VIEWER

In [6]:
# Open the imported CSV data in the data viewer
View(rio_csv)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12
7,2004-07,7,5,11
8,2004-08,7,6,11
9,2004-09,9,7,12
10,2004-10,11,8,13


In [9]:
# TEXT FILES

# Read a spreadsheet that was saved as a tab-delimited text file,
# giving the path to the file. Without an explicit separator this
# call runs into problems when values are missing, but it works on
# complete data.
r_txt1 <- read.table(
  "../CourseFiles/ImportingData_Datasets/mbb.txt",
  header = TRUE
)
head(r_txt1, n = 6L)

"number of items read is not a multiple of the number of columns"


Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


In [10]:
# Naming the separator makes the import work with missing data:
# "\t" is for tabs, sep = "," for commas. R converts missing
# values to "NA"
r_txt2 <- read.table(
  file = "../CourseFiles/ImportingData_Datasets/mbb.txt",
  header = TRUE,
  sep = "\t"
)
head(r_txt2, n = 6L)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


In [13]:
# CSV FILES
# No delimiter has to be specified to cope with missing data,
# because CSV means "comma separated values"
trends.csv <- read.csv(
  file = "../CourseFiles/ImportingData_Datasets/mbb.csv",
  header = TRUE
)
head(trends.csv, n = 6L)

Unnamed: 0_level_0,Month,Mozart,Beethoven,Bach
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>
1,2004-01,12,8,15
2,2004-02,12,9,15
3,2004-03,12,9,14
4,2004-04,12,8,14
5,2004-05,11,9,13
6,2004-06,9,7,12


# CLEAN UP #################################################

In [14]:
# Remove every object that ls() lists in the current environment,
# i.e. the variables and data frames created in the cells above
rm(list = ls()) # Clear environment