# R

### R is a widely used statistical programming language. R works well with data, making it a great language for anyone interested in data analysis, data visualization, and data science.

## Print

In [3]:
# print() writes its argument to the console; the [1] in the output is the index of the first element shown
print("Hello Everyone, I am printing in R")

[1] "Hello Everyone, I am printing in R"


## Calculation

In [1]:
# Multiplication and division bind tighter than addition: (25 * 4) + (9 / 3) = 103
25 * 4 + 9 / 3

In [1]:
# Parentheses are evaluated first: (10 * 6) + ((4 - 1) / 5) = 60.6
10 * 6 + (4 - 1) / 5

## Comments

In [2]:
# This is how you write a comment in R: start the line with "#", just like in Python

## Variables
#### Declaring variables in R is just like in Python, but R programmers prefer the arrow assignment operator `<-`

In [None]:
# Assign with the arrow operator "<-", the form R programmers conventionally prefer
first_name <- "Luke"
last_name <- "Roy"
# "=" also assigns, as in Python; these two lines overwrite the values set above
first_name = "John"
last_name = "Smith"

## Data Types

#### R has 5 data types, which are the classifications we give to different kinds of information.

* Numeric - Any number with or without a decimal
* Character - Any grouping of characters (letters, numbers, spaces, etc.)
* Logical - Like a Boolean; the two values are written in all caps: `TRUE` or `FALSE`
* Vectors - A list of related data that is all the same type
* NA - Represents the absence of a value

## Vectors
#### Vectors are list-like structures that contain items of the same data type

In [2]:
# c() combines its arguments into a vector; here, a character vector of month names
winter_months <- c("December", "January", "February")

In [3]:
# Another character vector, built the same way with c()
cars <- c("Buick", "Toyota", "Ford")

## Conditionals

In [4]:
# R's logical constants are written in all caps: TRUE and FALSE.
# The condition here is always TRUE, so only the first branch runs.
if (TRUE) {
  print("Hello, Everyone!")
} else {
  print("This message won't print")
}


[1] "Hello, Everyone!"


## Comparison Operators

#### The same as Python's
* Less than: <
* Greater than: >
* Less than or equal to: <=
* Greater than or equal to: >=
* Is equal to: ==
* Is NOT equal to: !=

## Logical Operators

#### The same concepts as Python's `and`, `or`, and `not`, but written as symbols in R
* the AND operator (&)
* the OR operator (|)
* the NOT operator, otherwise known as the bang operator (!)

## Example - Loading Data into Data Frame

In [7]:
#Import R packages, these are two of the main ones for data analysis
library(readr)
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [10]:
# Read the CSV into a data frame with readr's read_csv().
# The file holds housing data for houses in Tallahassee, FL.
zillow_data <- read_csv(file = "/Users/luke/Downloads/zillow.csv")

Parsed with column specification:
cols(
  Index = col_double(),
  `Living Space (sq ft)` = col_double(),
  Beds = col_double(),
  Baths = col_double(),
  Zip = col_double(),
  Year = col_double(),
  `List Price ($)` = col_double()
)


#### Inspect the data frame

In [13]:
# Evaluating the bare variable name displays the data frame
zillow_data

Index,Living Space (sq ft),Beds,Baths,Zip,Year,List Price ($)
1,2222,3,3.5,32312,1981,250000
2,1628,3,2.0,32308,2009,185000
3,3824,5,4.0,32312,1954,399000
4,1137,3,2.0,32309,1993,150000
5,3560,6,4.0,32309,1973,315000
6,2893,4,3.0,32312,1994,699000
7,3631,4,3.0,32309,1996,649000
8,2483,4,3.0,32312,2016,399000
9,2400,4,4.0,32312,2002,613000
10,1997,3,3.0,32311,2006,295000


In [14]:
# Preview only the first six rows (six is head()'s default)
head(zillow_data, n = 6)

Index,Living Space (sq ft),Beds,Baths,Zip,Year,List Price ($)
1,2222,3,3.5,32312,1981,250000
2,1628,3,2.0,32308,2009,185000
3,3824,5,4.0,32312,1954,399000
4,1137,3,2.0,32309,1993,150000
5,3560,6,4.0,32309,1973,315000
6,2893,4,3.0,32312,1994,699000


In [15]:
# Per-column summary statistics: min, quartiles, median, mean, and max
zillow_data %>% summary()

     Index       Living Space (sq ft)      Beds          Baths      
 Min.   : 1.00   Min.   :1128         Min.   :2.00   Min.   :1.000  
 1st Qu.: 5.75   1st Qu.:1905         1st Qu.:3.00   1st Qu.:2.000  
 Median :10.50   Median :2490         Median :4.00   Median :3.000  
 Mean   :10.50   Mean   :2646         Mean   :3.85   Mean   :3.175  
 3rd Qu.:15.25   3rd Qu.:3578         3rd Qu.:4.25   3rd Qu.:4.000  
 Max.   :20.00   Max.   :4892         Max.   :6.00   Max.   :6.000  
      Zip             Year      List Price ($)  
 Min.   :32301   Min.   :1954   Min.   : 89000  
 1st Qu.:32309   1st Qu.:1988   1st Qu.:233750  
 Median :32310   Median :1995   Median :340000  
 Mean   :32309   Mean   :1993   Mean   :383440  
 3rd Qu.:32312   3rd Qu.:2006   3rd Qu.:554675  
 Max.   :32312   Max.   :2016   Max.   :799900  

#### Selecting columns, filtering and arranging rows of housing data

In [23]:
# Select columns: keep only Beds and Baths
select(zillow_data, Beds, Baths)

Beds,Baths
3,3.5
3,2.0
5,4.0
3,2.0
6,4.0
4,3.0
4,3.0
4,3.0
4,4.0
3,3.0


In [24]:
# Same selection written with the pipe: %>% passes zillow_data as the
# first argument of select()
zillow_data %>%
  select(Beds, Baths)

Beds,Baths
3,3.5
3,2.0
5,4.0
3,2.0
6,4.0
4,3.0
4,3.0
4,3.0
4,4.0
3,3.0


In [25]:
# Save the Beds/Baths selection in its own variable instead of displaying it
Bed_and_Bath_Info <- zillow_data %>%
  select(Beds, Baths)

In [26]:
# Exclude columns: a leading minus drops a column, so this keeps everything
# except Beds and Baths
select(zillow_data, -Beds, -Baths)

Index,Living Space (sq ft),Zip,Year,List Price ($)
1,2222,32312,1981,250000
2,1628,32308,2009,185000
3,3824,32312,1954,399000
4,1137,32309,1993,150000
5,3560,32309,1973,315000
6,2893,32312,1994,699000
7,3631,32309,1996,649000
8,2483,32312,2016,399000
9,2400,32312,2002,613000
10,1997,32311,2006,295000


In [36]:
# Filter rows: find houses that have 4 or more beds and were built after 1990.
# Beds and Year were parsed as numeric (col_double) columns, so compare them
# against numeric literals. Quoted values such as "4" would make R coerce the
# column to character and compare lexicographically (e.g. "10" >= "4" is
# FALSE), which silently drops rows once a value has more digits.
filtered_houses <- filter(zillow_data, Beds >= 4, Year > 1990)

In [37]:
filtered_houses

Index,Living Space (sq ft),Beds,Baths,Zip,Year,List Price ($)
6,2893,4,3,32312,1994,699000
7,3631,4,3,32309,1996,649000
8,2483,4,3,32312,2016,399000
9,2400,4,4,32312,2002,613000
11,2097,4,3,32311,2016,290000
13,4892,5,6,32311,2005,799900
16,4242,4,5,32303,2007,569000
20,4010,5,3,32309,2002,549900


In [38]:
# Arrange rows: sort the houses by Year in ascending order (arrange()'s default),
# so the oldest houses come first
arrange(zillow_data, Year)

Index,Living Space (sq ft),Beds,Baths,Zip,Year,List Price ($)
3,3824,5,4.0,32312,1954,399000
14,1128,2,1.0,32303,1955,89000
12,3200,5,4.0,32312,1964,465000
5,3560,6,4.0,32309,1973,315000
1,2222,3,3.5,32312,1981,250000
19,2497,4,4.0,32309,1990,289000
17,2533,3,2.0,32310,1991,365000
4,1137,3,2.0,32309,1993,150000
18,1158,3,2.0,32303,1993,155000
6,2893,4,3.0,32312,1994,699000


In [39]:
# Sort by Year in descending order; wrapping the column in desc() reverses the sort
arrange(zillow_data, desc(Year))

Index,Living Space (sq ft),Beds,Baths,Zip,Year,List Price ($)
8,2483,4,3.0,32312,2016,399000
11,2097,4,3.0,32311,2016,290000
2,1628,3,2.0,32308,2009,185000
16,4242,4,5.0,32303,2007,569000
10,1997,3,3.0,32311,2006,295000
15,1381,3,2.0,32301,2006,143000
13,4892,5,6.0,32311,2005,799900
9,2400,4,4.0,32312,2002,613000
20,4010,5,3.0,32309,2002,549900
7,3631,4,3.0,32309,1996,649000
