---
title: "R Programming Cheatsheet"
author: "Zhijun He"
format: 
  html:
    theme: cosmo
    toc: true
    code-fold: false
    page-layout: full
    css: styles.css
---

```{css echo=FALSE}
/* Lay out the children of a `columns` div side by side. */
.columns {
  display: flex;
}

/* Give every column an equal share of the row, with a horizontal gutter. */
.column {
  flex: 1;
  padding: 0 10px;
}

/* Heading colours: darker blue for h1/h2, lighter blue for h3. */
h1, h2 {
  color: #006aa3;
}

h3 {
  color: #0078c0;
}

/* Grey monospace box for inline code samples.
   NOTE(review): no element in this document uses `.code-box` - confirm it
   is still needed. */
.code-box {
  background-color: #f0f0f0;
  border: 1px solid #cccccc;
  border-radius: 4px;
  font-family: monospace;
  padding: 5px;
  font-size: 0.85em;
  margin-bottom: 10px;
}

/* Bordered card style for anything tagged `{.box}` (the level-2 headings
   in this document). */
.box {
  border: 1px solid #ccc;
  padding: 10px;
  margin-bottom: 15px;
  border-radius: 5px;
  background-color: #f9f9f9;
}
```


## R Grammar & Basic Syntax {.box}

::::: columns
::: column
### Variable Assignment

``` r
x <- 90  # Preferred in tidyverse
y = 42   # Alternative syntax
```

### Basic Data Types

``` r
# Character
text <- "hello"
# Numeric
num <- 42
decimal <- 3.14
# Logical
bool <- TRUE
# Vector
vec <- c(1, 2, 3, 4, 5)
# Data frame
df <- data.frame(
  id = 1:3,
  name = c("A", "B", "C")
)
```
:::

::: column
### The Pipe Operator

``` r
# Instead of:
result <- function3(function2(function1(df)))
# Use:
result <- df %>% 
  function1() %>% 
  function2() %>% 
  function3()
```

### Logical Operators

Comparison: `<`, `>`, `<=`, `>=`, `==`, `!=`

Logical: `&` (and), `|` (or), `!` (not)
:::
:::::

## Data Import & Export {.box}

::::: columns
::: column
### File Paths

``` r
# Relative paths (resolved from the current working directory)
df <- read_csv("data.csv")          # file in the working directory
df <- read_csv("data/data.csv")     # file inside the "data" subfolder
df <- read_csv("../data/data.csv")  # "data" folder one level up
# Absolute paths (`~` expands to your home directory)
df <- read_csv("~/Desktop/project/data.csv")
```

### Reading Data

``` r
# CSV files (read_csv() comes from readr, attached by library(tidyverse))
df <- read_csv("filename.csv")
# Excel files (readxl is not attached by library(tidyverse); load it yourself)
library(readxl)
df <- read_excel("filename.xlsx")
# R Data (restores the saved objects under their original names)
load("filename.RData")
```
:::

::: column
### Saving Data

``` r
# Save as CSV
write_csv(df, "filename.csv")
# Save as R object (reload later with load("data.RData"))
save(df, file = "data.RData")
# Save the most recent plot (width and height are in inches by default)
ggsave("plot.png", width = 8, height = 6)
```
:::
:::::

## Data Exploration {.box}

::::: columns
::: column
### Initial Data Inspection

``` r
# Structure and dimensions
str(df)      # column types plus the first few values
glimpse(df)  # tidyverse alternative to str(), one column per line
dim(df)      # number of rows, number of columns
# View first/last rows
head(df, n = 10)
tail(df, n = 10)
# Summary statistics
summary(df)
# Missing values
sum(is.na(df))      # total number of NAs in the data frame
colSums(is.na(df))  # number of NAs in each column
```
:::

::: column
### Basic Statistics

``` r
# Central tendency
mean(x, na.rm = TRUE)
median(x, na.rm = TRUE)
# Spread
sd(x, na.rm = TRUE)
IQR(x, na.rm = TRUE)
range(x, na.rm = TRUE)
# Correlation
# cor() has no na.rm argument; without `use` it returns NA as soon as either
# vector contains a missing value. "complete.obs" drops incomplete pairs.
cor(x, y, use = "complete.obs")
```
:::
:::::

## Data Wrangling with dplyr {.box}

::::: columns
::: column
### Key Verbs

``` r
# Each verb takes a data frame as its first argument; it is omitted here
# because the verbs are meant to follow a pipe, e.g. df %>% filter(age > 25)
# Create/modify variables
mutate(new_var = old_var / 1000)
# Filter rows by condition
filter(age > 25)
# Select columns
select(name, age)
# Sort rows (ascending age, ties broken by descending name)
arrange(age, desc(name))
# Summarize data (collapses rows to one summary row per group)
summarize(avg = mean(value, na.rm = TRUE))
# Group data (later verbs then operate within each category)
group_by(category)
# Count observations (one row per category, count stored in column n)
count(category)
```
:::

::: column
### Missing Values

``` r
# Filter non-missing
filter(!is.na(column))  
# Replace missing
mutate(col_clean = replace_na(col, "refused"))
# Drop missing rows
drop_na()
```

### Common Patterns

``` r
# Group-level statistics
df %>% 
  group_by(category) %>%
  summarize(
    avg = mean(value, na.rm = TRUE),
    count = n()
  ) %>%
  arrange(desc(avg))
```
:::
:::::

## String Manipulation {.box}

``` r
# Extract substring
str_sub("macalester", 1, 3)  # "mac"
# Detect pattern
str_detect("El Taco", "Taco")  # TRUE
# Get string length
str_length("abc")  # 3
# Convert to lowercase
str_to_lower("ABC")  # "abc"
# Replace pattern (first match only; str_replace_all() replaces every match)
str_replace("Taco Taxi", "Taco", "Pizza")  # "Pizza Taxi"
```

## Data Reshaping with tidyr {.box}

::::: columns
::: column
### Wide to Long (pivot_longer)

``` r
# Before:
# year_2020 year_2021 year_2022
#    10        15        20
# After:
# year  value
# 2020   10
# 2021   15
# 2022   20
long_df <- pivot_longer(
  df,
  cols = c(year_2020, year_2021, year_2022),
  names_to = "year",
  # Strip the "year_" prefix so the new column holds "2020" rather than
  # "year_2020" (the values are still character strings)
  names_prefix = "year_",
  values_to = "value"
)
```
:::

::: column
### Long to Wide (pivot_wider)

``` r
wide_df <- pivot_wider(
  long_df,
  names_from = year,
  values_from = value
)
```
:::
:::::

## Joining Data {.box}

::::: columns
::: column
### Types of Joins

``` r
# Keep only rows that have a match in both tables
inner_join(table1, table2, by = "id")
# Keep all rows from left table (columns from the right are NA if unmatched)
left_join(table1, table2, by = "id")
# Keep all rows from both tables
full_join(table1, table2, by = "id")
# Rows from left that match right (filters only; adds no columns from right)
semi_join(table1, table2, by = "id")
# Rows from left that don't match right
anti_join(table1, table2, by = "id")
```
:::

::: column
### Join By Multiple Columns

``` r
# In a named `by` vector the name (left of `=`) is the column in table1 and
# the value (right of `=`) is the matching column in table2
inner_join(table1, table2, 
           by = c("id1" = "id2", "name1" = "name2"))
```
:::
:::::

## Data Visualization with ggplot2 {.box}

::::: columns
::: column
### Grammar of Graphics

ggplot2 builds plots in layers:

``` r
ggplot(data = df, mapping = aes(x, y)) +
  geom_*() +         # geometry layer
  scale_*() +        # scale transformations
  labs() +           # labels
  theme() +          # visual styling
  facet_*()          # faceting
```

### Common Geometries

``` r
# Scatter plot
ggplot(df, aes(x = x_var, y = y_var)) +
  geom_point()

# Line plot
ggplot(df, aes(x = x_var, y = y_var)) +
  geom_line()

# Bar chart (counts)
ggplot(df, aes(x = category)) +
  geom_bar()

# Bar chart (values)
ggplot(df, aes(x = category, y = value)) +
  geom_col()
```
:::

::: column
``` r
# Histogram
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 30)

# Boxplot
ggplot(df, aes(x = category, y = value)) +
  geom_boxplot()

# Density plot
ggplot(df, aes(x = value)) +
  geom_density()
```

### Adding Color

``` r
# Color points by category
ggplot(df, aes(x = x_var, y = y_var, color = group)) +
  geom_point()

# Fill bars by category
ggplot(df, aes(x = x_var, fill = group)) +
  geom_bar()
```
:::
:::::

::::: columns
::: column
### Faceting

``` r
# Multiple plots by category
ggplot(df, aes(x = x_var, y = y_var)) +
  geom_point() +
  facet_wrap(~ category)

# Grid of plots by two variables
ggplot(df, aes(x = x_var, y = y_var)) +
  geom_point() +
  facet_grid(row_var ~ col_var)
```
:::

::: column
### Customizing Labels & Theme

``` r
ggplot(df, aes(x = x_var, y = y_var)) +
  geom_point() +
  labs(
    title = "Main Title",
    subtitle = "Subtitle here",
    x = "X-axis Label",
    y = "Y-axis Label",
    caption = "Data source: Example"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(size = 12),
    legend.position = "bottom"
  )
```
:::
:::::

### Color Blindness-Friendly Palettes

``` r
# Discrete variables
scale_color_viridis_d()
scale_fill_viridis_d()

# Continuous variables
scale_color_viridis_c()
scale_fill_viridis_c()
```

## Spatial Visualization {.box}

::::: columns
::: column
### Basic Maps with Leaflet

``` r
library(leaflet)

# Create a map with default tiles
leaflet() %>%
  addTiles() %>%
  setView(lng = -93.1, lat = 44.9, zoom = 12)

# Add markers
leaflet(spatial_df) %>%
  addTiles() %>%
  addMarkers(~longitude, ~latitude, 
             popup = ~paste(name, "<br>", details))
```
:::

::: column
### Choropleth Maps

``` r
library(sf)
library(rnaturalearth)

# Get country boundaries
world <- ne_countries(scale = "medium", 
                      returnclass = "sf")

# Create choropleth map
ggplot(world) +
  geom_sf(aes(fill = pop_est)) +
  scale_fill_viridis_c() +
  theme_minimal()
```
:::
:::::

## Complete Example Scripts {.box}

### Data Analysis Example

``` r
# Load packages
library(tidyverse)

# Import data
penguins <- read_csv("penguins.csv")

# Clean data
penguins_clean <- penguins %>%
  # Remove rows with a missing value in any column
  drop_na() %>%
  # Create new variables
  mutate(
    body_mass_kg = body_mass_g / 1000,
    bill_ratio = bill_length_mm / bill_depth_mm
  ) %>%
  # Filter rows
  filter(species != "Chinstrap") %>%
  # Select columns (bill_ratio is kept so the variable created above is
  # not silently thrown away)
  select(species, island, body_mass_kg, bill_ratio)

# Summarize data
penguin_summary <- penguins_clean %>%
  group_by(species, island) %>%
  summarize(
    avg_mass = mean(body_mass_kg),
    n = n(),
    # Return an ungrouped tibble; by default the result would stay grouped
    # by species and summarize() would print a message saying so
    .groups = "drop"
  ) %>%
  arrange(desc(avg_mass))

# Create visualization
ggplot(
  penguins_clean,
  aes(x = island, y = body_mass_kg, fill = species)
) +
  geom_boxplot() +
  labs(
    title = "Penguin Body Mass by Island",
    x = "Island",
    y = "Body Mass (kg)",
    fill = "Species"
  ) +
  scale_fill_viridis_d() +
  theme_minimal()
```

### String Manipulation Example

``` r
# Attach the tidyverse (tibble, dplyr and stringr are all used below)
library(tidyverse)

# Sample restaurant data, written one restaurant per row
restaurants <- tribble(
  ~name,            ~cuisine,        ~price,
  "El Taco Riendo", "Mexican",       15,
  "French Meadow",  "American",      25,
  "Shish",          "Mediterranean", 18,
  "Taco Taxi",      "Mexican",       12
)

# Keep only the restaurants whose name contains "Taco"
taco_places <- restaurants %>%
  filter(str_detect(name, "Taco"))

# Derive several name-based columns (the result is printed, not stored)
restaurants %>%
  mutate(
    short_name  = str_sub(name, 1, 5),               # first five characters
    name_length = str_length(name),                  # number of characters
    lower_name  = str_to_lower(name),                # all lowercase
    pizza_name  = str_replace(name, "Taco", "Pizza") # first "Taco" -> "Pizza"
  )
```

### Reshaping Example

``` r
# Load packages (tibble() and the pivot_*() functions come from the tidyverse)
library(tidyverse)

# Race times in wide format: one row per runner, one column per year
race_times_wide <- tibble(
  runner = c("Abiy", "Amy", "Carlos"),
  `2022` = c(96.6, 103.0, 88.5),
  `2023` = c(89.1, 99.6, 85.2),
  `2024` = c(92.3, 98.2, 82.7)
)

# Convert to long format: one row per runner-year
# (year is a character column because its values come from column names)
race_times_long <- race_times_wide %>%
  pivot_longer(
    cols = -runner,
    names_to = "year",
    values_to = "time"
  )

# Convert back to wide format
race_times_wide_again <- race_times_long %>%
  pivot_wider(
    names_from = year,
    values_from = time
  )
```

## RStudio Environment {.box}

-   Console: Try code interactively
-   Install packages: `install.packages("tidyverse")`
-   Load packages: `library(tidyverse)`
-   Get help: `?function_name`, `help(function_name)`