In [None]:
# Attach every package the notebook relies on before anything else runs, so
# the cells execute top-to-bottom in a fresh R session:
#   readxl    - read_excel()
#   dplyr     - %>%, mutate(), filter(), group_by(), summarise()
#   lubridate - floor_date()
#   ggplot2   - all plotting
#   tidytext  - reorder_within(), scale_x_reordered()
library(readxl)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidytext)

# Load the sample metadata workbook for each sequencing pool.

p1971_metadata <- read_excel("~/Documents/GitHub/tephi_spatiotemporal_ac/metadata/p1971_metadata.xlsx")
p1976_metadata <- read_excel("~/Documents/GitHub/tephi_spatiotemporal_ac/metadata/p1976_metadata.xlsx")
p1980_Metadata <- read_excel("~/Documents/GitHub/tephi_spatiotemporal_ac/metadata/p1980_Metadata.xlsx")

# Stack the three pools into a single metadata table.
# NOTE(review): rbind() needs the three workbooks to share the same column
# names -- confirm the sheets are laid out identically.
allmetaTables <- rbind(p1971_metadata, p1976_metadata, p1980_Metadata)

# `allPtables` has always held exactly the same rows as `allmetaTables`; keep
# the name for any code that still refers to it, but build the table only once.
allPtables <- allmetaTables

In [None]:
# Read the tab-separated taxonomy profiles, one file per sequencing pool.
# (`tax180` holds the p1980 profile; the name is kept because later cells
# refer to it.)

tax1971 <- utils::read.delim(
  file = "~/Documents/GitHub/tephi_spatiotemporal_ac/tax_profiles/p1971_TEPHI.detected_virus.combined.tax.tsv"
)
tax1976 <- utils::read.delim(
  file = "~/Documents/GitHub/tephi_spatiotemporal_ac/tax_profiles/p1976_TEPHI.detected_virus.combined.tax.tsv"
)
tax180 <- utils::read.delim(
  file = "~/Documents/GitHub/tephi_spatiotemporal_ac/tax_profiles/p1980_TEPHI.detected_virus.combined.tax.tsv"
)

In [None]:
# Stack the three per-pool taxonomy tables (1971, 1976, 1980 order) into one.

alltaxTables <- do.call(rbind, list(tax1971, tax1976, tax180))


In [None]:
# Build the join key: add a `sample_ID` column of the form
# "<Sample_ID>.<PoolID>" to the metadata so that it lines up with the
# `sample_ID` column of alltaxTables.

allmetaTabal2 <- mutate(
  allmetaTables,
  sample_ID = paste0(Sample_ID, ".", PoolID)
)


In [None]:
# Join metadata and taxonomy on the shared `sample_ID` key.
# merge() with its defaults keeps only sample_IDs present in both tables.

bothTables <- merge(
  x = allmetaTabal2,
  y = alltaxTables,
  by = "sample_ID"
)


In [None]:
# Inspect the merged table's columns and types before splitting the data
# out by city, virus (genus) and time.

utils::str(bothTables)


In [None]:
# Virus families of interest (important pathogens); later cells keep only
# rows whose `family` is in this vector.

selected_families <- c(
  "Adenoviridae",
  "Caliciviridae",
  "Coronaviridae",
  "Orthomyxoviridae",
  "Paramyxoviridae",
  "Parvoviridae",
  "Picornaviridae",
  "Pneumoviridae",
  "Poxviridae",
  "Reoviridae"
)


In [None]:

# First and last sampling dates in the merged table (missing dates ignored).
earliest_date <- min(bothTables[["Date"]], na.rm = TRUE)
mrecent_date <- max(bothTables[["Date"]], na.rm = TRUE)


In [None]:
# Coerce `Date` to a Date and tag every row with the start of its week.
# week_start = 7 makes weeks begin on Sunday.

weeksTable <- bothTables %>%
  mutate(Date = as.Date(Date)) %>%
  mutate(week_start = floor_date(Date, unit = "week", week_start = 7))



In [None]:
# Row counts per city / site / species / week / family / sample, restricted to
# the selected families (family names are matched case-insensitively).
# `species_reordered` is a copy of `species` ordered by summed count within
# each Site x week_start combination, for use with scale_x_reordered().

countsSpeciesWeek <- weeksTable %>%
  filter(tolower(family) %in% tolower(selected_families)) %>%
  count(City, Site, species, week_start, family, sample_ID, name = "count") %>%
  group_by(Site, week_start) %>%
  mutate(
    species_reordered = reorder_within(
      x = species,
      by = count,
      within = list(Site, week_start),
      fun = sum
    )
  ) %>%
  ungroup()

In [None]:
# Viral frequency by site in Texan cities
## El Paso: weekly count per species as lines, one panel per site

ggplot(
  data = filter(countsSpeciesWeek, City == "El Paso, TX"),
  mapping = aes(x = week_start, y = count, color = species)
) +
  geom_line() +
  geom_point() +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  facet_wrap(vars(Site), nrow = 2, ncol = 2, scales = "free_y") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Viral Frequency in El Paso by Site",
    x = "Week Starting (Sunday)",
    y = "Count",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(2, "lines")
  )



In [None]:
## Austin: weekly count per species as lines, one panel per site

ggplot(
  data = filter(countsSpeciesWeek, City == "Austin, TX"),
  mapping = aes(x = week_start, y = count, color = species)
) +
  geom_line() +
  geom_point() +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  facet_wrap(vars(Site), nrow = 1, ncol = 3, scales = "free_y") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Viral Frequency in Austin by Site",
    x = "Week Starting (Sunday)",
    y = "Count",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(2, "lines")
  )

In [None]:
## Laredo: weekly count per species as lines, one panel per site

ggplot(
  data = filter(countsSpeciesWeek, City == "Laredo, TX"),
  mapping = aes(x = week_start, y = count, color = species)
) +
  geom_line() +
  geom_point() +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  facet_wrap(vars(Site), nrow = 1, ncol = 2, scales = "free_y") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Viral Frequency in Laredo by Site",
    x = "Week Starting (Sunday)",
    y = "Count",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(2, "lines")
  )



In [None]:
## Lubbock: weekly count per species as lines, one panel per site

ggplot(
  data = filter(countsSpeciesWeek, City == "Lubbock, TX"),
  mapping = aes(x = week_start, y = count, color = species)
) +
  geom_line() +
  geom_point() +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  facet_wrap(vars(Site), nrow = 1, ncol = 2, scales = "free_y") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Viral Frequency in Lubbock by Site",
    x = "Week Starting (Sunday)",
    y = "Count",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(2, "lines")
  )

In [None]:
## Dallas: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Dallas, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 2, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Dallas",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )

In [None]:
## Fort Worth: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Fort Worth, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Fort Worth",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )


In [None]:
## Katy: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Katy, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 2, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Katy",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )


In [None]:
## Wichita Falls: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Wichita Falls, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Wichita Falls",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )


In [None]:
## Waco: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Waco, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 2, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Waco",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )

In [None]:
## Palestine: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Palestine, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Palestine",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )


In [None]:
## Athens: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Athens, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 2, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Athens",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )

In [None]:
## Humble: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Humble, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Humble",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )


In [None]:
## Baytown: horizontal bars of count per species, one panel per week

ggplot(
  data = filter(countsSpeciesWeek, City == "Baytown, TX"),
  mapping = aes(
    x = species_reordered,
    y = count,
    fill = species,
    color = family
  )
) +
  geom_col(linewidth = 1, show.legend = TRUE) +
  # Strips the suffix that reorder_within() appended to the species labels.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, NA)) +
  coord_flip() +
  facet_wrap(vars(week_start), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Viral Frequency in Baytown",
    x = "Species (Ordered by Count per Week)",
    y = "Count",
    fill = "Species",
    color = "Family"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    panel.spacing = unit(1, "lines")
  )

In [None]:
## Houston: weekly count per species as lines, one panel per site

ggplot(
  data = filter(countsSpeciesWeek, City == "Houston, TX"),
  mapping = aes(x = week_start, y = count, color = species)
) +
  geom_line() +
  geom_point() +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  # The panel grid is fixed at 2 rows x 10 columns.
  facet_wrap(vars(Site), nrow = 2, ncol = 10, scales = "free_y") +
  guides(color = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Viral Frequency in Houston by Site",
    x = "Week Starting (Sunday)",
    y = "Count",
    color = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.3, "cm"),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(2, "lines")
  )

In [None]:
# Stacked bars: weekly row counts per selected virus family, one panel per city

# NOTE(review): earlier cells already call functions from these packages;
# consider attaching them in the first cell instead.
library(ggplot2)
library(dplyr)
library(lubridate)

bothTables %>%
  filter(family %in% selected_families) %>%
  # Weeks begin on Sunday (week_start = 7).
  mutate(week_start = floor_date(Date, unit = "week", week_start = 7)) %>%
  # Counting rows per city/week/family gives the same totals as counting per
  # day first and then summing the daily counts within each week.
  count(City, week_start, family, name = "weekly_count") %>%
  ggplot(mapping = aes(x = week_start, y = weekly_count, fill = family)) +
  geom_col(position = "stack") +
  facet_wrap(vars(City), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Virus Family Prominence by City",
    x = "Week Starting (Sunday)",
    y = "Count",
    fill = "Family"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


In [None]:
# One bar per virus family: same weekly counts as the stacked chart, but with
# position = "dodge" so each week shows the families side by side instead of
# stacked on top of each other.

bothTables %>%
  filter(family %in% selected_families) %>%
  # Weeks begin on Sunday (week_start = 7).
  mutate(week_start = floor_date(Date, unit = "week", week_start = 7)) %>%
  # Counting rows per city/week/family gives the same totals as counting per
  # day first and then summing the daily counts within each week.
  count(City, week_start, family, name = "weekly_count") %>%
  ggplot(mapping = aes(x = week_start, y = weekly_count, fill = family)) +
  geom_col(position = "dodge") +
  facet_wrap(vars(City), ncol = 3, scales = "free_y") +
  labs(
    title = "Weekly Virus Family Prominence by City",
    x = "Week Starting (Sunday)",
    y = "Count",
    fill = "Family"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# One bar per species: weekly row counts for species belonging to the
# selected families, dodged side by side, one panel per city.

bothTables %>%
  filter(family %in% selected_families) %>%
  # Weeks begin on Sunday (week_start = 7).
  mutate(week_start = floor_date(Date, unit = "week", week_start = 7)) %>%
  # Counting rows per city/week/species gives the same totals as counting per
  # day first and then summing the daily counts within each week.
  count(City, week_start, species, name = "weekly_count") %>%
  ggplot(mapping = aes(x = week_start, y = weekly_count, fill = species)) +
  geom_col(position = "dodge") +
  # Anchor every panel's y axis at zero; the upper limit stays data-driven.
  scale_y_continuous(limits = c(0, NA)) +
  facet_wrap(vars(City), ncol = 3, scales = "free_y") +
  guides(fill = guide_legend(ncol = 1)) +
  labs(
    title = "Weekly Species Prominence Over Time by City",
    x = "Week Starting (Sunday)",
    y = "Amount of Species",
    fill = "Species"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.height = unit(0.38, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 7),
    strip.text = element_text(size = 7),
    axis.text = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.spacing = unit(1, "lines")
  )
