# Imports

In [12]:
import os
import pandas as pd

# Combine Data Files

In [13]:
# Directories whose files are read and stacked into one frame, in this order.
data_dirs = ["./data/States", "./data/Maryland"]

data_frames = []

for data_dir in data_dirs:
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the combined frame (and of the saved CSV) reproducible.
    for file_name in sorted(os.listdir(data_dir)):
        file_path = os.path.join(data_dir, file_name)
        # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints,
        # .DS_Store) so they are never handed to read_csv.
        if file_name.startswith(".") or not os.path.isfile(file_path):
            continue
        # low_memory=False parses each file in a single pass instead of in
        # chunks, avoiding mixed-dtype columns within a file.
        df = pd.read_csv(file_path, low_memory=False)
        data_frames.append(df)

# Stack every file into one frame with a fresh 0..n-1 index.
combined_df = pd.concat(data_frames, ignore_index=True)


# Add Link Field and Filter

In [14]:
def _photo_link(row):
    """Build the openlittermap.com map URL for one row from its lat, lon and id values."""
    return f"https://openlittermap.com/global?lat={row['lat']}&lon={row['lon']}&zoom=17&photo={row['id']}"


# Attach a 'link' column, formatted row by row.
combined_df['link'] = combined_df.apply(_photo_link, axis=1)

# Columns that qualify a row when any of them holds a value greater than 0.
columns_to_check = [
    "cigaretteBox", "filterbox", "sweetWrappers", "paperFoodPackaging", "plasticFoodPackaging",
    "crisp_small", "crisp_large", "sauce_packet", "glass_jar", "glass_jar_lid", "foodOther",
    "pizza_box", "coffeeCups", "coffeeOther", "beerCan", "beerBottle", "spiritBottle",
    "wineBottle", "bottleTops", "paperCardAlcoholPackaging", "pint", "plasticAlcoholPackaging",
    "alcohol_plastic_cups", "alcoholOther", "waterBottle", "fizzyDrinkBottle", "bottleLid",
    "bottleLabel", "tinCan", "sportsDrink", "plastic_cups", "plastic_cup_tops", "milk_bottle",
    "milk_carton", "paper_cups", "juice_cartons", "juice_bottles", "juice_packet", "ice_tea_bottles",
    "ice_tea_can", "energy_can", "softDrinkOther", "hand_sanitiser", "aadrink", "acadia", "adidas",
    "albertheijn", "aldi", "amazon", "amstel", "anheuser_busch", "apple", "applegreen", "asahi",
    "avoca", "bacardi", "ballygowan", "bewleys", "brambles", "budweiser", "bulmers", "bullit",
    "burgerking", "butlers", "cadburys", "cafe_nero", "calanda", "camel", "caprisun", "carlsberg",
    "centra", "coke", "circlek", "coles", "colgate", "corona", "costa", "doritos", "drpepper",
    "dunnes", "duracell", "durex", "evian", "esquires", "fanta", "fernandes", "fosters",
    "frank_and_honest", "fritolay", "gatorade", "gillette", "goldenpower", "guinness", "haribo",
    "heineken", "hertog_jan", "insomnia", "kellogs", "kfc", "lavish", "lego", "lidl", "lindenvillage",
    "lipton", "lolly_and_cookes", "loreal", "lucozade", "marlboro", "mars", "mcdonalds", "modelo",
    "molson_coors", "monster", "nero", "nescafe", "nestle", "nike", "obriens", "ok_", "pepsi",
    "powerade", "redbull", "ribena", "samsung", "sainsburys", "schutters", "seven_eleven",
    "slammers", "spa", "spar", "stella", "subway", "supermacs", "supervalu", "starbucks", "tayto",
    "tesco", "tim_hortons", "thins", "volvic", "waitrose", "walkers", "wendys", "woolworths",
    "wilde_and_greene", "winston", "wrigleys", "bags_litter", "books", "magazine", "batteries",
    "deodorant", "plastic_bags",
]

# condition1: at least one of the listed columns is strictly positive in the row
# (missing values compare as False).
condition1 = combined_df[columns_to_check].gt(0).any(axis=1)

# Positional slice: drops the first 11 columns and the last 4. The trailing four
# include the 'link' column added above.
# NOTE(review): presumably this spans the per-item count columns -- confirm
# against the CSV schema, since the slice depends on column order.
columns_after_11th = combined_df.columns[11:-4]

# condition2: every column in that slice is 0 or missing for the row.
condition2 = combined_df[columns_after_11th].fillna(0).eq(0).all(axis=1)

# Keep rows that satisfy either condition.
combined_df = combined_df.loc[condition1 | condition2]

# Save File

In [16]:
# Write the filtered frame to disk; index=False leaves the row index out of the file.
output_path = './data/combined_tabular_data.csv'
combined_df.to_csv(output_path, index=False)