# Introduction: Light down Singapore
Light pollution is an often overlooked form of pollution that has many harmful effects on people and ecosystems. In this project, I analyze satellite data to understand how light pollution has affected Singapore and other countries.

## Step 1: Installing & importing the packages

In [1]:
!pip install pandas
!pip install numpy
!pip install rasterio
!pip install geopandas

import pandas as pd
import numpy as np
import rasterio
import geopandas as gpd
from shapely.geometry import Point
import os
import re



## Step 2: Preparing the data
We use rasterio to open the GeoTIFF files (one per year), each of which contains the coordinates and brightness levels for the whole world.
We use a GeoJSON file containing only the coordinates of Singapore to do a basic filtering, and then create a dataframe that maps brightness to coordinates.

In [2]:
import geopandas as gpd
import rasterio
import numpy as np
import pandas as pd
import os
from shapely.geometry import Point

# Read the Singapore GeoJSON and convert it to EPSG:4326 in one step.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists.
sgcoords = gpd.read_file(
    r"C:\Users\weisg\OneDrive\Documents\Y2S1\Projects\LightPollution\Data\sg.json"
).to_crs("EPSG:4326")

# Function to generate a grid of coordinates over each location
def gencoords(polygon, spacing=0.001):
    """Return the grid points that fall inside ``polygon``.

    A regular grid with step ``spacing`` (in the polygon's coordinate units)
    is laid over the polygon's bounding box, and only the points for which
    ``polygon.contains(point)`` is true are kept.

    Parameters
    ----------
    polygon : shapely geometry or None
        Geometry to sample. ``None`` or an empty geometry yields no points.
    spacing : float, default 0.001
        Distance between neighbouring grid points along both axes.

    Returns
    -------
    list of shapely.geometry.Point
        Points ordered by x first, then y (same order as the nested grid walk).
    """
    # Rows with a missing or empty geometry have no usable bounds to unpack.
    if polygon is None or polygon.is_empty:
        return []

    # Local import keeps this helper self-contained within the cell.
    from shapely.prepared import prep

    minx, miny, maxx, maxy = polygon.bounds
    x_vals = np.arange(minx, maxx, spacing)
    y_vals = np.arange(miny, maxy, spacing)

    # A prepared geometry answers repeated contains() queries much faster
    # than the plain geometry while returning the same result.
    prepared = prep(polygon)

    # Build each candidate point once and reuse it for both the test and the output.
    candidates = (Point(x, y) for x in x_vals for y in y_vals)
    return [pt for pt in candidates if prepared.contains(pt)]

grid_points, region_names = [], []

# Walk every feature, sample a grid inside its geometry, and record which
# region each sampled point belongs to.
for _, row in sgcoords.iterrows():
    polygon = row['geometry']

    # Use a nested properties -> name entry when the row has one; otherwise
    # fall back to a flat 'name' field, defaulting to 'unknown'.
    if 'properties' in row and 'name' in row['properties']:
        name = row['properties']['name']
    else:
        name = row.get('name', 'unknown')

    points = gencoords(polygon)
    grid_points += points
    region_names += [name] * len(points)

# One row per grid point, labelled with its region, in EPSG:4326
base_gdf = gpd.GeoDataFrame(
    {'region': region_names, 'geometry': grid_points},
    crs='EPSG:4326',
)

# Folder with GeoTIFF files
# NOTE(review): this path ends in "LightPollutiondata", while the GeoJSON is
# read from "...\LightPollution\Data" - confirm the folder name is intended.
tif_folder = r"C:\Users\weisg\OneDrive\Documents\Y2S1\Projects\LightPollutiondata"

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the combined output reproducible between runs.
tif_files = sorted(f for f in os.listdir(tif_folder) if f.endswith(".tif"))


def _extract_year(filename):
    """Return the year found in a raster filename as a string of digits.

    The first 19xx/20xx group is preferred, so other digits in the name
    (version tags, satellite numbers, ...) are not glued onto the year.
    Names without such a group fall back to joining every digit in the name.
    Returns '' when the name contains no digits at all.
    """
    match = re.search(r"(?:19|20)\d{2}", filename)
    if match:
        return match.group(0)
    return ''.join(filter(str.isdigit, filename))


# The sample locations are identical for every raster, so build them once.
# NOTE(review): assumes each raster uses the same CRS as base_gdf (EPSG:4326) - confirm.
coords = [(point.x, point.y) for point in base_gdf.geometry]

all_years_data = []

for tif_file in tif_files:
    # Extract year from filename (e.g., lightpollution_2015.tif)
    year = _extract_year(tif_file)
    if not year:
        continue  # Skip files with no year in name

    # The context manager closes the raster handle even if sampling fails.
    with rasterio.open(os.path.join(tif_folder, tif_file)) as dataset:
        # Read the first band at every point; non-positive values are
        # recorded as missing.
        brightness = [val[0] if val[0] > 0 else None for val in dataset.sample(coords)]

    # Create a copy of base GeoDataFrame and add brightness + year
    year_gdf = base_gdf.copy()
    year_gdf['Brightness'] = brightness
    year_gdf['Year'] = int(year)

    # Filter out null brightness
    year_gdf = year_gdf[year_gdf['Brightness'].notnull()]
    all_years_data.append(year_gdf)

## Step 3: Create the dataframe
After cleaning and filtering the data, we combine the results for every year into a single dataframe and export it to a CSV file.

In [3]:
# pd.concat raises an unhelpful "No objects to concatenate" on an empty list,
# so fail early with a message that points at the likely cause.
if not all_years_data:
    raise ValueError(
        "No brightness data was collected: check that tif_folder contains "
        ".tif files with a year in their names."
    )

# Combine all years' data
final_gdf = pd.concat(all_years_data, ignore_index=True)

# Export to CSV
final_gdf.to_csv("brightness_by_year.csv", index=False)