In [3]:
import ee
import pandas as pd
import requests
from io import StringIO

# Initialize the Earth Engine module.
# ee.Authenticate()
ee.Initialize()

# URL of the CSV file
url = 'https://raw.githubusercontent.com/orwell2024/uscrnlib/main/extract_slides/2024stations_days.csv'

# Read the CSV file from the URL.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = pd.read_csv(StringIO(response.text))

# Limit to the first 4 locations for testing purposes
data = data.head(4)

# Function to create and export image for a given location
def create_and_export_image(row):
    """Start a Drive export of the least-cloudy Sentinel-2 RGB image around a station.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'Station', 'LATITUDE' and 'LONGITUDE'.

    Returns:
        None. The Earth Engine export task is started as a side effect.
    """
    station_name = row['Station']
    latitude = row['LATITUDE']
    longitude = row['LONGITUDE']

    # bounds() of a 10,000 m buffer is a box roughly 20 km on a side.
    # NOTE(review): the earlier comment described a 5000 m buffer / 10 km box;
    # the radius is left as the code had it -- confirm which was intended.
    buffer_radius_m = 10000
    point = ee.Geometry.Point([longitude, latitude])
    region = point.buffer(buffer_radius_m).bounds()

    # Pick the scene with the lowest cloud percentage in the date window.
    # The end date is written as zero-padded ISO (it was '2023-6-01').
    least_cloudy = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterBounds(region)
        .filterDate('2023-04-11', '2023-06-01')
        .sort('CLOUDY_PIXEL_PERCENTAGE')
        .first()
    )

    # Acquisition date of the chosen scene; used in the task description.
    date = ee.Date(least_cloudy.get('system:time_start')).format('YYYY-MM-dd').getInfo()

    # Red, green and blue bands only, restricted to the export region.
    image = least_cloudy.select(['B4', 'B3', 'B2']).clip(region)

    # Pass the geometry itself as the region: this avoids the extra blocking
    # getInfo() round trip that fetched the coordinates to the client.
    export_task = ee.batch.Export.image.toDrive(
        image=image,
        description=f"{station_name}_{date}",
        folder='GEE_Images',
        scale=10,
        region=region,
        fileFormat='GeoTIFF',  # Export as GeoTIFF
        maxPixels=1e8
    )

    export_task.start()

# Kick off one export task per station row
for _, station_row in data.iterrows():
    create_and_export_image(station_row)

print("Export tasks have been started.")


Export tasks have been started.


In [16]:
import ee
import pandas as pd
import requests
from io import StringIO

# Initialize the Earth Engine module.
# ee.Authenticate()
ee.Initialize()

# URL of the CSV file
url = 'https://raw.githubusercontent.com/orwell2024/uscrnlib/main/extract_slides/2024stations_days.csv'

# Read the CSV file from the URL.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = pd.read_csv(StringIO(response.text))

# Filter the data to include only stations from Alabama (AL).
# The prefix used to be 'TX_', which contradicted this comment, the `data_al`
# name and the 'updated_stations_data_al.csv' file written later in this cell.
data_al = data[data['Station'].str.startswith('AL_')].copy()

# Result lists, filled by process_location with one entry per station
built_1975_percent_list = []
built_2020_percent_list = []
percentage_change_list = []

# Function to compute built-up statistics and export both rasters for a location
def process_location(row):
    """Record built-up surface statistics for one station and export both rasters.

    Appends the 1975 and 2020 built-up percentages and their relative change
    to the module-level lists ``built_1975_percent_list``,
    ``built_2020_percent_list`` and ``percentage_change_list``, then starts
    two Drive export tasks (1975 and 2020) for the 10 km cell around the
    station.

    Args:
        row: Mapping-like record providing 'Station', 'LATITUDE' and
            'LONGITUDE'.

    Returns:
        None.
    """
    station_name = row['Station']
    latitude = row['LATITUDE']
    longitude = row['LONGITUDE']
    sizeKm = 10  # Size of the cell in kilometers

    # Bounding box of a buffer around the station, half the cell size in radius
    centerPoint = ee.Geometry.Point([longitude, latitude])
    halfSideLength = (sizeKm / 2) * 1000  # Convert km to meters
    cell = centerPoint.buffer(halfSideLength).bounds()

    # Clipped GHSL built-up surface image and its mean over the cell, per year
    clipped = {}
    means = {}
    for year in (1975, 2020):
        image = ee.Image(f'JRC/GHSL/P2023A/GHS_BUILT_S/{year}').select('built_surface')
        clipped[year] = image.clip(cell)
        means[year] = clipped[year].reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=cell,
            scale=30,
            maxPixels=1e9
        ).get('built_surface').getInfo()
    mean1975 = means[1975]
    mean2020 = means[2020]

    # Convert to percent of area. Assumes built_surface is m^2 of built-up
    # area per 100 m x 100 m pixel (so 10,000 m^2 = 100 %) -- per the GHSL
    # catalog entry; confirm if the dataset version changes.
    percentage1975 = round((mean1975 / 10000) * 100, 2) if mean1975 is not None else 0
    percentage2020 = round((mean2020 / 10000) * 100, 2) if mean2020 is not None else 0

    # Relative change, computed from the unrounded means: using the
    # 2-decimal percentages loses almost all precision for sparsely built
    # cells (e.g. 0.006 % and 0.014 % both round to 0.01 %). Undefined
    # (None) when the 1975 baseline is zero or missing.
    if mean1975:
        percentage_change = round((((mean2020 or 0) - mean1975) / mean1975) * 100, 2)
    else:
        percentage_change = None

    # Append results to lists
    built_1975_percent_list.append(percentage1975)
    built_2020_percent_list.append(percentage2020)
    percentage_change_list.append(percentage_change)

    # Export the 1975 image, then the 2020 image, to Google Drive
    for year in (1975, 2020):
        export_task = ee.batch.Export.image.toDrive(
            image=clipped[year],
            description=f'Built_up_surface_{year}_{station_name}_{sizeKm}km_cell',
            folder='GEE_Images',
            scale=30,
            region=cell,
            maxPixels=1e9
        )
        export_task.start()

# Run the per-station processing, one dataframe row at a time
for _, station_row in data_al.iterrows():
    process_location(station_row)

# Attach the collected result lists to the dataframe as new columns
new_columns = {
    'Built_1975_percent': built_1975_percent_list,
    'Built_2020_percent': built_2020_percent_list,
    'Percentage_Change': percentage_change_list,
}
for column_name, column_values in new_columns.items():
    data_al.loc[:, column_name] = column_values

# Write the augmented station table to a new CSV file
output_csv_path = 'updated_stations_data_al.csv'
data_al.to_csv(output_csv_path, index=False)

print("Export tasks have been started and results have been written to the CSV file.")


Export tasks have been started and results have been written to the CSV file.


In [19]:
import ee
import pandas as pd
import requests
from io import StringIO

# Initialize the Earth Engine module.
# ee.Authenticate()
ee.Initialize()

# URL of the CSV file
url = 'https://raw.githubusercontent.com/orwell2024/uscrnlib/main/extract_slides/2024stations_days.csv'

# Read the CSV file from the URL.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = pd.read_csv(StringIO(response.text))

# Filter the data to include only stations from Texas (TX)
data_tx = data[data['Station'].str.startswith('TX_')].copy()

# One result list per output column (three cell sizes x three statistics);
# each list gets one entry per station, in dataframe row order.
results = {
    "Built_1975_50km_percent": [],
    "Built_2020_50km_percent": [],
    "Percentage_Change_50km": [],
    "Built_1975_10km_percent": [],
    "Built_2020_10km_percent": [],
    "Percentage_Change_10km": [],
    "Built_1975_2km_percent": [],
    "Built_2020_2km_percent": [],
    "Percentage_Change_2km": []
}

# Function to process location and calculate built-up surface percentages
def process_location(row, sizeKm):
    """Compute the built-up surface share in 1975 and 2020 for a cell around a station.

    Args:
        row: Mapping-like record providing 'LATITUDE' and 'LONGITUDE'.
        sizeKm: Nominal side length of the cell in kilometers.

    Returns:
        Tuple ``(percentage1975, percentage2020, percentage_change)``. The two
        percentages are rounded to 2 decimals (0 when Earth Engine returns no
        value). ``percentage_change`` is the relative change from 1975 to
        2020 in percent, or 0 when the 1975 baseline is zero or missing.
    """
    latitude = row['LATITUDE']
    longitude = row['LONGITUDE']

    # Bounding box of a buffer around the station, half the cell size in radius
    centerPoint = ee.Geometry.Point([longitude, latitude])
    halfSideLength = (sizeKm / 2) * 1000  # Convert km to meters
    cell = centerPoint.buffer(halfSideLength).bounds()

    def mean_built_surface(year):
        # reduceRegion already restricts the computation to `cell`, so the
        # image is not clipped first (Earth Engine advises against clipping
        # inputs that are only fed to a region reducer).
        image = ee.Image(f'JRC/GHSL/P2023A/GHS_BUILT_S/{year}').select('built_surface')
        return image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=cell,
            scale=30,
            maxPixels=1e9
        ).get('built_surface').getInfo()

    mean1975 = mean_built_surface(1975)
    mean2020 = mean_built_surface(2020)

    # Convert to percent of area. Assumes built_surface is m^2 of built-up
    # area per 100 m x 100 m pixel (so 10,000 m^2 = 100 %) -- per the GHSL
    # catalog entry; confirm if the dataset version changes.
    percentage1975 = round((mean1975 / 10000) * 100, 2) if mean1975 is not None else 0
    percentage2020 = round((mean2020 / 10000) * 100, 2) if mean2020 is not None else 0

    # Relative change, computed from the unrounded means: using the
    # 2-decimal percentages loses almost all precision for sparsely built
    # cells (e.g. 0.006 % and 0.014 % both round to 0.01 %).
    # A zero or missing baseline is reported as 0, as before, although the
    # true relative change is undefined in that case.
    if not mean1975:
        percentage_change = 0
    else:
        percentage_change = round((((mean2020 or 0) - mean1975) / mean1975) * 100, 2)

    return percentage1975, percentage2020, percentage_change

# Evaluate every station at each cell size, largest cell first
for _, station_row in data_tx.iterrows():
    for cell_km in (50, 10, 2):
        built_1975, built_2020, change = process_location(station_row, cell_km)
        results[f"Built_1975_{cell_km}km_percent"].append(built_1975)
        results[f"Built_2020_{cell_km}km_percent"].append(built_2020)
        results[f"Percentage_Change_{cell_km}km"].append(change)

# Attach each result list to the dataframe as a new column
for column_name in results:
    data_tx[column_name] = results[column_name]

# Write the augmented station table to a new CSV file
output_csv_path = 'updated_stations_data_tx.csv'
data_tx.to_csv(output_csv_path, index=False)

print("Processing is complete and results have been written to the CSV file.")


Processing is complete and results have been written to the CSV file.


In [None]:
import ee
import pandas as pd
import requests
from io import StringIO
import re
import time, random
from datetime import datetime

# Initialize the Earth Engine module.
# ee.Authenticate()
ee.Initialize()

# URL of the data
url = 'https://data.giss.nasa.gov/gistemp/station_data_v4_globe/v4.temperature.inv.txt'

# Fetch the data from the URL.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=60)
response.raise_for_status()
data_text = response.text

# Parse the whitespace-delimited inventory one line at a time.
# str.split() ignores leading/trailing whitespace and line endings, so such
# lines are no longer rejected for having a spurious empty field. Station
# names made of several tokens are re-joined instead of being dropped.
# NOTE(review): assumes the column order ID, Lat, Lon, Elev-m, <name...>, BI
# shown in the file header -- confirm against the current file.
valid_rows = []
for line in data_text.splitlines():
    tokens = line.split()
    if len(tokens) < 6:
        continue
    valid_rows.append(tokens[:4] + [" ".join(tokens[4:-1]), tokens[-1]])

data = pd.DataFrame(valid_rows, columns=["ID", "Lat", "Lon", "Elev-m", "Station", "BI"])

# Non-numeric values (including the header line) become NaN and are dropped
data['Lat'] = pd.to_numeric(data['Lat'], errors='coerce')
data['Lon'] = pd.to_numeric(data['Lon'], errors='coerce')
data['BI'] = pd.to_numeric(data['BI'], errors='coerce')

data = data.dropna(subset=['Lat', 'Lon', 'BI'])

# One result list per output column; each gets one entry per station
results = {
    "ID": [],
    "Station": [],
    "BI": [],
    "Built_1975_50km_percent": [],
    "Built_2020_50km_percent": [],
    "Percentage_Change_50km": [],
    "Built_1975_10km_percent": [],
    "Built_2020_10km_percent": [],
    "Percentage_Change_10km": [],
    "Built_1975_2km_percent": [],
    "Built_2020_2km_percent": [],
    "Percentage_Change_2km": []
}

# Function to process location and calculate built-up surface percentages
def process_location(row, sizeKm):
    """Compute the built-up surface share in 1975 and 2020 for a cell around a station.

    Args:
        row: Mapping-like record providing 'Lat' and 'Lon'.
        sizeKm: Nominal side length of the cell in kilometers.

    Returns:
        Tuple ``(percentage1975, percentage2020, percentage_change)``. The two
        percentages are rounded to 2 decimals (0 when Earth Engine returns no
        value). ``percentage_change`` is the relative change from 1975 to
        2020 in percent, or 0 when the 1975 baseline is zero or missing.
    """
    latitude = row['Lat']
    longitude = row['Lon']

    # Bounding box of a buffer around the station, half the cell size in radius
    centerPoint = ee.Geometry.Point([longitude, latitude])
    halfSideLength = (sizeKm / 2) * 1000  # Convert km to meters
    cell = centerPoint.buffer(halfSideLength).bounds()

    # Mean built_surface value over the cell for each year.
    # reduceRegion already restricts the computation to `cell`, so the images
    # are not clipped first (Earth Engine advises against clipping inputs
    # that are only fed to a region reducer).
    means = {}
    for year in (1975, 2020):
        built_surface = ee.Image(f'JRC/GHSL/P2023A/GHS_BUILT_S/{year}').select('built_surface')
        means[year] = built_surface.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=cell,
            scale=30,
            maxPixels=1e9
        ).get('built_surface').getInfo()
    mean1975 = means[1975]
    mean2020 = means[2020]

    # Convert to percent of area. Assumes built_surface is m^2 of built-up
    # area per 100 m x 100 m pixel (so 10,000 m^2 = 100 %) -- per the GHSL
    # catalog entry; confirm if the dataset version changes.
    percentage1975 = round((mean1975 / 10000) * 100, 2) if mean1975 is not None else 0
    percentage2020 = round((mean2020 / 10000) * 100, 2) if mean2020 is not None else 0

    # Relative change, computed from the unrounded means: using the
    # 2-decimal percentages loses almost all precision for sparsely built
    # cells (e.g. 0.006 % and 0.014 % both round to 0.01 %).
    # A zero or missing baseline is reported as 0, as before, although the
    # true relative change is undefined in that case.
    if not mean1975:
        percentage_change = 0
    else:
        percentage_change = round((((mean2020 or 0) - mean1975) / mean1975) * 100, 2)

    return percentage1975, percentage2020, percentage_change

# Function to process a batch of locations
def process_batch(batch_data, output_csv_path='updated_stations_data_giss.csv'):
    """Compute built-up statistics for a batch of stations and append them to a CSV.

    For every row, ``process_location`` is called for 50 km, 10 km and 2 km
    cells. The batch is then appended to ``output_csv_path``; the header is
    written only when the file does not exist yet or is empty.

    Args:
        batch_data: DataFrame whose rows provide 'ID', 'Station' and 'BI'
            plus whatever ``process_location`` reads from a row.
        output_csv_path: Destination CSV. The default is deliberately not
            'updated_stations_data_tx.csv': the Texas cell writes that file
            with a different set of columns, so appending to it would mix
            two schemas in one file.

    Returns:
        None.
    """
    import os  # local import so this cell does not depend on a new top-level import

    cell_sizes_km = (50, 10, 2)
    columns = ["ID", "Station", "BI"]
    for size_km in cell_sizes_km:
        columns += [
            f"Built_1975_{size_km}km_percent",
            f"Built_2020_{size_km}km_percent",
            f"Percentage_Change_{size_km}km",
        ]

    # Build one complete record per station. A record is only kept once all
    # three cell sizes succeeded, so an exception part-way through a row
    # cannot leave columns with different lengths.
    records = []
    for _, row in batch_data.iterrows():
        record = {"ID": row["ID"], "Station": row["Station"], "BI": row["BI"]}
        for size_km in cell_sizes_km:
            built_1975, built_2020, change = process_location(row, size_km)
            record[f"Built_1975_{size_km}km_percent"] = built_1975
            record[f"Built_2020_{size_km}km_percent"] = built_2020
            record[f"Percentage_Change_{size_km}km"] = change
        records.append(record)

    # Create a DataFrame for the batch results (fixed column order)
    batch_df = pd.DataFrame(records, columns=columns)

    # Append to the CSV, writing the header only for a new or empty file
    needs_header = (
        not os.path.exists(output_csv_path)
        or os.path.getsize(output_csv_path) == 0
    )
    batch_df.to_csv(output_csv_path, mode='a', index=False, header=needs_header)

# Process the data in batches of 100 stations
batch_size = 100
for start_index in range(0, len(data), batch_size):
    print ("starting batch  ", start_index, "  ", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    end_index = min(start_index + batch_size, len(data))
    batch_data = data.iloc[start_index:end_index]
    process_batch(batch_data)
    # Pause for a random 20-100 seconds between batches (presumably to space
    # out the Earth Engine requests). There is nothing to wait for after the
    # final batch, so the pause is skipped there.
    if end_index < len(data):
        sleep_time = random.randint(20, 100)
        time.sleep(sleep_time)

print("Processing is complete and results have been written to the CSV file.")


starting batch   0    2024-07-02 14:42:39
starting batch   100    2024-07-02 14:46:08
starting batch   200    2024-07-02 14:48:43
starting batch   300    2024-07-02 14:52:49
starting batch   400    2024-07-02 14:56:50
starting batch   500    2024-07-02 15:00:37
starting batch   600    2024-07-02 15:05:24
starting batch   700    2024-07-02 15:09:34
starting batch   800    2024-07-02 15:13:28
starting batch   900    2024-07-02 15:17:18
starting batch   1000    2024-07-02 15:21:04
starting batch   1100    2024-07-02 15:25:22
starting batch   1200    2024-07-02 15:29:20
starting batch   1300    2024-07-02 15:33:04
starting batch   1400    2024-07-02 15:36:55
starting batch   1500    2024-07-02 15:40:29
starting batch   1600    2024-07-02 15:44:47
starting batch   1700    2024-07-02 15:48:54
starting batch   1800    2024-07-02 15:53:36
starting batch   1900    2024-07-02 15:58:32
starting batch   2000    2024-07-02 16:03:46


In [27]:
from datetime import datetime

# Print the current time (same format as the batch progress messages)
print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


2024-07-02 14:40:04
