In [1]:
import os
import glob 
import re

import earthpy as et
import geojson
import pandas as pd
import geopandas as gpd

In [2]:
pwd

'/Users/robynmarowitz/projects/tempo-site/data/textile-source'

In [3]:
os.chdir("../monthly")
# print(glob.glob('*'))

In [4]:
file_list = glob.glob("*.geojson")
file_list

['HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_022024_15Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_062024_01Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_082023_11Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_082024_18Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_052024_18Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_072024_13Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_112023_19Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_012024_22Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_042024_16Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_082023_23Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_102023_17Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_072024_21Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_052024_11Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_082024_11Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_082023_18Z_V3.geojson',
 'HAQ_TEMPO_NO2_CONUS_QA75_L3_Monthly_032024_20Z_V3.geojson',
 'HAQ_TE

In [5]:
def extract_date_from_filename(filename):
    # Use regular expression to capture the date between 'Monthly_' and '_V3'
    match = re.search(r'Monthly_(\d{6})', filename)
    if match:
        return match.group(1)
    else:
        return None  # In case the date is not found

In [6]:
def extract_time_from_filename(filename):
    match = re.search(r'(\d{2}Z)', filename)
    if match:
        return match.group(1)
    else:
        return None  # In case the time is not found

In [7]:
gdfs = []
for file in file_list:
    # print(file)
    gdf = gpd.read_file(file)
    # Extract the date from the filename
    date_str = extract_date_from_filename(file)
    time_str = extract_time_from_filename(file)
    gdf['date'] = pd.to_datetime(date_str, format='%m%Y')
    gdf['time'] = time_str
    gdfs.append(gdf)

gdfs

gdfs_cleaned = [gdf.dropna(axis=1, how='all') for gdf in gdfs]
gdfs_cleaned
# monthly_gdf = pd.concat(gdfs, ignore_index=True)


[     OBJECTID         FIPS  FIPS_new  field_avg  \
 0           5  08035014300      8035   0.211645   
 1           6  08035014107      8035   0.841271   
 2           7  08035013904      8035   1.481537   
 3           8  08035014503      8035   1.074619   
 4          11  08035014108      8035   0.684989   
 ..        ...          ...       ...        ...   
 863      1435  08001008526      8001   2.576503   
 864      1436  08001060100      8001   2.317013   
 865      1437  08001060200      8001   2.380379   
 866      1444  08001008538      8001   5.211911   
 867      1445  08001015000      8001   3.605650   
 
                                               geometry       date time  
 0    POLYGON ((-105.32923 39.1297, -105.32946 39.12... 2024-02-01  15Z  
 1    POLYGON ((-104.98894 39.5574, -104.98891 39.55... 2024-02-01  15Z  
 2    POLYGON ((-104.78338 39.52756, -104.78339 39.5... 2024-02-01  15Z  
 3    POLYGON ((-104.86821 39.41264, -104.86813 39.4... 2024-02-01  15Z  
 4  

In [8]:
monthly_gdf = pd.concat(gdfs_cleaned, ignore_index=True)
monthly_gdf

Unnamed: 0,OBJECTID,FIPS,FIPS_new,field_avg,geometry,date,time
0,5,08035014300,8035,0.211645,"POLYGON ((-105.32923 39.1297, -105.32946 39.12...",2024-02-01,15Z
1,6,08035014107,8035,0.841271,"POLYGON ((-104.98894 39.5574, -104.98891 39.55...",2024-02-01,15Z
2,7,08035013904,8035,1.481537,"POLYGON ((-104.78338 39.52756, -104.78339 39.5...",2024-02-01,15Z
3,8,08035014503,8035,1.074619,"POLYGON ((-104.86821 39.41264, -104.86813 39.4...",2024-02-01,15Z
4,11,08035014108,8035,0.684989,"POLYGON ((-104.99159 39.55006, -104.99174 39.5...",2024-02-01,15Z
...,...,...,...,...,...,...,...
159707,1435,08001008526,8001,4.162874,"POLYGON ((-104.98812 39.95758, -104.9881 39.95...",2024-01-01,17Z
159708,1436,08001060100,8001,4.244451,"POLYGON ((-105.01685 39.95312, -105.01687 39.9...",2024-01-01,17Z
159709,1437,08001060200,8001,4.249065,"POLYGON ((-105.03442 39.92048, -105.03442 39.9...",2024-01-01,17Z
159710,1444,08001008538,8001,4.441342,"POLYGON ((-104.81852 39.89877, -104.81847 39.8...",2024-01-01,17Z


In [9]:
# I only want fips 8013 and and 8031
fips_to_keep = ["8013", "8031"] 
monthly_gdf["FIPS_new"] = monthly_gdf["FIPS_new"].astype(str)
filtered_gdf = monthly_gdf[monthly_gdf["FIPS_new"].isin(fips_to_keep)]
filtered_gdf

Unnamed: 0,OBJECTID,FIPS,FIPS_new,field_avg,geometry,date,time
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z
19,62,08013013705,8013,,"POLYGON ((-105.69436 39.95016, -105.69437 39.9...",2024-02-01,15Z
20,63,08013013706,8013,0.279966,"POLYGON ((-105.51202 40.02009, -105.51144 40.0...",2024-02-01,15Z
23,67,08013012206,8013,1.383906,"POLYGON ((-105.27278 40.01727, -105.27277 40.0...",2024-02-01,15Z
24,68,08013012107,8013,1.492140,"POLYGON ((-105.2727 40.03642, -105.2727 40.036...",2024-02-01,15Z
...,...,...,...,...,...,...,...
159686,1414,08031008305,8031,3.920340,"POLYGON ((-104.83571 39.79165, -104.83572 39.7...",2024-01-01,17Z
159687,1415,08031008306,8031,3.573776,"POLYGON ((-104.8471 39.78928, -104.8471 39.789...",2024-01-01,17Z
159703,1431,08031011902,8031,2.439372,"POLYGON ((-105.07198 39.65996, -105.07198 39.6...",2024-01-01,17Z
159704,1432,08031011903,8031,1.968481,"POLYGON ((-105.08147 39.66858, -105.08146 39.6...",2024-01-01,17Z


In [10]:
census_gdf = gpd.read_file('../preprocess/Colorado_Census_Tract_Boundaries.geojson')
census_gdf

Unnamed: 0,OBJECTID,FIPS,geometry
0,1,08041002300,"POLYGON ((-104.83616 38.823, -104.83614 38.822..."
1,2,08041006600,"POLYGON ((-104.88196 38.8566, -104.882 38.8564..."
2,3,08041006100,"POLYGON ((-104.75753 38.83877, -104.75753 38.8..."
3,4,08041006200,"POLYGON ((-104.74872 38.82611, -104.74872 38.8..."
4,5,08035014300,"POLYGON ((-105.32923 39.1297, -105.32946 39.12..."
...,...,...,...
1442,1443,08111972600,"POLYGON ((-107.97507 37.68365, -107.97465 37.6..."
1443,1444,08001008538,"POLYGON ((-104.81852 39.89877, -104.81847 39.8..."
1444,1445,08001015000,"POLYGON ((-104.98885 39.79109, -104.98886 39.7..."
1445,1446,08097000500,"POLYGON ((-106.98097 39.33399, -106.98093 39.3..."


In [11]:
df = pd.read_csv('../preprocess/state_and_county_fips_master.csv')
df

Unnamed: 0,fips,name,state
0,0,UNITED STATES,
1,1000,ALABAMA,
2,1001,Autauga County,AL
3,1003,Baldwin County,AL
4,1005,Barbour County,AL
...,...,...,...
3190,56037,Sweetwater County,WY
3191,56039,Teton County,WY
3192,56041,Uinta County,WY
3193,56043,Washakie County,WY


In [12]:
co_df = df[df['state']=='CO'] # Filter to only Colorado
# Create new column with County FIPS from tract fips
census_gdf['FIPS_new'] = census_gdf['FIPS'].str[:5].str.lstrip('0').astype(int)
census_gdf

Unnamed: 0,OBJECTID,FIPS,geometry,FIPS_new
0,1,08041002300,"POLYGON ((-104.83616 38.823, -104.83614 38.822...",8041
1,2,08041006600,"POLYGON ((-104.88196 38.8566, -104.882 38.8564...",8041
2,3,08041006100,"POLYGON ((-104.75753 38.83877, -104.75753 38.8...",8041
3,4,08041006200,"POLYGON ((-104.74872 38.82611, -104.74872 38.8...",8041
4,5,08035014300,"POLYGON ((-105.32923 39.1297, -105.32946 39.12...",8035
...,...,...,...,...
1442,1443,08111972600,"POLYGON ((-107.97507 37.68365, -107.97465 37.6...",8111
1443,1444,08001008538,"POLYGON ((-104.81852 39.89877, -104.81847 39.8...",8001
1444,1445,08001015000,"POLYGON ((-104.98885 39.79109, -104.98886 39.7...",8001
1445,1446,08097000500,"POLYGON ((-106.98097 39.33399, -106.98093 39.3...",8097


In [16]:
filtered_gdf = filtered_gdf.to_crs(census_gdf.crs)
filtered_gdf

census_gdf['FIPS_new'] = census_gdf['FIPS_new'].astype('int64')
filtered_gdf['FIPS_new'] = filtered_gdf['FIPS_new'].astype('int64')

In [14]:
joined_gdf_1 = gpd.sjoin(filtered_gdf, census_gdf, how='inner', predicate='intersects')  # or use 'within', 'contains', etc.
joined_gdf_1

Unnamed: 0,OBJECTID_left,FIPS_left,FIPS_new_left,field_avg,geometry,date,time,index_right,OBJECTID_right,FIPS_right,FIPS_new_right
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z,62,63,08013013706,8013
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z,61,62,08013013705,8013
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z,103,104,08013013704,8013
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z,60,61,08013013703,8013
18,61,08013013703,8013,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z,653,654,08013013602,8013
...,...,...,...,...,...,...,...,...,...,...,...
159705,1433,08031012001,8031,2.80282,"POLYGON ((-105.07682 39.63897, -105.07681 39.6...",2024-01-01,17Z,892,893,08059011904,8059
159705,1433,08031012001,8031,2.80282,"POLYGON ((-105.07682 39.63897, -105.07681 39.6...",2024-01-01,17Z,1430,1431,08031011902,8031
159705,1433,08031012001,8031,2.80282,"POLYGON ((-105.07682 39.63897, -105.07681 39.6...",2024-01-01,17Z,656,657,08031004801,8031
159705,1433,08031012001,8031,2.80282,"POLYGON ((-105.07682 39.63897, -105.07681 39.6...",2024-01-01,17Z,1387,1388,08031005503,8031


In [18]:
joined_gdf = census_gdf.merge(filtered_gdf, on='FIPS_new', how='inner')  # Change 'inner' to 'left', 'right', or 'outer' if needed

joined_gdf.head()

Unnamed: 0,OBJECTID_x,FIPS_x,geometry_x,FIPS_new,OBJECTID_y,FIPS_y,field_avg,geometry_y,date,time
0,61,8013013703,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",8013,61,8013013703,,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",2024-02-01,15Z
1,61,8013013703,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",8013,62,8013013705,,"POLYGON ((-105.69436 39.95016, -105.69437 39.9...",2024-02-01,15Z
2,61,8013013703,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",8013,63,8013013706,0.279966,"POLYGON ((-105.51202 40.02009, -105.51144 40.0...",2024-02-01,15Z
3,61,8013013703,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",8013,67,8013012206,1.383906,"POLYGON ((-105.27278 40.01727, -105.27277 40.0...",2024-02-01,15Z
4,61,8013013703,"POLYGON ((-105.65185 40.02495, -105.6528 40.02...",8013,68,8013012107,1.49214,"POLYGON ((-105.2727 40.03642, -105.2727 40.036...",2024-02-01,15Z


NameError: name 'date' is not defined