In [20]:
import pandas as pd
from long_lat_multifamily_la2 import longitude_latitude_dict
from panoclusterref import pano_to_cluster_dict
from clusterpanoref import cluster_to_pano_dict
from control import projecttocontrolclusters_dict
from treatment import projecttotreatmentclusters_dict

In [21]:
filteredprojects_df = pd.read_csv('filtered_projects_2014_2021.csv')

filteredprojects_df

Unnamed: 0,project,location,status,earliest_date,month_year,year
0,f98fc467_1600-1616-e-florence-avenue,"1600 Florence Ave Los Angeles, CA 90001",Under construction,2016-10-14,2016-10,2016.0
1,a89d0157_1043-1049-s-harvard-boulevard,"1043 S Harvard Blvd Los Angeles, CA 90006",Under construction,2020-02-05,2020-02,2020.0
2,389bc14c_4804-oakwood-avenue,"4804 Oakwood Ave Los Angeles, CA 90004",Under construction,2019-01-31,2019-01,2019.0
3,b7a0d5b3_7617-santa-monica,"7617 Santa Monica Blvd West Hollywood, CA 9...",Under construction,2018-07-27,2018-07,2018.0
4,942f9c21_the-residences-on-jamboree-2801-kelvin,"2801 Kelvin Avenue Irvine, CA 92614",Completed,2016-03-31,2016-03,2016.0
...,...,...,...,...,...,...
1017,3b9e3e30_14540-erwin-street,"14540 Erwin St Los Angeles, CA 91411",Under construction,2019-10-24,2019-10,2019.0
1018,e2e38561_425-palm-drive,"425 N Palm Dr Beverly Hills, CA 90210",Under construction,2019-07-19,2019-07,2019.0
1019,e374e0f1_blossom-plaza-900-n-broadway,"900 North Broadway Los Angeles, CA 90012",Completed,2014-02-10,2014-02,2014.0
1020,f2e13a5e_metamorphosis-on-foothill,"13574 Foothill Blvd Los Angeles, CA 91342",Under construction,2019-10-16,2019-10,2019.0


In [22]:
treecoveragestats_df = pd.read_csv('treecoveragestats.csv')
treecoveragestats_df

Unnamed: 0,panoid,tree,terrain,car
0,9doxnenYxPJYu4e4V4nHKQ,0.000600,0.000000,13.164266
1,WpVaExkx_14HmAvJA7Zszw,18.982510,1.742198,13.564729
2,zT-h45giZrhkQnIhKZzTog,9.653549,0.421725,3.379630
3,w6fz4TzZt5HX2_w9PY39yg,36.615398,3.932442,15.089249
4,oFPRw68XZZKHz5HO_He-0Q,8.288237,0.227366,6.129887
...,...,...,...,...
145242,6XhYDmU7PAzvqeTvDD98ug,5.483025,1.140261,2.911008
145243,ifSC0lJjuj9uddVewkiO_Q,9.778721,0.016204,0.000000
145244,rx04nMQb9ekN_sWeMrOklQ,0.156121,0.000000,7.843621
145245,K-X0uLwW_hrO-N0LMO8RJA,0.988340,0.000000,0.021433


In [23]:
panoswithdates_df = pd.read_csv('panoswithdates.csv')
panoswithdates_df


Unnamed: 0,pano_id,lat,lon,date
0,bUGfUXM-ojSorfEF7bwIBQ,34.036371,-118.454006,
1,hRw1KV1SLiM3EXYZCCwNlg,34.036445,-118.454069,
2,42xb66Sof9Y3pPVs1Cuncw,34.036298,-118.453943,
3,p1beIhVTANaYc69iUhsBTg,34.035921,-118.453612,
4,oSlFbp50Wb7VW2fCB9ed6A,34.035995,-118.453676,
...,...,...,...,...
2666124,6nSIiCOfwhl8h5TSjCDQRQ,33.999007,-118.444062,2015-03
2666125,0KWfB3XQ1cYOjBCswbXDWg,33.999014,-118.444049,2016-02
2666126,K93NB1VzLXCSqAZrQhXK5w,33.999000,-118.444074,2017-11
2666127,jbDsPVmDsj8rCbB0PQ6Z7Q,33.999001,-118.444069,2019-04


In [24]:
# Create an empty DataFrame to store the merged data
merged_data = pd.DataFrame(columns=[
    'pano_id', 
    'project_id', 
    'year_pano', 
    'year_project', 
    'tree_coverage', 
    'terrain_coverage', 
    'car_coverage', 
    'distance_to_project', 
    'treatment_control',
    'treated'
])

# Export the empty DataFrame to CSV
merged_data.to_csv('merged_dataset.csv', index=False)


In [25]:
# make pano data lookup table
# Create dictionaries for tree, terrain, and car data with panoid as the key
panoid_to_tree_dict = dict(zip(treecoveragestats_df['panoid'], treecoveragestats_df['tree']))
panoid_to_terrain_dict = dict(zip(treecoveragestats_df['panoid'], treecoveragestats_df['terrain']))
panoid_to_car_dict = dict(zip(treecoveragestats_df['panoid'], treecoveragestats_df['car']))


# Create a dictionary for dates with pano_id as the key
# Convert date format from YYYY-MM to YYYY or keep as NaN
panoid_to_date_dict = {}
for pano_id, date in zip(panoswithdates_df['pano_id'], panoswithdates_df['date']):
    if pd.notna(date) and isinstance(date, str) and len(date) >= 4:
        # Extract just the year (first 4 characters) from the date string
        panoid_to_date_dict[pano_id] = date[:4]
    else:
        panoid_to_date_dict[pano_id] = float('nan')



# Iterate through the locations in filteredprojects_df
for index, row in filteredprojects_df.iterrows():
    print(f"processing project {index}")
    location = row['location']
    project_name = row['project']
    status = row['status']
    year = row['year']

    coords = longitude_latitude_dict[location]
    lat = coords[0]
    lon = coords[1]

    try:
        treatment_cluster_coords_ls = projecttotreatmentclusters_dict[(lat, lon)]
    except KeyError:
        # Handle case where coordinates are not in the dictionary
        treatment_cluster_coords_ls = []  # Empty list if no treatment clusters exist
    #get all the clusters in treatment zone of project
    for treatment_cluster_coords in treatment_cluster_coords_ls:
        treatment_cluster_coord = treatment_cluster_coords[0]
        distance_toproject = treatment_cluster_coords[1]
        lat = treatment_cluster_coord[0]
        lon = treatment_cluster_coord[1]

        treatment_cluster_pano_ls = cluster_to_pano_dict[(lat, lon)]

        #get all the panos in cluster
        for pano in treatment_cluster_pano_ls:
            pano_id = pano
            project_id = index
            year_pano = panoid_to_date_dict[pano]
            year_project = year
            tree_coverage = panoid_to_tree_dict[pano]
            terrain_coverage = panoid_to_terrain_dict[pano]
            car_coverage = panoid_to_car_dict[pano]
            treatment_control = 1
            if pd.notna(year_pano) and pd.notna(year_project) and float(year_pano) >= float(year_project):
                treated = 1
            else:
                treated = 0

            # Add the treatment data to the results
            # Append to results list for later dataframe creation
    
            
            # Also directly append to merged_dataset.csv using pandas
            pd.DataFrame([{
                'pano_id': pano_id,
                'project_id': project_id,
                'year_pano': year_pano,
                'year_project': year_project,
                'tree_coverage': tree_coverage,
                'terrain_coverage': terrain_coverage,
                'car_coverage': car_coverage,
                'treatment_control': treatment_control,
                'treated': treated,
                'distance_to_project': distance_toproject
            }]).to_csv('merged_dataset.csv', mode='a', header=False, index=False)
            
    # Now handle the control clusters for this project
    try:
        control_cluster_coords_ls = projecttocontrolclusters_dict[(lat, lon)]
    except KeyError:
        # Handle case where coordinates are not in the dictionary
        control_cluster_coords_ls = []  # Empty list if no control clusters exist
    # Get all the clusters in control zone of project
    for control_cluster_coords in control_cluster_coords_ls:
        control_cluster_coord = control_cluster_coords[0]
        distance_toproject = control_cluster_coords[1]
        lat = control_cluster_coord[0]
        lon = control_cluster_coord[1]
        
        control_cluster_pano_ls = cluster_to_pano_dict[(lat, lon)]
        
        # Get all the panos in control cluster
        for pano in control_cluster_pano_ls:
            pano_id = pano
            project_id = index
            year_pano = panoid_to_date_dict[pano]
            year_project = year
            tree_coverage = panoid_to_tree_dict[pano]
            terrain_coverage = panoid_to_terrain_dict[pano]
            car_coverage = panoid_to_car_dict[pano]
            treatment_control = 0  # This is a control
            treated = 0
                
            pd.DataFrame([{
                'pano_id': pano_id,
                'project_id': project_id,
                'year_pano': year_pano,
                'year_project': year_project,
                'tree_coverage': tree_coverage,
                'terrain_coverage': terrain_coverage,
                'car_coverage': car_coverage,
                'treatment_control': treatment_control,
                'treated': treated,
                'distance_to_project': distance_toproject
            }]).to_csv('merged_dataset.csv', mode='a', header=False, index=False)
            




    



KeyError: (33.97286491671424, -118.2455040182438)

In [26]:
projecttocontrolclusters_dict[(33.97286491671424, -118.2455040182438)]

KeyError: (33.97286491671424, -118.2455040182438)