In [1]:
import altair as alt
from IPython.display import display
from vega_datasets import data
from shapely.geometry import Point,LineString,Polygon
import geopandas as gpd
import json
import pandas as pd
import matplotlib.pyplot as plt
import random
from shapely.geometry import shape, mapping

In [10]:
chicago_geojson = 'VEGALITE_DATA/chicago.geojson'
chicago_gdf = gpd.read_file(chicago_geojson)
#fix timestamp issue
chicago_gdf['created_at'] = chicago_gdf['created_at'].dt.strftime('%Y-%m-%d %H:%M:%S')
chicago_gdf['updated_at'] = chicago_gdf['updated_at'].dt.strftime('%Y-%m-%d %H:%M:%S')
new_dataframe_json = chicago_gdf.to_dict(orient='records')

# Load the TopoJSON file using geopandas
topojson_data = gpd.read_file("VEGALITE_DATA/chicago.topojson")
topojson_data.crs = 'EPSG:4326'
topojson_data['created_at'] = topojson_data['created_at'].dt.strftime('%Y-%m-%d %H:%M:%S')
topojson_data['updated_at'] = topojson_data['updated_at'].dt.strftime('%Y-%m-%d %H:%M:%S')
# Convert the TopoJSON data to a GeoJSON-like format
geojson_data = topojson_data.to_crs(epsg='4326').to_json()
print(geojson_data)

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"id": null, "name": "Grand Boulevard", "cartodb_id": 1, "created_at": "2013-02-02 23:59:44", "updated_at": "2013-02-02 23:59:44"}, "geometry": {"type": "MultiPolygon", "coordinates": [[[[-87.6067081010941, 41.81681411268711], [-87.60670518925218, 41.81657906643606], [-87.6067001975232, 41.8163390997321], [-87.60669603774903, 41.81609913302813], [-87.6066897980878, 41.81581185427685], [-87.60668397440396, 41.81556583163083], [-87.6066769027879, 41.815300127173124], [-87.60667981462981, 41.81499392360292], [-87.6066818945169, 41.81471989222489], [-87.60667191105891, 41.81428197191497], [-87.606664007488, 41.81399507166007], [-87.6066561039171, 41.81366085935786], [-87.6066511121881, 41.81342089265389], [-87.60664695241395, 41.81317297752597], [-87.60664279263979, 41.812954963611965], [-87.60663488906889, 41.81265595147295], [-87.60662906538506, 41.81240387288487], [-87.6066249056109, 41.8122411194421

In [11]:
# Parse the GeoJSON data
data = json.loads(geojson_data)

# Extract 'cartodb_id' values
cartodb_ids = []
for feature in data['features']:
    if 'properties' in feature:
        properties = feature['properties']
        if 'cartodb_id' in properties:
            cartodb_ids.append({properties['cartodb_id'],properties['name']})

            
# Assuming topojson_data is your data structure
for feature in topojson_data["features"]["properties"]["geometry"]:
    # Extract the arcs for the current geometry
    arcs = topojson_data["arcs"]

    # Create a Shapely geometry object from the arcs
    geometry = shape({"type": feature["type"], "arcs": feature["arcs"]})

    # Calculate the centroid of the geometry
    centroid = list(geometry.centroid.coords[0])

    # Add the "centroid" property to the feature
    feature["properties"]["centroid"] = centroid
    
# Print the list of 'cartodb_id' values
print(cartodb_ids)

KeyError: 'features'

In [None]:
with open('VEGALITE_DATA/Community-Chicago.geojson', 'r') as f:
    geojson_data = json.load(f)

for feature in geojson_data['features']:
    properties = feature['properties']
    community = properties.get('community', 'N/A')  # Use 'N/A' if the key is not present
    area_num = properties.get('area_num_1', 'N/A')  # Use 'N/A' if the key is not present
    #print(f"Community: {community}, Area Num: {area_num}")

print(topojson_data)

In [None]:



spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",


    "width": 1000,
    "height": 1000,
    
    
    "data": {
        "values":topojson_data,
        "format": {"type": "geojson", "feature": "chicago"}
    },
      "projection": {"type": "mercator"},
      "mark": "geoshape",
      "encoding": {      
          "color": {"field": "properties.cartodb_id", "type": "nominal"},
          "tooltip": {"field": "properties.name", "title": "hello"}
      }
    
    }
display(alt.display.html_renderer(spec), raw=True)

In [None]:
#filter out the non chicago TRI datapoints
chicago_boundary_geojson = 'VEGALITE_DATA/Boundaries-Chicago.geojson'
chicago_boundary_gdf = gpd.read_file(chicago_boundary_geojson)
chicago_boundary_gdf.crs = "EPSG:4326"
#print(chicago_boundary_gdf)

org_data = pd.read_csv('VEGALITE_DATA/2021_TRI_FACILITIES.csv')


required_data = ['12. LATITUDE', '13. LONGITUDE','20. INDUSTRY SECTOR']
new_dataframe = org_data[required_data].copy()
new_dataframe.columns = ['lat', 'long','sector']


points = [Point(xy) for xy in zip(new_dataframe['long'],new_dataframe['lat'])] 
new_dataframe = gpd.GeoDataFrame(new_dataframe,geometry=points, crs="EPSG:4326")
#print(new_dataframe)
# Perform the spatial join to find points within the polygons
within_points = gpd.sjoin(new_dataframe, chicago_boundary_gdf, op='within')

print(within_points.columns)



# Create a Matplotlib figure
fig, ax = plt.subplots()

# Plot the Chicago boundary GeoJSON
chicago_boundary_gdf.plot(ax=ax, color='lightgray', edgecolor='black', linewidth=1)

# Plot the data points on top of the boundary
new_dataframe.plot(ax=ax, color='red', markersize=2)
within_points.plot(ax=ax, color='blue', markersize=5)

# Set axis labels
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_aspect('equal')

# Show the Matplotlib plot
plt.show()


required_data = ['lat', 'long','sector']
new_dataframe = within_points[required_data].copy()
new_dataframe.columns = ['lat', 'long','sector']

new_geodataframe = within_points[['sector', 'geometry']].copy()
print(new_geodataframe)
# Print or further process the 'within_points' GeoDataFrame
#print("printing points df")
#print(new_dataframe)

In [None]:
result = gpd.sjoin(new_geodataframe, chicago_gdf, how='left', op='within')
point_counts = result.groupby('cartodb_id').size().reset_index(name='point_count')
#print(point_counts)
print(type(point_counts))
# Create a dictionary to map cartodb_id to point_count
point_count_dict = {} 
for index, row in point_counts.iterrows():
    point_count_dict[row['cartodb_id']] = row['point_count']




    


In [None]:
def assign_range(x,y):
    if (0 <= y < 30) and (0 <= x < 30):
        return '1-1'
    elif (0 <= y < 30) and (30 <= x < 60):
        return '2-1'
    elif (0 <= y < 30) and (60 <= x < 100):
        return '3-1'
    elif (30 <= y < 60) and (0 <= x < 30):
        return '1-2'
    elif (30 <= y < 60) and (30 <= x < 60):
        return '2-2'
    elif (30 <= y < 60) and (60 <= x < 100):
        return '3-2'
    elif (60 <= y < 100) and (0 <= x < 30):
        return '1-3'
    elif (60 <= y < 100) and (30 <= x < 60):
        return '2-3'
    elif (60 <= y < 100) and (60 <= x < 100):
        return '3-3'
    
    else:
        return None

In [None]:
#complaint_counts = data['COMPLAINT TYPE'].value_counts().reset_index()
#complaint_counts.columns = ['complaint_type', 'count']
    
new_dataframe_json = new_dataframe.to_dict(orient='records')

with open('VEGALITE_DATA/chicago.topojson', 'r') as f:
    topojson_data = json.load(f)



# Update the JSON data with point_count
for feature in topojson_data["objects"]["chicago"]["geometries"]:
    cartodb_id = feature["properties"]["cartodb_id"]
    point_count_str = str(point_count_dict.get(cartodb_id, 0))  #make point_count JSON serializable
    feature["properties"]["point_count"] = point_count_str
    feature["properties"]["random_data_count"] = random.randint(1, 100)
    feature["properties"]["random_data_count_2"] = random.randint(1, 100)
    
# Add a new column 'new_column' based on the mapping function
for item in topojson_data["objects"]["chicago"]["geometries"]:
    prop = item["properties"]
    prop['chloropeth_colour'] = assign_range(int(prop['random_data_count_2']),int(prop['random_data_count']))

# Convert the modified JSON data back to a JSON string
#result_json = json.dumps(topojson_data)
print(topojson_data)

In [None]:
''''
custom_pal3 <- c(
  "1-1" = "#d3d3d3", # low x, low y
  "2-1" = "#ba8890",
  "3-1" = "#9e3547", # high x, low y
  "1-2" = "#8aa6c2",
  "2-2" = "#7a6b84", # medium x, medium y
  "3-2" = "#682a41",
  "1-3" = "#4279b0", # low x, high y
  "2-3" = "#3a4e78",
  "3-3" = "#311e3b" # high x, high y
)
'''

In [None]:
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    
    "layer": [

    {
        "data": {
            #"url":'https://raw.githubusercontent.com/siddhantxshirguppe/VEGALITE_DATA/main/chicago.topojson',
            "values":topojson_data,
            "format": {"type": "topojson", "feature": "chicago"}
        },
          "projection": {"type": "mercator"},
          "mark": "geoshape",
          "encoding": {

           "color": {
                  "field": "properties.chloropeth_colour",
                  "type": "nominal",
                  #"scale": {"type": "linear",  "range": ["silver","purple","teal","blue"]}
                   #"scale": {"type": "linear",  "range": ["#e7e1ef", "#c994c7", "#dd1c77", "#980043"]}
                 "scale": {
                        "domain": ["1-1", "2-1", "3-1", "1-2", "2-2", "3-2", "1-3", "2-3", "3-3"],
                        "range": ["#d3d3d3", "#ba8890", "#9e3547", "#8aa6c2", "#7a6b84", "#682a41", "#4279b0","#3a4e78","#311e3b"]
                      }
                },
              
            "tooltip": {"field": "properties.name", "title": "hello"}
          }
    },
          
    {
        "data": {
            "values": new_dataframe_json
          },
        "mark": "circle",
        "encoding": {
            "color": {"field": "sector", "type": "nominal"},
            "longitude": {
              "field": "long",
              "type": "quantitative"
            },
            "latitude": {
              "field": "lat",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
            "size": {"value": 50}
        }

    }
          
  ]
    
    

}

display(alt.display.html_renderer(spec), raw=True)

In [13]:
with open('VEGALITE_DATA/chicago.geojson', 'r') as f:
    geojson_data = json.load(f)

features = geojson_data['features']

for feature in features:
    geometry = shape(feature['geometry'])
    if geometry.type == 'MultiPolygon':
        # Calculate the centroid
        centroid = geometry.centroid
        # Get the latitude and longitude of the centroid
        centroid_latitude = centroid.y
        centroid_longitude = centroid.x
        # Add the 'centroid' property with 'latitude' and 'longitude' children
        feature['properties']['centroid'] = {
            'latitude': centroid_latitude,
            'longitude': centroid_longitude
        }
for feature in geojson_data['features']:
    feature['properties']['poverty_percent'] = random.randint(1, 100)

    
    
print(geojson_data)
        

  if geometry.type == 'MultiPolygon':


{'type': 'FeatureCollection', 'features': [{'type': 'Feature', 'properties': {'name': 'Grand Boulevard', 'cartodb_id': 1, 'created_at': '2013-02-02T23:59:44.022Z', 'updated_at': '2013-02-02T23:59:44.295Z', 'centroid': {'latitude': 41.8129494082601, 'longitude': -87.61785971664077}, 'poverty_percent': 33}, 'geometry': {'type': 'MultiPolygon', 'coordinates': [[[[-87.606708, 41.816814], [-87.606705, 41.816579], [-87.6067, 41.816339], [-87.606696, 41.816099], [-87.60669, 41.815812], [-87.606684, 41.815566], [-87.606677, 41.8153], [-87.60668, 41.814994], [-87.606682, 41.81472], [-87.606672, 41.814282], [-87.606664, 41.813995], [-87.606656, 41.813661], [-87.606651, 41.813421], [-87.606647, 41.813173], [-87.606643, 41.812955], [-87.606635, 41.812656], [-87.606629, 41.812404], [-87.606625, 41.812241], [-87.606623, 41.812134], [-87.606619, 41.811936], [-87.606613, 41.811736], [-87.606609, 41.811578], [-87.606605, 41.811354], [-87.606598, 41.810991], [-87.606591, 41.810697], [-87.606586, 41.8104

In [18]:
        
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    
     "data": {
            "values":geojson_data['features'],
            "format": {"type": "json"}
        },

    "layer": [

    {
       
          "projection": {"type": "mercator"},
              "mark": {
                  "type": "geoshape",
                  #"fill": "#eee",
                  "stroke": "#757575",
                  "strokeWidth": 0.8
                },
        
          "encoding": {

           "color": {
                  "field": "properties.poverty_percent",
                  "type": "quantitative",
                  #"scale": {"type": "linear",  "range": ["silver","purple","teal","blue"]}
                   #"scale": {"type": "linear",  "range": ["#e7e1ef", "#c994c7", "#dd1c77", "#980043"]}
               #"scale": {"type": "linear",  "range": ["#1c9800", "#1c9800", "#000a98", "#980023"]}
               "scale": {"type": "quantile", "range": ["red", "yellow", "green", "blue"], "domain": [0, 25, 50, 75,100]}
               #"scale": {"type": "quantile", "scheme": [["red", "blue"],["yellow","green"]], "domain": [[1, 100],[1,100]]}
                },
              
              
              
            "tooltip": {"field": "properties.cartodb_id", "type": "quantative"}
          }
    },
        
    {
  

        "mark": "circle",
        "encoding": {
            
            "color": {
                  "field": "properties.cartodb_id",
                  "type": "quantitative",
                  #"scale": {"type": "linear",  "range": ["silver","purple","teal","blue"]}
                   #"scale": {"type": "linear",  "range": ["#e7e1ef", "#c994c7", "#dd1c77", "#980043"]}
               "scale": {"type": "quantile",  "range": ["#1c9800", "#1c9800", "#000a98", "#980023"]}
                },
            "size": {"field": "properties.poverty_percent", "type": "quantitative"},
            "longitude": {
              "field": "properties.centroid.longitude",
              "type": "quantitative"
            },
            "latitude": {
              "field": "properties.centroid.latitude",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
           
        }

    }
          
  ]
    
    

}

display(alt.display.html_renderer(spec), raw=True)

In [31]:
        
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    
     "data": {
            "values":geojson_data['features'],
            "format": {"type": "json"}
        },
    
      "concat": [
    {
      "title": "Chart 1",
      "facet": {
        "column": {"field": "properties.cartodb_id", "bin": [0, 50,100]},
      },
      "spec": {
  
        "mark": "circle",
        "encoding": {
            
            "color": {
                  "field": "properties.cartodb_id",
                  "type": "quantitative",
                  #"scale": {"type": "linear",  "range": ["silver","purple","teal","blue"]}
                   #"scale": {"type": "linear",  "range": ["#e7e1ef", "#c994c7", "#dd1c77", "#980043"]}
               "scale": {"type": "linear",  "range": ["#1c9800", "#1c9800", "#000a98", "#980023"]}
                },
            "size": {"field": "properties.poverty_percent", "type": "quantitative"},
            "longitude": {
              "field": "properties.centroid.longitude",
              "type": "quantitative"
            },
            "latitude": {
              "field": "properties.centroid.latitude",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
           
        }

    }
          
  
      }
    
  ],
    
    


    
    

}

display(alt.display.html_renderer(spec), raw=True)

In [49]:
        
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    
     "data": {
            "values":geojson_data['features'],
            "format": {"type": "json"}
        },
    
      "concat": [
    {
      "title": "Chart 1",
      "facet": {
        "column": {"field": "properties.cartodb_id", "bin": [0, 50,100]},
      },
      "spec": {
    "layer": [

    {
       
          
          "mark": {
              "type": "geoshape"
            },
        
          "encoding": {

           "color": {
                  "field": "properties.cartodb_id",
                  "type": "quantitative",
                   "scale": {"type": "linear",  "range": ["#1c9800", "#1c9800", "#000a98", "#980023"]}
                },
              
            "tooltip": {"field": "properties.cartodb_id", "type": "quantative"}
          },
        "projection": {"type": "mercator"},
    },
        
    {
  
        "mark": "circle",
        "encoding": {
            
            "color": {
                  "field": "properties.cartodb_id",
                  "type": "quantitative",
               "scale": {"type": "linear",  "range": ["#1c9800", "#1c9800", "#000a98", "#980023"]}
                },
            "size": {"field": 0, "type": "quantitative"},
            "longitude": {
              "field": "properties.centroid.longitude",
              "type": "quantitative"
            },
            "latitude": {
              "field": "properties.centroid.latitude",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
           
        }

    }
          
  ]
      }
    }
  ],
    
    


    
    

}

display(alt.display.html_renderer(spec), raw=True)