## states.json

## Data loading

### Subtask:
Load the "states.json" file into a GeoDataFrame.


In [120]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import json
from IPython.display import display

# Load the state polygons and attributes into a GeoDataFrame. A failed load is
# reported with a readable message and then re-raised: every later cell needs
# `gdf`, so continuing past the error would only resurface as a NameError there.
try:
    gdf = gpd.read_file("/content/states.json")
except FileNotFoundError:
    print("Error: 'states.json' not found. Please ensure the file is in the current directory or provide the correct path.")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    # Preview the first rows, then print the column summary. info() prints its
    # report itself and returns None, so it is called directly instead of being
    # wrapped in display(), which would render a stray "None".
    display(gdf.head())
    gdf.info()

Unnamed: 0,OBJECTID_1,ObjectID,AREA,STATE_NAME,STATE_FIPS,SUB_REGION,STATE_ABBR,POP2000,POP2003,POP00_SQMI,...,NO_FARMS97,AVG_SIZE97,CROP_ACR97,AVG_SALE97,Shape_Length,Shape_Area,raster,x,y,geometry
0,1,0,6380.614,Hawaii,15,Pacific,HI,1211537,1242249,190,...,5473,263,292107,90.8,11.952816,1.429291,,"156° 19' 37.60523410"" W","20° 14' 25.62028096"" N","MULTIPOLYGON (((-160.0738 22.00418, -160.04971..."
1,2,1,576594.104,Alaska,2,Pacific,AK,626932,645110,1,...,548,1608,94810,44.98,407.570945,277.52408,,"152° 34' 16.70401446"" W","64° 19' 01.81815150"" N","MULTIPOLYGON (((-161.33378 58.73325, -161.3824..."
2,3,65536,67290.061,Washington,53,Pacific,WA,5894121,6137285,88,...,29011,523,7913709,164.34,34.956834,20.750762,,"120° 25' 40.89998346"" W","47° 22' 36.33943823"" N","MULTIPOLYGON (((-122.40202 48.22522, -122.4628..."
3,4,65537,83343.643,Idaho,16,Mtn,ID,1293953,1377016,16,...,22314,530,6308877,149.94,28.529685,24.391584,,"114° 39' 42.71746078"" W","44° 23' 09.22635453"" N","POLYGON ((-117.02629 43.67903, -117.02379 43.7..."
4,5,131072,147244.653,Montana,30,Mtn,MT,902195,916257,6,...,24279,2414,17629001,77.05,34.5281,45.134304,,"109° 39' 09.12888984"" W","47° 01' 56.16181351"" N","POLYGON ((-111.47543 44.70216, -111.4808 44.69..."


<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 55 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   OBJECTID_1    51 non-null     int32   
 1   ObjectID      51 non-null     int32   
 2   AREA          51 non-null     float64 
 3   STATE_NAME    51 non-null     object  
 4   STATE_FIPS    51 non-null     object  
 5   SUB_REGION    51 non-null     object  
 6   STATE_ABBR    51 non-null     object  
 7   POP2000       51 non-null     int32   
 8   POP2003       51 non-null     int32   
 9   POP00_SQMI    51 non-null     int32   
 10  WHITE         51 non-null     int32   
 11  BLACK         51 non-null     int32   
 12  AMERI_ES      51 non-null     int32   
 13  ASIAN         51 non-null     int32   
 14  HAWN_PI       51 non-null     int32   
 15  OTHER         51 non-null     int32   
 16  MULT_RACE     51 non-null     int32   
 17  HISPANIC      51 non-null     int32   
 18  MALE

None

## Data exploration


In [121]:
# Examine the first few rows
display(gdf.head())

# Print a summary of the DataFrame. info() writes the report itself and returns
# None, so it is not wrapped in display() (that would render a stray "None").
gdf.info()

# Check the coordinate reference system
print(gdf.crs)

# Keep only the identifier, demographic and geometry columns; every other column
# is dropped from `gdf` from this point on.
relevant_columns = ['STATE_NAME','STATE_FIPS','POP2000', 'POP2003', 'POP00_SQMI', 'WHITE', 'BLACK', 'AMERI_ES', 'ASIAN', 'HAWN_PI', 'OTHER', 'MULT_RACE', 'HISPANIC', 'MALES', 'FEMALES','geometry']
gdf = gdf[relevant_columns]

# Count missing values per retained column (gdf now holds exactly those columns,
# so no second column selection is needed).
display(gdf.isnull().sum())

Unnamed: 0,OBJECTID_1,ObjectID,AREA,STATE_NAME,STATE_FIPS,SUB_REGION,STATE_ABBR,POP2000,POP2003,POP00_SQMI,...,NO_FARMS97,AVG_SIZE97,CROP_ACR97,AVG_SALE97,Shape_Length,Shape_Area,raster,x,y,geometry
0,1,0,6380.614,Hawaii,15,Pacific,HI,1211537,1242249,190,...,5473,263,292107,90.8,11.952816,1.429291,,"156° 19' 37.60523410"" W","20° 14' 25.62028096"" N","MULTIPOLYGON (((-160.0738 22.00418, -160.04971..."
1,2,1,576594.104,Alaska,2,Pacific,AK,626932,645110,1,...,548,1608,94810,44.98,407.570945,277.52408,,"152° 34' 16.70401446"" W","64° 19' 01.81815150"" N","MULTIPOLYGON (((-161.33378 58.73325, -161.3824..."
2,3,65536,67290.061,Washington,53,Pacific,WA,5894121,6137285,88,...,29011,523,7913709,164.34,34.956834,20.750762,,"120° 25' 40.89998346"" W","47° 22' 36.33943823"" N","MULTIPOLYGON (((-122.40202 48.22522, -122.4628..."
3,4,65537,83343.643,Idaho,16,Mtn,ID,1293953,1377016,16,...,22314,530,6308877,149.94,28.529685,24.391584,,"114° 39' 42.71746078"" W","44° 23' 09.22635453"" N","POLYGON ((-117.02629 43.67903, -117.02379 43.7..."
4,5,131072,147244.653,Montana,30,Mtn,MT,902195,916257,6,...,24279,2414,17629001,77.05,34.5281,45.134304,,"109° 39' 09.12888984"" W","47° 01' 56.16181351"" N","POLYGON ((-111.47543 44.70216, -111.4808 44.69..."


<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 55 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   OBJECTID_1    51 non-null     int32   
 1   ObjectID      51 non-null     int32   
 2   AREA          51 non-null     float64 
 3   STATE_NAME    51 non-null     object  
 4   STATE_FIPS    51 non-null     object  
 5   SUB_REGION    51 non-null     object  
 6   STATE_ABBR    51 non-null     object  
 7   POP2000       51 non-null     int32   
 8   POP2003       51 non-null     int32   
 9   POP00_SQMI    51 non-null     int32   
 10  WHITE         51 non-null     int32   
 11  BLACK         51 non-null     int32   
 12  AMERI_ES      51 non-null     int32   
 13  ASIAN         51 non-null     int32   
 14  HAWN_PI       51 non-null     int32   
 15  OTHER         51 non-null     int32   
 16  MULT_RACE     51 non-null     int32   
 17  HISPANIC      51 non-null     int32   
 18  MALE

None

None


Unnamed: 0,0
STATE_NAME,0
STATE_FIPS,0
POP2000,0
POP2003,0
POP00_SQMI,0
WHITE,0
BLACK,0
AMERI_ES,0
ASIAN,0
HAWN_PI,0


## Data preparation

### Subtask:
Prepare the GeoDataFrame `gdf` for visualization by handling the CRS and any necessary data transformations.


In [122]:
# Convert the population / demographic columns to numeric types.
# errors='coerce' turns unparseable entries into NaN instead of raising, so the
# number of values lost that way is reported per column rather than passing silently.
numeric_columns = ['POP2000', 'POP2003', 'POP00_SQMI', 'WHITE', 'BLACK', 'AMERI_ES', 'ASIAN', 'HAWN_PI', 'OTHER', 'MULT_RACE', 'HISPANIC', 'MALES', 'FEMALES']

for col in numeric_columns:
    if col not in gdf.columns:
        continue  # column absent from this frame; nothing to convert
    try:
        converted = pd.to_numeric(gdf[col], errors='coerce')
    except Exception as e:
        print(f"Error converting {col} to numeric: {e}")
        continue
    # Values that were present before the conversion but are NaN afterwards
    # are the ones pd.to_numeric could not parse.
    newly_missing = int(converted.isna().sum() - gdf[col].isna().sum())
    if newly_missing:
        print(f"Warning: {newly_missing} value(s) in {col} could not be parsed and were set to NaN")
    gdf[col] = converted

display(gdf.head())
# info() prints its own report and returns None; calling it directly avoids
# rendering a stray "None" underneath the summary.
gdf.info()

Unnamed: 0,STATE_NAME,STATE_FIPS,POP2000,POP2003,POP00_SQMI,WHITE,BLACK,AMERI_ES,ASIAN,HAWN_PI,OTHER,MULT_RACE,HISPANIC,MALES,FEMALES,geometry
0,Hawaii,15,1211537,1242249,190,294102,22003,3535,503868,113539,15147,259343,87699,608671,602866,"MULTIPOLYGON (((-160.0738 22.00418, -160.04971..."
1,Alaska,2,626932,645110,1,434534,21787,98043,25116,3309,9997,34146,25852,324112,302820,"MULTIPOLYGON (((-161.33378 58.73325, -161.3824..."
2,Washington,53,5894121,6137285,88,4821823,190267,93301,322335,23953,228923,213519,441509,2934300,2959821,"MULTIPOLYGON (((-122.40202 48.22522, -122.4628..."
3,Idaho,16,1293953,1377016,16,1177304,5456,17645,11889,1308,54742,25609,101690,648660,645293,"POLYGON ((-117.02629 43.67903, -117.02379 43.7..."
4,Montana,30,902195,916257,6,817229,2692,56068,4691,470,5315,15730,18081,449480,452715,"POLYGON ((-111.47543 44.70216, -111.4808 44.69..."


<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   STATE_NAME  51 non-null     object  
 1   STATE_FIPS  51 non-null     object  
 2   POP2000     51 non-null     int32   
 3   POP2003     51 non-null     int32   
 4   POP00_SQMI  51 non-null     int32   
 5   WHITE       51 non-null     int32   
 6   BLACK       51 non-null     int32   
 7   AMERI_ES    51 non-null     int32   
 8   ASIAN       51 non-null     int32   
 9   HAWN_PI     51 non-null     int32   
 10  OTHER       51 non-null     int32   
 11  MULT_RACE   51 non-null     int32   
 12  HISPANIC    51 non-null     int32   
 13  MALES       51 non-null     int32   
 14  FEMALES     51 non-null     int32   
 15  geometry    51 non-null     geometry
dtypes: geometry(1), int32(13), object(2)
memory usage: 3.9+ KB


None

In [123]:
# Label the geometry column as EPSG:3857 (Web Mercator). This only attaches CRS
# metadata to the frame; no coordinate values are transformed.
# NOTE(review): the geometry values printed by the earlier cells look like
# longitude/latitude degrees (e.g. -160.07, 22.00), for which EPSG:4326 would be
# the matching label. Confirm the source CRS and check what later cells assume
# about this label before changing it.
gdf = gdf.set_crs("EPSG:3857", allow_override=True)

# Show the first rows followed by the CRS description now attached to the frame.
display(gdf.head(), gdf.crs)

Unnamed: 0,STATE_NAME,STATE_FIPS,POP2000,POP2003,POP00_SQMI,WHITE,BLACK,AMERI_ES,ASIAN,HAWN_PI,OTHER,MULT_RACE,HISPANIC,MALES,FEMALES,geometry
0,Hawaii,15,1211537,1242249,190,294102,22003,3535,503868,113539,15147,259343,87699,608671,602866,"MULTIPOLYGON (((-160.074 22.004, -160.05 21.98..."
1,Alaska,2,626932,645110,1,434534,21787,98043,25116,3309,9997,34146,25852,324112,302820,"MULTIPOLYGON (((-161.334 58.733, -161.382 58.7..."
2,Washington,53,5894121,6137285,88,4821823,190267,93301,322335,23953,228923,213519,441509,2934300,2959821,"MULTIPOLYGON (((-122.402 48.225, -122.463 48.2..."
3,Idaho,16,1293953,1377016,16,1177304,5456,17645,11889,1308,54742,25609,101690,648660,645293,"POLYGON ((-117.026 43.679, -117.024 43.754, -1..."
4,Montana,30,902195,916257,6,817229,2692,56068,4691,470,5315,15730,18081,449480,452715,"POLYGON ((-111.475 44.702, -111.481 44.691, -1..."


<Projected CRS: EPSG:3857>
Name: WGS 84 / Pseudo-Mercator
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: World between 85.06°S and 85.06°N.
- bounds: (-180.0, -85.06, 180.0, 85.06)
Coordinate Operation:
- name: Popular Visualisation Pseudo-Mercator
- method: Popular Visualisation Pseudo Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

## Data visualization



In [124]:
import plotly.express as px
import plotly.graph_objects as go

# Basemap / camera settings and the zero-width margins shared by every map in this cell.
_BASEMAP_OPTIONS = dict(
    mapbox_style="carto-positron",
    zoom=3,
    center={"lat": 37.0902, "lon": -95.7129},
)
_ZERO_MARGIN = {"r": 0, "t": 0, "l": 0, "b": 0}


def _render_state_map(html_path, extra_layout=None, **px_options):
    """Build one state choropleth from `gdf`, save it as HTML, show it, return it.

    html_path:    file name handed to `fig.write_html`.
    extra_layout: optional dict of additional `update_layout` keyword arguments,
                  applied together with the zero margins.
    px_options:   forwarded unchanged to `px.choropleth_mapbox` (color, opacity,
                  labels, title, hover settings, ...).
    """
    figure = px.choropleth_mapbox(
        gdf,
        geojson=gdf.geometry,
        locations=gdf.index,
        **_BASEMAP_OPTIONS,
        **px_options,
    )
    figure.update_layout(margin=_ZERO_MARGIN, **(extra_layout or {}))
    figure.write_html(html_path)
    figure.show()
    return figure


# Map 1: plain state outlines, no colour-coding
fig1 = _render_state_map(
    "map1.html",
    color=None,
    opacity=0.5,
    labels={'STATE_NAME':'State'},
    title="US States Outlines",
)

# Map 2: population in 2000
fig2 = _render_state_map(
    "map2.html",
    color="POP2000",
    color_continuous_scale="Viridis",
    opacity=0.5,
    labels={'POP2000':'Population in 2000'},
    title="Population in 2000",
)

# Map 3: population in 2003; the hover box is headed by the state name and
# lists only POP2003
fig3 = _render_state_map(
    "map3.html",
    color="POP2003",
    color_continuous_scale="Viridis",
    opacity=0.5,
    hover_name="STATE_NAME",
    hover_data={"POP2003": True, "STATE_NAME": False, "STATE_FIPS":False},
    labels={'POP2003':'Population in 2003'},
    title="Population in 2003 (Interactive)",
)

# Map 4: white population with a different colour scale, higher opacity and an
# explicit colour-bar title
fig4 = _render_state_map(
    "map4.html",
    extra_layout={"coloraxis_colorbar": dict(title="White Population")},
    color="WHITE",
    color_continuous_scale="Plasma",
    opacity=0.7,
    labels={'WHITE':'White Population'},
    title="White Population in US States",
    hover_name="STATE_NAME",
)

In [125]:
import plotly.express as px

# Maps 5-7 share one recipe and differ only in the column that is coloured, its
# legend label, the figure title and the output file.
_demographic_maps = [
    # (column,   legend label,         title,                             html file)
    ("BLACK",   "Black Population",   "Black Population in US States",   "map5.html"),
    ("MALES",   "Male Population",    "Male Population in US States",    "map6.html"),
    ("FEMALES", "Female Population",  "Female Population in US States",  "map7.html"),
]

_demographic_figs = []
for _column, _label, _title, _html_path in _demographic_maps:
    _fig = px.choropleth_mapbox(
        gdf,
        geojson=gdf.geometry,
        locations=gdf.index,
        color=_column,
        color_continuous_scale="Viridis",
        mapbox_style="carto-positron",
        zoom=3,
        center={"lat": 37.0902, "lon": -95.7129},
        opacity=0.5,
        labels={_column: _label},
        title=_title,
        hover_name="STATE_NAME",
    )
    _fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    _fig.write_html(_html_path)
    _fig.show()
    _demographic_figs.append(_fig)

# Keep the per-map names available to later cells.
fig5, fig6, fig7 = _demographic_figs

In [126]:
import plotly.express as px
import warnings
# --- Bubble-marker positions -------------------------------------------------
# px.scatter_mapbox expects `lat` / `lon` in degrees. The geometry values printed
# by the earlier cells are longitude/latitude degrees (e.g. -160.07, 22.00), so
# they are treated as EPSG:4326 here regardless of the CRS label `gdf` currently
# carries. This works on a relabelled copy; `gdf` itself is left untouched.
_states_lonlat = gdf.set_crs("EPSG:4326", allow_override=True)

# Centroids are computed once, in a projected CRS, and converted back to degrees.
# EPSG:5070 (NAD83 / Conus Albers, equal-area) is nominally defined for the
# conterminous US; it is used here only to place one marker per state, so the
# extra distortion for Alaska and Hawaii is accepted.
centroids = (
    _states_lonlat.geometry
    .to_crs("EPSG:5070")
    .centroid
    .to_crs("EPSG:4326")
)

# --- One bubble map per population column ------------------------------------
# Each map sizes and colours the marker by the same column.
_bubble_maps = [
    # (column,   title)
    ("POP2000", "Population in 2000 (Bubble Map)"),
    ("BLACK",   "Black Population (Bubble Map)"),
    ("MALES",   "Male Population (Bubble Map)"),
    ("FEMALES", "Female Population (Bubble Map)"),
]

_bubble_figs = {}
for _column, _title in _bubble_maps:
    _fig = px.scatter_mapbox(
        gdf,
        lat=centroids.y,
        lon=centroids.x,
        size=_column,
        color=_column,
        color_continuous_scale="Viridis",
        size_max=50,  # upper bound on the marker size
        zoom=3,
        center={"lat": 37.0902, "lon": -95.7129},
        mapbox_style="carto-positron",
        title=_title,
        hover_name="STATE_NAME",
    )
    _fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    _fig.show()
    _bubble_figs[_column] = _fig

# Keep the original per-map names available to later cells.
fig_pop2000 = _bubble_figs["POP2000"]
fig_black = _bubble_figs["BLACK"]
fig_males = _bubble_figs["MALES"]
fig_females = _bubble_figs["FEMALES"]