In [1]:
!pip install plotly --upgrade



In [2]:
!pip install --upgrade geopandas mapclassify folium
!pip install census contextily adjustText 



In [3]:
import warnings
warnings.filterwarnings('ignore')

import os
import pandas as pd
import geopandas as gpd
import matplotlib as mpl
import folium
from folium import plugins
from matplotlib import pyplot as plt
import matplotlib.patheffects as pe
import mapclassify
from census import Census
import contextily as ctx 
from adjustText import adjust_text

plt.rcParams["figure.figsize"] = (10,10) # set this once for a default plot size

In [4]:
#Read my dataset
file_path = 'nuclear_power_plants.csv'
df = gpd.read_file(file_path)

In [5]:
df

Unnamed: 0,Id,Name,Latitude,Longitude,Country,CountryCode,Status,ReactorType,ReactorModel,ConstructionStartAt,OperationalFrom,OperationalTo,Capacity,LastUpdatedAt,Source,IAEAId,geometry
0,1,Ågesta,59.206000,18.082900,Sweden,SE,Shutdown,PHWR,,1957-12-01,1964-05-01,1974-06-02,9,2015-05-24T04:51:37+03:00,WNA/IAEA,528,
1,2,Akademik Lomonosov-1,69.709579,170.306250,Russia,RU,Operational,PWR,KLT-40S 'Floating',2007-04-15,2020-05-22,,30,2021-05-31,WNA/IAEA/Google Maps,895,
2,3,Akademik Lomonosov-2,69.709579,170.306250,Russia,RU,Operational,PWR,KLT-40S 'Floating',2007-04-15,2020-05-22,,30,2021-05-31,WNA/IAEA/Google Maps,896,
3,4,Akhvaz-1,,,Iran,IR,Planned,,,,,,,,WNA,,
4,5,Akhvaz-2,,,Iran,IR,Planned,,,,,,,,WNA,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,815,BREST-OD-300,56.637414,84.904510,Russia,RU,Under Construction,FBR,BREST-OD-300,2021-06-08,,,300,2022-01-05T00:04:08+02:00,IAEA/WNA,1106,
799,816,Sanaocun-2,27.201487,120.517974,China,CN,Under Construction,PWR,HRP1000,2021-12-30,,,1117,2022-04-21T20:26:19+03:00,IAEA,1107,
800,817,El Dabaa-2,31.044167,28.497778,Egypt,EG,Under Construction,PWR,VVER-1200,2022-11-19,,,1100,2023-08-28T08:01:45+03:00,IAEA/Wikipedia,138,
801,818,El Dabaa-3,31.044167,28.497778,Egypt,EG,Under Construction,PWR,VVER-1200,2023-05-03,,,1100,2023-08-28T08:24:00+03:00,IAEA/Wikipedia,1120,


In [6]:
df = df[df['Country'] == 'United States']
df

Unnamed: 0,Id,Name,Latitude,Longitude,Country,CountryCode,Status,ReactorType,ReactorModel,ConstructionStartAt,OperationalFrom,OperationalTo,Capacity,LastUpdatedAt,Source,IAEAId,geometry
16,17,Arkansas Nuclear One-1 (ANO-1),35.310000,-93.230000,United States,US,Operational,PWR,B&W LLP (DRYAMB),1968-10-01,1974-12-19,,850,2017-02-10T23:58:30+02:00,WNA/IAEA,652,
17,18,Arkansas Nuclear One-2 (ANO-2),35.310000,-93.229000,United States,US,Operational,PWR,CE 2LP (DRYAMB),1968-12-06,1980-03-26,,912,2017-02-10T23:58:53+02:00,WNA/IAEA,689,
37,38,Beaver Valley-1,40.624000,-80.432000,United States,US,Operational,PWR,WH 3LP (DRYSUB),1970-06-26,1976-10-01,,835,2017-02-10T23:58:44+02:00,WNA/IAEA,669,
38,39,Beaver Valley-2,40.624000,-80.432000,United States,US,Operational,PWR,WH 3LP (DRYSUB),1974-05-03,1987-11-17,,836,2017-02-10T23:58:59+02:00,WNA/IAEA,712,
56,57,Big Rock Point,45.359000,-85.195000,United States,US,Shutdown,BWR,,1960-05-01,1963-03-29,1997-08-29,72,2015-05-24T04:51:41+03:00,WNA/IAEA,601,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
727,737,Watts Bar-2,35.601000,-84.789000,United States,US,Operational,PWR,WH 4LP (ICECND),1973-09-01,2016-10-19,,1165,2017-02-10T23:58:57+02:00,WNA/IAEA,700,
730,740,Wolf Creek,38.239000,-95.691000,United States,US,Operational,PWR,WH 4LP (DRYAMB),1977-05-31,1985-09-03,,1170,2017-02-10T23:59:19+02:00,WNA/IAEA,751,
754,764,Yankee Rowe,42.728000,-72.929000,United States,US,Shutdown,PWR,PWR,1957-11-01,1961-07-01,1991-10-01,175,2015-05-24T04:51:43+03:00,WNA/IAEA,638,
762,778,Zion-1,42.446000,-87.801000,United States,US,Shutdown,PWR,WH 4LP,1968-12-01,1973-12-31,1998-02-13,1040,2017-02-10T23:58:20+02:00,WNA/IAEA,640,


In [7]:
df['Capacity'] = df['Capacity'].replace('', '0').astype(int)

In [8]:
df['Latitude'] = df['Latitude'].replace('', '0.0').astype(float)

In [9]:
df['Longitude'] = df['Longitude'].replace('', '0.0').astype(float)

In [10]:
df = df.loc[(df['Latitude'] != 0.0) & (df['Longitude'] != 0.0)]

In [11]:
df['Status'] = df['Status'].astype(str)

In [12]:
df['Country'] = df['Country'].astype(str)

In [13]:
df['Name'] = df['Name'].astype(str)

In [14]:
df.shape

(137, 17)

In [18]:
center = [37.0902, -95.7129]

# creating map
map = folium.Map(location = center, zoom_start = 4.2)
for i, j in df.iterrows():
    location = [j['Latitude'], j['Longitude']]
    folium.Marker(location, popup = f'Name: {j["Name"]}').add_to(map)
map.save('Distribution_map.html')

In [20]:
import folium

# Assuming 'Status', 'Capacity', and 'ReactorType' are columns in your DataFrame df
# Add a 'layer' column to distinguish between operational and non-operational
df['layer'] = df['Status'].apply(lambda x: 'Operational' if x == 'Operational' else 'Non-Operational')

# Define the center coordinates for the United States
center_us = [37.0902, -95.7129]

# Create a base map with the new center coordinates
map = folium.Map(location=center_us, zoom_start=4)

# Create layers for operational, non-operational, low capacity, high capacity, and reactor type labels
operational_layer = folium.FeatureGroup(name='Operational')
non_operational_layer = folium.FeatureGroup(name='Non-Operational')
low_capacity_layer = folium.FeatureGroup(name='Low Capacity')
high_capacity_layer = folium.FeatureGroup(name='High Capacity')
reactor_type_label_layer = folium.FeatureGroup(name='Reactor Type (Labels)')

# Add markers to the respective layers
for i, j in df.iterrows():
    location = [j['Latitude'], j['Longitude']]
    popup_text = f'Country: {j["Country"]}, Status: {j["Status"]}, Capacity: {j["Capacity"]}, Reactor Type: {j["ReactorType"]}'

    if j['Status'] == 'Operational':
        folium.Marker(location, popup=popup_text, icon=folium.Icon(color='green')).add_to(operational_layer)
    else:
        folium.Marker(location, popup=popup_text, icon=folium.Icon(color='red')).add_to(non_operational_layer)

    # Define colors for low and high capacity
    if j['Capacity'] < 50:
        folium.CircleMarker(location, radius=8, color='red', fill=True, fill_color='lightred', fill_opacity=0.7).add_to(low_capacity_layer)
    else:
        folium.CircleMarker(location, radius=8, color='Darkgreen', fill=True, fill_color='lightgreen', fill_opacity=0.7).add_to(high_capacity_layer)

    # Add reactor type labels
    label = folium.Marker(location, icon=folium.DivIcon(html=f'<div style="font-size: 10pt; color: black">{j["ReactorType"]}</div>'))
    label.add_to(reactor_type_label_layer)

# Add layers to the map
operational_layer.add_to(map)
non_operational_layer.add_to(map)
low_capacity_layer.add_to(map)
high_capacity_layer.add_to(map)
reactor_type_label_layer.add_to(map)

# Add layer control to toggle between layers
folium.LayerControl().add_to(map)

# Display the map
map.save('map_with_layers.html')

In [58]:
import plotly.express as px
import pandas as pd

df.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)

color_scale = [(0, 'orange'), (1,'red')]

fig = px.scatter_mapbox(df, 
                        lat="Latitude", 
                        lon="Longtitude", 
                        hover_name="Address", 
                        hover_data=["Address", "Listed"],
                        color="Listed",
                        color_continuous_scale=color_scale,
                        size="Listed",
                        zoom=8, 
                        height=800,
                        width=800)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

TypeError: You cannot set both the how and thresh arguments at the same time.

In [22]:
c = Census("")
#Picking the state of NJ
nj_census = c.acs5.state_county_tract(fields = ('NAME', 'B01001_001E', 'B01001_001M', 'B19055_001E', 'B19055_001M','GEO_ID'),
                                      state_fips = '34', #I chose New York
                                      county_fips = "*", # an asterisk means all options; use it here to get all the counties within New York
                                      tract = "*", # get all the tracts within these counties
                                      year = 2019)

CensusException: error: wildcard not allowed for 'state' in geography heirarchy

In [14]:
nj_census

[{'NAME': 'Census Tract 5012.10, Gloucester County, New Jersey',
  'B01001_001E': 3452.0,
  'B01001_001M': 249.0,
  'B19055_001E': 1159.0,
  'B19055_001M': 57.0,
  'GEO_ID': '1400000US34015501210',
  'state': '34',
  'county': '015',
  'tract': '501210'},
 {'NAME': 'Census Tract 5013.01, Gloucester County, New Jersey',
  'B01001_001E': 3570.0,
  'B01001_001M': 155.0,
  'B19055_001E': 1384.0,
  'B19055_001M': 69.0,
  'GEO_ID': '1400000US34015501301',
  'state': '34',
  'county': '015',
  'tract': '501301'},
 {'NAME': 'Census Tract 5013.03, Gloucester County, New Jersey',
  'B01001_001E': 2594.0,
  'B01001_001M': 197.0,
  'B19055_001E': 977.0,
  'B19055_001M': 72.0,
  'GEO_ID': '1400000US34015501303',
  'state': '34',
  'county': '015',
  'tract': '501303'},
 {'NAME': 'Census Tract 5019, Gloucester County, New Jersey',
  'B01001_001E': 4135.0,
  'B01001_001M': 21.0,
  'B19055_001E': 1320.0,
  'B19055_001M': 141.0,
  'GEO_ID': '1400000US34015501900',
  'state': '34',
  'county': '015',
  

In [15]:
# dataframe from the census data
nj_census_df = pd.DataFrame(nj_census)

# Show the dataframe first few 5 
nj_census_df.head()

Unnamed: 0,NAME,B01001_001E,B01001_001M,B19055_001E,B19055_001M,GEO_ID,state,county,tract
0,"Census Tract 5012.10, Gloucester County, New J...",3452.0,249.0,1159.0,57.0,1400000US34015501210,34,15,501210
1,"Census Tract 5013.01, Gloucester County, New J...",3570.0,155.0,1384.0,69.0,1400000US34015501301,34,15,501301
2,"Census Tract 5013.03, Gloucester County, New J...",2594.0,197.0,977.0,72.0,1400000US34015501303,34,15,501303
3,"Census Tract 5019, Gloucester County, New Jersey",4135.0,21.0,1320.0,141.0,1400000US34015501900,34,15,501900
4,"Census Tract 5003, Gloucester County, New Jersey",2959.0,25.0,1087.0,65.0,1400000US34015500300,34,15,500300


In [16]:
def remove_after_county(value):
    parts = value.split('County', 1)  # Split at the first occurrence of 'County'
    return 'County'.join(parts[:-1])  # Join all parts before the 'County'

# Apply the function to the 'Location' column
nj_census_df['NAME'] = nj_census_df['NAME'].apply(remove_after_county)

In [17]:
nj_census_df

Unnamed: 0,NAME,B01001_001E,B01001_001M,B19055_001E,B19055_001M,GEO_ID,state,county,tract
0,"Census Tract 5012.10, Gloucester",3452.0,249.0,1159.0,57.0,1400000US34015501210,34,015,501210
1,"Census Tract 5013.01, Gloucester",3570.0,155.0,1384.0,69.0,1400000US34015501301,34,015,501301
2,"Census Tract 5013.03, Gloucester",2594.0,197.0,977.0,72.0,1400000US34015501303,34,015,501303
3,"Census Tract 5019, Gloucester",4135.0,21.0,1320.0,141.0,1400000US34015501900,34,015,501900
4,"Census Tract 5003, Gloucester",2959.0,25.0,1087.0,65.0,1400000US34015500300,34,015,500300
...,...,...,...,...,...,...,...,...,...
2005,"Census Tract 235.02, Bergen",5222.0,588.0,2225.0,140.0,1400000US34003023502,34,003,023502
2006,"Census Tract 236.01, Bergen",2868.0,355.0,1278.0,92.0,1400000US34003023601,34,003,023601
2007,"Census Tract 236.02, Bergen",4885.0,474.0,1420.0,111.0,1400000US34003023602,34,003,023602
2008,"Census Tract 321.03, Bergen",5154.0,372.0,2488.0,86.0,1400000US34003032103,34,003,032103


In [18]:
def remove_before_comma(value):
    parts = value.split(',', 1)  # Split at the first occurrence of ','
    return parts[-1].strip()  # Take the part after ',' and remove leading/trailing spaces

# Apply the function to the 'Location' column
nj_census_df['NAME'] = nj_census_df['NAME'].apply(remove_before_comma)


In [19]:
nj_census_df

Unnamed: 0,NAME,B01001_001E,B01001_001M,B19055_001E,B19055_001M,GEO_ID,state,county,tract
0,Gloucester,3452.0,249.0,1159.0,57.0,1400000US34015501210,34,015,501210
1,Gloucester,3570.0,155.0,1384.0,69.0,1400000US34015501301,34,015,501301
2,Gloucester,2594.0,197.0,977.0,72.0,1400000US34015501303,34,015,501303
3,Gloucester,4135.0,21.0,1320.0,141.0,1400000US34015501900,34,015,501900
4,Gloucester,2959.0,25.0,1087.0,65.0,1400000US34015500300,34,015,500300
...,...,...,...,...,...,...,...,...,...
2005,Bergen,5222.0,588.0,2225.0,140.0,1400000US34003023502,34,003,023502
2006,Bergen,2868.0,355.0,1278.0,92.0,1400000US34003023601,34,003,023601
2007,Bergen,4885.0,474.0,1420.0,111.0,1400000US34003023602,34,003,023602
2008,Bergen,5154.0,372.0,2488.0,86.0,1400000US34003032103,34,003,032103


In [20]:
nj_census_df["GEOID"] = nj_census_df["state"] + nj_census_df["county"] + nj_census_df["tract"]

# delete the state and tract columns from the dataframe
nj_census_df = nj_census_df.drop(columns = ["state", "tract", "county"])

In [21]:
url_wv = 'https://www2.census.gov/geo/tiger/TIGER2019/TRACT/tl_2019_34_tract.zip'
map_data = gpd.read_file(url_wv)

URLError: <urlopen error [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1002)>

In [None]:
map_data