In [1]:
# Insert code here.
# Dependencies
import pandas as pd
import re
import plotly.graph_objects as go

%matplotlib inline

In [2]:
# URL for the source of the data on Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_nuclear_power_stations'

In [3]:
# Use pandas to get the tables from the Wikipedia page and create a list
# of dictionaries
tables = pd.read_html(url,encoding='utf-8')
tables

[                                          0
 0  Map all coordinates using: OpenStreetMap
 1        Download coordinates as: KML · GPX,
     Power station # units[note 1] Net capacity[note 2](MWe)        Country  \
 0         Almaraz               2                      2017          Spain   
 1           Angra               2             1,884[note 3]         Brazil   
 2             ANO               2                      1839  United States   
 3            Ascó               2                      1992          Spain   
 4          Atucha               2                      1027      Argentina   
 ..            ...             ...                       ...            ...   
 146     Watts Bar               2                      2288  United States   
 147    Wolf Creek               1                      1160  United States   
 148      Wolseong      5[note 45]                      3835    South Korea   
 149     Yangjiang               6                      6000          Chin

In [4]:
# Isolate the second table from the list of dictionaries and make that dict a dataframe
df = tables[1]

In [5]:
# View the column names
df.columns

Index(['Power station', '# units[note 1]', 'Net capacity[note 2](MWe)',
       'Country', 'Location', 'Refs'],
      dtype='object')

In [6]:
# Change the column names
df.columns = ['Station', 'Units', 'Capacity_MWe', 'Country', 
              'Location', 'Refs']

In [7]:
# Both DMS and Geopoint Lat and Long data is in one column 
# we are only interested in the Geopoint Lat/Long 
coordinates = df["Location"].str.split(" / ", n = 1, expand = True)

In [8]:
# Create a new column in the dataframe
df["GP_Lat_Long"]= coordinates[1]

In [9]:
# Dropping old Location and Refs columns 
df.drop(columns =["Location","Refs"], inplace = True) 

In [10]:
# Split the column of data GP_Lat_long that contains two coordinates into 
# two separate columns
DMS_coordinates = df["GP_Lat_Long"].str.split(" ", n = 1, expand = True)
df["GP_Lat"]= DMS_coordinates[0] 
df["GP_Long"]= DMS_coordinates[1]

In [11]:
# Create a function to transform the Latitude and Longitude points from the format
# 41.2000°S to -41.2000 for example 
def dms2dd(s):
    # example: s = """0°51'56.29"S"""
    coord, direction = re.split('°', s)
    coord = coord.replace(u'\ufeff','')
    coord = float(coord)
    if direction in ('S','W'):
        coord*= -1
    return coord

In [12]:
# Transform the Latitude and Longitude values to be either negative or positive
# Values for South and West are negative and North and East are positive
df['Latitude'] = df['GP_Lat'].apply(dms2dd)
df['Longitude'] = df['GP_Long'].apply(dms2dd)

In [13]:
# Remove the columns that are no longer necessary
df.drop(columns =["GP_Lat_Long","GP_Lat","GP_Long"], inplace = True) 

In [14]:
# Create a United States only slice of the dataframe
us = df[df["Country"] == "United States"]

In [43]:
# Add column "Type" with the type of point name
us['Type'] = 'Nuclear Site'
# Add column "text" with a descrition of the point for plottin in plotly
us['text'] = us['Type'].astype(str) + ' : ' + us['Station'].astype(str)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [44]:
us.head()

Unnamed: 0,Station,Units,Capacity_MWe,Country,Latitude,Longitude,Type,text
2,ANO,2,1839,United States,35.31028,-93.23139,Nuclear Site,Nuclear Site : ANO
6,Beaver Valley,2,1738,United States,40.62333,-80.43056,Nuclear Site,Nuclear Site : Beaver Valley
10,Braidwood,2,2330,United States,41.24361,-88.22917,Nuclear Site,Nuclear Site : Braidwood
12,Browns Ferry,3,3300,United States,34.70389,-87.11861,Nuclear Site,Nuclear Site : Browns Ferry
14,Brunswick,2,1858,United States,33.95833,-78.01028,Nuclear Site,Nuclear Site : Brunswick


In [45]:
import plotly.graph_objects as go

import pandas as pd

# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_us_airport_traffic.csv')
# df['text'] = df['airport'] + '' + df['city'] + ', ' + df['state'] + '' + 'Arrivals: ' + df['cnt'].astype(str)

fig = go.Figure(data=go.Scattergeo(
        lon = us['Longitude'],
        lat = us['Latitude'],
        text = us['Station'],
        mode = 'markers',
        #marker_color = us['Capacity_MWe'],
        #size = us['Capacity_MWe']
        ))

fig.update_layout(
        title = 'Nuclear Facilities<br>(Hover for facility names)',
        geo_scope='usa',
    )
fig.show()

In [18]:
# Add column "Type" with the type of point name
us['Type'] = 'Nuclear Site'



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [19]:
us.head()

Unnamed: 0,Station,Units,Capacity_MWe,Country,Latitude,Longitude,Type
2,ANO,2,1839,United States,35.31028,-93.23139,Nuclear Site
6,Beaver Valley,2,1738,United States,40.62333,-80.43056,Nuclear Site
10,Braidwood,2,2330,United States,41.24361,-88.22917,Nuclear Site
12,Browns Ferry,3,3300,United States,34.70389,-87.11861,Nuclear Site
14,Brunswick,2,1858,United States,33.95833,-78.01028,Nuclear Site


In [38]:
#us['text'] = f'{us["Type"].astype(str)} : {us["Station"].astype(str)}'
us['text'] = us['Type'].astype(str) + ' : ' + us['Station'].astype(str)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [39]:
us.head()

Unnamed: 0,Station,Units,Capacity_MWe,Country,Latitude,Longitude,Type,text
2,ANO,2,1839,United States,35.31028,-93.23139,Nuclear Site,Nuclear Site : ANO
6,Beaver Valley,2,1738,United States,40.62333,-80.43056,Nuclear Site,Nuclear Site : Beaver Valley
10,Braidwood,2,2330,United States,41.24361,-88.22917,Nuclear Site,Nuclear Site : Braidwood
12,Browns Ferry,3,3300,United States,34.70389,-87.11861,Nuclear Site,Nuclear Site : Browns Ferry
14,Brunswick,2,1858,United States,33.95833,-78.01028,Nuclear Site,Nuclear Site : Brunswick
