In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from functools import reduce
from geojson_rewind import rewind
import geojson
import geodaisy.converters as convert
from pygeoif import geometry

pd.options.display.max_columns = None

In [4]:
filename = "Data/Template - Data Model - 20210127.xls"
xls = pd.read_excel(filename, engine="xlrd", sheet_name=None)
frames = {}

  


In [5]:
sheet_names = xls.keys()
categoricals = {
    "Sample": ["type", "collection", "pooled"],
    "WWMeasure": ["fractionAnalyzed", "type", "unit", "aggregation", "access"],
    "SiteMeasure": ["type", "aggregation", "unit", "access"],
    "CPHD": ["type", "typeDate"],
    "Site": ["type"],
    "AssayMethod": ["unit"],
    "Instrument": ["type"],
    "Polygon": ["type"]
}
def parse_dt(x,y):
    if pd.isna(x) and pd.isna(y):
        return pd.NaT 
    elif pd.isna(x) and not pd.isna(y):
        return y 
    elif not pd.isna(x) and pd.isna(y):
        return x 
    else:
        return pd.NaT

def parse_text(x,y):
    if x == "" and y == "":
        return "" 
    elif x == "" and y != "":
        return y 
    elif x != "" and y == "":
        return x 
    else:
        return ";".join([x, y])

def parse_nums(x,y):
    if pd.isna(x) and pd.isna(y):
        return np.nan
    elif pd.isna(x) and not pd.isna(y):
        return y 
    elif not pd.isna(x) and pd.isna(y):
        return x 
    else:
        return x+y/2;

def agg_by_type(series):
    data_type = str(series.dtype)
    name = series.name
    if "datetime" in data_type:
        return reduce(parse_dt, series)
    
    if "object" in data_type:
        return reduce(parse_text, series)

    if "float64" in data_type:
        return reduce(parse_nums, series)
    else:
        raise TypeError(f"could notr parse series {name}") 

## Parsing WWMeasure

In [6]:
# breaking down the parameters in the wwmeasure table
df_name = "WWMeasure"

df = xls[df_name]

# Parsing date columns into the datetime format
date_column_names = ["analysisDate", "reportDate"]
for col in date_column_names:
    df[col] = pd.to_datetime(df[col])
#storing data column names according to the type inside a dict
columns = {}

#making a copy of the df I can iterate over while I modify the original DataFrame
df_copy = df.copy(deep=True)
for i, row in df_copy.iterrows():
    value = row["value"]
    value_fraction = row["fractionAnalyzed"]
    value_type = row["type"]
    value_unit = row["unit"].replace("/", "_")
    value_aggregate = row["aggregation"]
    value_issue = row["qualityFlag"]
    notes = row["notes"]
    analysisDate = row["analysisDate"]
    reportDate = row["reportDate"]
    combined_name = ".".join([value_fraction, value_type, value_unit, value_aggregate, value_issue])
    combined_value_name = ".".join([combined_name, "value"])
    combined_notes_name = ".".join([combined_name, "notes"])
    combined_analysisDate_name = ".".join([combined_name, "analysisDate"])
    combined_reportDate_name = ".".join([combined_name, "reportDate"])
    
    if combined_value_name not in df.columns.tolist():
        df[combined_value_name] = np.nan
    df[combined_value_name].iloc[i] = value

    if combined_notes_name not in df.columns.tolist():
        df[combined_notes_name] = ""
    df[combined_notes_name].iloc[i] = notes

    if combined_analysisDate_name not in df.columns.tolist():
            df[combined_analysisDate_name] = pd.NaT
    df[combined_analysisDate_name].iloc[i] = analysisDate

    if combined_reportDate_name not in df.columns.tolist():
            df[combined_reportDate_name] = pd.NaT
    df[combined_reportDate_name].iloc[i] =reportDate

del df_copy 
df.drop(["reporterID", "WWMeasureID", "notes", "analysisDate", "reportDate", "labID", "assayID", "value","fractionAnalyzed", "type", "aggregation", "unit", "index", "qualityFlag", "accessToPublic", "accessToAllOrg", "accessToPHAC", "accessToLocalHA", "accessToProvHA", "accessToOtherProv", "accessToDetails"], axis=1, inplace=True)

df = df.groupby("sampleID").agg(agg_by_type)
frames[df_name] = df
df.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


(2, 20)

## Parsing SiteMeasure

In [7]:
# breaking down the parameters in the wwmeasure table
df_name = "SiteMeasure"

df = xls[df_name]

# Parsing date columns into the datetime format
date_column_names = ["dateTime"]
for col in date_column_names:
    df[col] = pd.to_datetime(df[col])
#storing data column names according to the type inside a dict
columns = {}

#making a copy of the df I can iterate over while I modify the original DataFrame
df_copy = df.copy(deep=True)
for i, row in df_copy.iterrows():
    value = row["value"]
    
    value_type = row["type"]
    value_unit = row["unit"].replace("/", "_")
    value_aggregate = row["aggregation"]
    
    notes = row["notes"]
    dateTime = row["dateTime"]
    
    combined_name = ".".join([value_type, value_unit, value_aggregate])
    combined_value_name = ".".join([combined_name, "value"])
    combined_notes_name = ".".join([combined_name, "notes"])
    combined_dateTime_name = ".".join([combined_name, "dateTime"])
    
    if combined_value_name not in df.columns.tolist():
        df[combined_value_name] = np.nan
    df[combined_value_name].iloc[i] = value

    if combined_notes_name not in df.columns.tolist():
        df[combined_notes_name] = ""
    df[combined_notes_name].iloc[i] = notes

    if combined_dateTime_name not in df.columns.tolist():
            df[combined_dateTime_name] = pd.NaT
    df[combined_dateTime_name].iloc[i] = dateTime


del df_copy 
df.drop(["instrumentID","reporterID", "dateTime", "type", "aggregation", "aggregationDesc", "value", "unit", "accessToPublic", "accessToAllOrgs", "accessToPHAC", "accessToLocalHA", "accessToProvHA", "accessToOtherProv", "accessToDetails", "notes"], axis=1, inplace=True)

df = df.groupby("siteID").agg(agg_by_type)
frames[df_name] = df
df.shape

(1, 10)

## Parsing Sample

In [36]:
# breaking down the parameters in the wwmeasure table
df_name = "Sample"

df = xls[df_name]
# Parsing date columns into the datetime format
date_column_names = ["dateTime", "dateTimeStart", "dateTimeEnd"]
for col in date_column_names:
    df[col] = df[col].apply(lambda x: pd.NaT if x == "None" else x)
    df[col] = pd.to_datetime(df[col])

df_copy = df.copy(deep=True)
for i, row in df_copy.iterrows():
    sites = row["siteID"]
    if ";" in sites:
        ids = [ x.strip() for x in sites.split(";")]
        df["siteID"].iloc[i] = ids.pop(0)
        for ii in ids:
            new_row = df.iloc[i].copy()
            new_row["siteID"] = ii
            df = df.append(new_row, ignore_index=True)

frames[df_name] = df
df.shape

(4, 18)

In [9]:
df_name = "Site"

df = xls[df_name]
frames["Site"] = df


In [59]:
df_name = "Polygon"
df = xls[df_name]

def convert_geojson(s):
    if s == "-": 
        return None  #{"type":"Polygon", "coordinates":None}
    else:    
        from_wkt = geometry.from_wkt(s)
        geo_interface = from_wkt.__geo_interface__
    geojson_feature = convert.geo_interface_to_geojson(geo_interface)
    geojson_feature = rewind(geojson_feature, rfc7946=False)
    return geojson_feature

geo = {
    "type": "FeatureCollection", 
    "features": []
}

df["features"] = df["wkt"].apply(lambda x: convert_geojson(x))

df["features"]
for i, row in df.iterrows():
    if row["features"] is None:
        continue
    new_feature = {
        "type": "Feature",
        "geometry": row["features"],
        "properties":{
            "polygonID":row["polygonID"],
        }, 
        "id":i
    }
    geo["features"].append(new_feature)

from shapely.geometry import asShape # manipulating geometry
def get_map_center(geo_json):
    x_s = []
    y_s = []
    for feat in geo_json["features"]:
        # convert the geometry to shapely
        geom = asShape(feat["geometry"])
        # obtain the coordinates of the feature's centroid
        x_s.append(geom.centroid.x)
        y_s.append(geom.centroid.y)
    x_m = sum(x_s)/len(x_s)
    y_m = sum(y_s)/len(y_s)
    return {"lat":y_m, "lon":x_m}

center = get_map_center(geo)


In [60]:
center

{'lat': 43.57967587035404, 'lon': -79.81670989279911}

In [66]:

fig = px.choropleth_mapbox(df, geojson=geo, locations="polygonID", featureidkey="properties.polygonID", color='name',
                           labels={'name':'Sewershed'},
                            center=center,zoom=9)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [16]:

with open("j.json", "w") as f:
    f.write(geojson.dumps(b))

In [18]:
help(px.choropleth_mapbox)

Help on function choropleth_mapbox in module plotly.express._chart_types:

choropleth_mapbox(data_frame=None, geojson=None, featureidkey=None, locations=None, color=None, hover_name=None, hover_data=None, custom_data=None, animation_frame=None, animation_group=None, category_orders={}, labels={}, color_discrete_sequence=None, color_discrete_map={}, color_continuous_scale=None, range_color=None, color_continuous_midpoint=None, opacity=None, zoom=8, center=None, mapbox_style=None, title=None, template=None, width=None, height=None)
        In a Mapbox choropleth map, each row of `data_frame` is represented by a
        colored region on a Mapbox map.
        
    Parameters
    ----------
    data_frame: DataFrame or array-like or dict
        This argument needs to be passed for column names (and not keyword
        names) to be used. Array-like and dict are tranformed internally to a
        pandas DataFrame. Optional: if missing, a DataFrame gets constructed
        under the hood usi

In [53]:
sites = xls["Site"]
poly = xls["Polygon"]


           siteID                         name  \
0   GE_booth_wwpt   G.E. Booth (Lakeview) WWTP   
1  GE_booth_wwpt2  G.E. Booth (Oceanview) WWTP   

                                         description     type   geoLat  \
0  The G.E. Booth Wastewater Treatment Plant (WWT...  wwtpMuC  43.5789   
1  The G.E. Booth Wastewater Treatment Plant (WWT...  wwtpMuS  43.5789   

   geoLong            polygonID  \
0 -79.6583   toronto_lakeview_1   
1 -79.6583  toronto_oceanview_1   

                                sewerNetworkFileLink sewerNetworkFileBLOB  \
0  https://mcgill.sharepoint.com/:i:/r/sites/MiCE...                 None   
1  https://mcgill.sharepoint.com/:i:/r/sites/MiCE...                 None   

  notes  
0  None  
1  None  


In [16]:
fig

In [None]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                   dtype={"fips": str})

import plotly.express as px

fig = px.choropleth(df, geojson=counties, locations='fips', color='unemp',
                           color_continuous_scale="Viridis",
                           range_color=(0, 12),
                           scope="usa",
                           labels={'unemp':'unemployment rate'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


In [None]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)


In [None]:
counties["features"][0]

In [None]:
shapely.geometry.MultiPolygon().__geo_interface__

In [None]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
# Load Data
df = px.data.tips()
# Build App
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("JupyterDash Demo"),
    dcc.Graph(id='graph'),
    html.Label([
        "colorscale",
        dcc.Dropdown(
            id='colorscale-dropdown', clearable=True,
            value='plasma', options=[
                {'label': c, 'value': c}
                for c in px.colors.named_colorscales()
            ])
    ]),
])# Define callback to update graph
@app.callback(
    Output('graph', 'figure'),
    [Input("colorscale-dropdown", "value")]
)
def update_figure(colorscale):
    return px.scatter(
        df, x="total_bill", y="tip", color="size"
        color_continuous_scale=colorscale,
        render_mode="webgl", title="Tips"
    )# Run app and display result inline in the notebook
app.run_server(mode='external') # inline