In [1]:
import os

import pandas as pd
import plotly.graph_objs as go
import plotly.offline
import plotly.plotly as py
from sqlalchemy import create_engine

from config import mapbox_api_key
from graph_gen import pollution_epa

In [2]:
# Database connection for the AQI schema. The URL is read from the
# AQI_DATABASE_URL environment variable so credentials do not have to be
# committed with the notebook; the original local-development URL is kept as
# the fallback so existing setups keep working unchanged.
engine = create_engine(
    os.environ.get("AQI_DATABASE_URL", "mysql://root:password@localhost/AQI")
)

In [3]:
# df = pd.read_csv('california_pollution.csv')

In [4]:
# df.to_sql(name='california_pollution',con=engine)

In [5]:
# SQL used to read the whole california_pollution table back from the database.
query = "\nselect * from california_pollution\n"

In [6]:
# Load the current contents of the table and preview the first five rows.
df2 = pd.read_sql_query(sql=query, con=engine)
df2.head(5)

Unnamed: 0,index,cities,lat,lng
0,0,Adelanto,34.58277,-117.409215
1,1,Agoura Hills,34.15334,-118.761676
2,2,Alameda,37.765206,-122.241636
3,3,Albany,37.88687,-122.297747
4,4,Alhambra,34.095287,-118.127015


In [7]:
def df_geodata(filename=None, query=None, con=None, fetch=None):
    """
    Returns dataframe with pollution data from city list dataframe.

    The city list is loaded either from a CSV file or from a SQL query and
    must contain ``lat`` and ``lng`` columns. For every row the pollution
    lookup is called with ``(lat, lng)`` and its result is added to the row
    as the ``aqi``, ``category``, ``dominant_pollutant`` and ``datetime``
    columns.

    Parameters
    ----------
    filename : str or file-like, optional
        CSV source handed to ``pd.read_csv``.
    query : str, optional
        SQL handed to ``pd.read_sql_query``.
    con : connection or engine, optional
        Database connection used for ``query``. Defaults to the notebook's
        module-level ``engine``.
    fetch : callable, optional
        ``fetch(lat, lng)`` returning the lookup payload, or ``None`` when no
        data is available. Defaults to ``pollution_epa``.

    Returns
    -------
    pandas.DataFrame
        The city rows that received a lookup result, with the four pollution
        columns added, after ``dropna()``. Rows whose lookup returned ``None``
        are left out.
    str
        An error message when both or neither of ``filename`` and ``query``
        are supplied (kept for compatibility with existing callers).
    """
    if filename and query:
        return "You must pass either a csv filename or a SQL query. You cannot pass two arguments."
    if not filename and not query:
        return "Error: No arguments were passed to this function."

    if filename:
        cities = pd.read_csv(filename)
    else:
        # read_sql_query requires a connection; the original call omitted it
        # and therefore always raised TypeError on this path.
        cities = pd.read_sql_query(query, engine if con is None else con)

    lookup = pollution_epa if fetch is None else fetch

    # One entry per city row: a dict of readings, or None when the lookup
    # returned nothing for that coordinate.
    readings = []
    for lat, lng in zip(cities['lat'], cities['lng']):
        data = lookup(lat, lng)
        if data is None:
            readings.append(None)
            continue
        epa = data['data']['indexes']['usa_epa']
        readings.append({
            'aqi': epa['aqi'],
            'category': epa['category'],
            'dominant_pollutant': epa['dominant_pollutant'],
            'datetime': data['data']['datetime'],
        })

    # Filter out failed lookups by mask instead of writing the string "NaN"
    # into the columns: dropna() does not treat that string as missing, and
    # filtering first keeps the aqi column's original dtype.
    has_reading = [reading is not None for reading in readings]
    found = [reading for reading in readings if reading is not None]
    result = cities.loc[has_reading].copy()
    for column in ('aqi', 'category', 'dominant_pollutant', 'datetime'):
        result[column] = [reading[column] for reading in found]

    return result.dropna()

In [8]:
def marker_text(df):
    '''
    Builds the hover label for every row of a pollution dataframe.

    Each label joins the row's ``cities``, ``category``, ``aqi`` and
    ``dominant_pollutant`` values with ``<br>`` separators. Returns the
    labels as a list of strings in row order.
    '''
    return [
        f"{row.cities}<br>{row.category}<br>AQI: {row.aqi}"
        f"<br>Dominant pollutant: {row.dominant_pollutant}"
        for row in df.itertuples()
    ]

In [14]:
def generate_map(df):
    '''
    Renders the pollution dataframe as a Mapbox scatter map.

    One marker is drawn per row at (lat, lng), rounded to three decimals.
    Marker size is aqi / 10, marker color follows aqi on the 'Jet'
    colorscale, and the hover text comes from marker_text(df).

    Returns the value of plotly.offline.plot called with output_type='div'
    and include_plotlyjs=False.
    '''
    aqi_marker = dict(
        size=df['aqi'] / 10,
        color=df['aqi'],
        colorscale='Jet',
    )
    trace = go.Scattermapbox(
        lat=round(df['lat'], 3),
        lon=round(df['lng'], 3),
        mode='markers',
        marker=aqi_marker,
        text=marker_text(df),
    )

    # Fixed initial view: centered on lat 36 / lon -119 at zoom 4.
    mapbox_settings = {
        'accesstoken': mapbox_api_key,
        'bearing': 0,
        'center': {'lat': 36, 'lon': -119},
        'style': 'dark',
        'pitch': 0,
        'zoom': 4,
    }
    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        mapbox=mapbox_settings,
    )

    figure = {'data': [trace], 'layout': layout}
    return plotly.offline.plot(figure, include_plotlyjs=False, output_type='div')

In [10]:
# generate_map(df2)
# Look up pollution readings for the cities in the CSV, then overwrite the
# california_pollution table with the result.
df = df_geodata(filename='california_cities.csv')
df.to_sql('california_pollution', con=engine, if_exists='replace')

In [30]:
# Read the table back, indexed by its 'index' column, and preview it.
(
    pd.read_sql_query(sql=query, con=engine)
    .set_index('index')
    .head()
)

Unnamed: 0_level_0,cities,lat,lng,aqi,category,dominant_pollutant,datetime
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Adelanto,34.58277,-117.409215,39,Good air quality,pm25,2018-11-29T20:00:00Z
1,Agoura Hills,34.15334,-118.761676,49,Good air quality,pm25,2018-11-29T20:00:00Z
2,Alameda,37.765206,-122.241636,58,Moderate air quality,pm25,2018-11-29T20:00:00Z
3,Albany,37.88687,-122.297747,56,Moderate air quality,pm25,2018-11-29T20:00:00Z
4,Alhambra,34.095287,-118.127015,56,Moderate air quality,pm25,2018-11-29T20:00:00Z


In [29]:
# df.head()
# poop=pd.read_csv('california_cities.csv')
# poop.head()
# poop.to_sql(name='california_pollution',con=engine,if_exists='replace')
# pd.read_sql_query(query,engine).set_index('index')

In [39]:
# Scratch check: `p and v` is falsy when either operand is falsy (v is None here).
p = 'poop'
v = None

print('yes' if p and v else 'no')

no
