# Data-Viz Project
## Something with this weather data


### Notes:

#### Units for the Frames:
1. Temperature (Kelvin)
2. Humidity (%)
3. Pressure (hPa)
4. Wind Direction (meteorological degrees)
5. Wind Speed (m/s)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#Preprocessing
from sklearn.preprocessing import FunctionTransformer

import plotly.express as px
import plotly.graph_objects as go

#import the Dash package
from dash import Dash, html, dcc, Input, Output
from jupyter_dash import JupyterDash


In [2]:
#List the names of the individual files from the WeatherData folder
file_names = ["city_attributes.csv","temperature.csv","wind_speed.csv","humidity.csv"
              ,"weather_description.csv","pressure.csv","wind_direction.csv"]

#List the names of the data frames to be imported
df_names = ['city data','temp','ws','humid','descr', 'press','wd']

#put the (file_name, df_name) in a dictionary
name_dct = {k:v for k,v in zip(file_names, df_names)}

#dictionary of df's to be populated
df_dct = {}

for k in name_dct:
    df_dct[name_dct[k]] = pd.read_csv("WeatherData/" + k)
    
# extract only the data for US cities: Includes datetime feature!
cities = df_dct['city data']
US_cities = (cities[cities.Country == 'United States'].City).values
feature_lst = np.append(US_cities, 'datetime')

for k in df_dct:
    if k != 'city data':
        df = df_dct[k]
        df_dct[k] = df[feature_lst]


        

## Location

In [3]:
# Plot the locations of the US/Canada cities
locations = df_dct['city data']
locations = locations[(locations.Country == 'United States')]


In [4]:
fig = go.Figure(data = go.Scattergeo(lat = locations['Latitude'],
                                    lon = locations['Longitude'],
                                    text = locations['City'],
                                    mode='markers'
                                    ))

fig.update_layout({"title":'USA City Locations <br>(Hover for names)',
                  'geo_scope':'usa',
                  })

## Temperature

In [5]:
tmp = df_dct['temp']
tmp = tmp.fillna(method = 'ffill') #fills missing data with latest non-missing value
tmp.loc[:,'datetime'] = pd.to_datetime(tmp.loc[:,'datetime'])


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [6]:
# Converts Temperature from Kelvin to Farenheit
def to_F(K):
    return 9/5 * (K - 273) + 32

to_F = FunctionTransformer(to_F)

def convert_temp(df):
    datetime = df['datetime']
    df = to_F.transform(df.loc[:, US_cities])
    df['datetime'] = datetime
    return df
tmp_F = convert_temp(tmp)

In [7]:
SF_temp = tmp_F.loc[:,['San Francisco','datetime']]
SF_temp['MA'] = SF_temp['San Francisco'].rolling(window = 90).mean()

In [8]:
SF_temp.min()

San Francisco                  30.74
datetime         2012-10-01 12:00:00
MA                            38.636
dtype: object

In [9]:
sf_temp_fig = go.Figure(layout = {"width": 1000, 'height': 600,
                                 'title': 'San Francisco Temperatures'})
sf_temp_fig.add_trace(go.Scatter(x = SF_temp['datetime'],
                                   y = SF_temp['San Francisco'],
                                   mode = 'lines',
                                name = 'Temperature',))
sf_temp_fig.add_trace(go.Scatter(x = SF_temp['datetime'],
                                y = SF_temp['MA'],
                                mode = 'lines',
                                name = '90 Day M.A.'))

sf_temp_fig.update_traces(selector = {'name':'90 Day M.A.'},
                         line = {'width':1})
sf_temp_fig.update_traces(selector = {'name':'Temperature'},
                         line = {'width':1.5,
                                "simplify":True})

sf_temp_fig.update_xaxes(title = {"text":"Date",
                                 'font':{'size':14}},
                        ticks = 'outside',
                        nticks = 20)
sf_temp_fig.update_yaxes(title = {"text":"° Farenheit",
                                 'font':{'size':14}})

sf_temp_fig.show()

### Temperature Interactivity
1. Choose the city
2. Choose the starting date
3. Choose the ending date
4. Choose the MA period

In [10]:
def select_citydates(city, start, end, period):
    """
    Parameters:
    -----------
        city: str
            String which denotes the city to be selected.
        start: str
            String which denotes the start date (inclusive)
        end: str
            String which denotes the end date (non-inclusive)
        period: int
            Integer denoting the Moving Average period.
            Note: period < start - end
        
    Returns:
    --------
    city_tmp: pandas.DataFrame
        DataFrame whose columns are [<city_name>, 'datetime', 'MA(period)']
            <city_name>: float
                Original temperature data
            datetime: datetime
                Original timestamp of temperature recording between 
                the date range of [start, end]
            MA(period): float
                Moving average over the past <period> observations
    """
    
    start_dt = pd.to_datetime(start)
    end_dt = pd.to_datetime(end)
    
    data = tmp_F[(tmp_F['datetime'] >= start_dt) & (tmp_F['datetime'] < end_dt)].copy()
    data = data[[city, 'datetime']]
    data['MA{}'.format(period)] =data[city].rolling(window = period).mean()
    
    return data
    
    

In [11]:
def make_figure(city, start, end, period):
    city_df = select_citydates(city, start, end, period)
    
    fig = go.Figure(layout = {"width": 1000, 'height': 600,
                                 'title': city + ' Temperatures'})
    fig.add_trace(go.Scatter(x = city_df['datetime'],
                                       y = city_df[city],
                                       mode = 'lines',
                                    name = 'Temperature',))
    fig.add_trace(go.Scatter(x = city_df['datetime'],
                                    y = city_df['MA{}'.format(period)],
                                    mode = 'lines',
                                    name = '{}-Day M.A.'.format(period)))

    fig.update_traces(selector = {'name':'{}-Day M.A.'.format(period)},
                             line = {'width':1})
    fig.update_traces(selector = {'name':'Temperature'},
                             line = {'width':1.5,
                                    "simplify":True})

    fig.update_xaxes(title = {"text":"Date",
                                     'font':{'size':14}},
                            ticks = 'outside',
                            nticks = 20)
    fig.update_yaxes(title = {"text":"° Farenheit",
                                     'font':{'size':14}})

#     fig.show()
    
    return fig


In [12]:
fig = make_figure('Chicago', '1/1/12','11/1/17 23:00:00', 60)

app = JupyterDash(__name__)

app.layout = html.Div(dcc.Graph(id = 'TimeSeries',
                               figure = fig))

app.run_server(mode = 'inline', port = '8120')

In [13]:
tmp_F['Month'] = tmp_F['datetime'].dt.month
tmp_F['Day'] = tmp_F['datetime'].dt.day

In [14]:
city_names = tmp_F.columns[:-2]
agg_dct = {k: np.mean for k in city_names}

In [15]:
group_obj = tmp_F.groupby('Month')
group_obj.agg(agg_dct)

Unnamed: 0_level_0,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,Denver,San Antonio,...,Atlanta,Detroit,Jacksonville,Charlotte,Miami,Pittsburgh,Philadelphia,New York,Boston,datetime
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,38.719731,50.821496,40.260767,54.608861,54.399665,45.157279,52.164918,33.653405,30.810406,51.529738,...,43.58519,25.495138,56.452195,41.403518,69.635539,29.20308,31.610689,31.289799,30.237281,2015-01-16 16:18:00.000000000
2,43.086971,53.578392,43.008409,56.71617,56.404834,51.480814,57.869249,40.56043,33.622475,57.308305,...,45.873825,24.562273,58.300814,43.748613,70.318873,29.475108,32.173907,31.242854,30.144201,2015-02-17 20:51:42.127659520
3,47.656146,56.175955,46.533782,60.142427,59.211352,60.32186,65.8764,49.130378,41.280117,62.809928,...,53.85043,33.761995,62.767997,51.09503,72.534058,38.354598,39.680683,38.771434,35.517916,2015-03-16 21:06:00.000000000
4,51.470828,58.026478,50.600865,61.928932,60.702604,65.456925,70.819428,54.774105,46.377776,69.592227,...,63.256833,47.371477,70.448289,61.931964,76.95242,52.60558,53.579465,51.912212,48.024735,2015-04-16 09:06:00.000000000
5,58.487427,60.785034,57.365075,64.390429,63.002832,73.518596,78.115837,61.465075,54.102128,74.295145,...,69.586361,60.192874,75.162469,68.642376,79.441076,61.903054,62.861513,62.091324,57.952862,2015-05-16 21:06:00.000000000
6,63.506444,64.410963,61.855715,69.8389,67.09285,87.755953,90.55871,74.638036,67.861523,81.151883,...,76.834032,68.730229,80.449226,76.332842,81.68475,69.428702,71.815015,70.92053,66.511696,2015-06-16 09:06:00.000000000
7,68.854739,67.319063,66.298613,74.165957,71.577017,91.883426,93.630678,74.354045,72.115923,84.219816,...,78.65768,72.827888,82.159381,78.411301,83.458071,72.181408,76.341188,76.67844,73.306658,2015-07-16 21:06:00.000000000
8,69.680522,66.218611,66.805748,74.074834,71.937959,87.308608,90.516255,69.968994,68.738004,84.182759,...,77.462775,71.273967,81.762422,76.457344,83.639536,70.321095,73.071849,74.100452,71.091361,2015-08-16 21:06:00.000000000
9,62.573917,66.411056,60.504974,73.209073,71.288043,79.045793,84.382887,65.894299,63.183047,79.497516,...,72.929725,65.596147,78.858991,71.629986,82.288174,65.21595,67.755702,68.517187,65.746675,2015-09-16 09:06:00.000000000
10,53.291162,61.872739,52.497967,66.251033,65.813742,65.300433,73.092661,54.889551,49.204741,72.055192,...,63.210358,54.224222,72.638649,61.499114,79.291193,54.734786,57.125919,58.18936,56.018651,2015-04-19 19:32:15.849056768


In [23]:
td = tmp_F['datetime'].to_dict()

tmp_F.loc[tmp_F['datetime']==str(choices[2])]

Unnamed: 0,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,Denver,San Antonio,...,Jacksonville,Charlotte,Miami,Pittsburgh,Philadelphia,New York,Boston,datetime,Month,Day
2,48.349854,61.654987,47.83499,65.962734,65.360302,68.725654,74.495315,53.878205,52.89315,61.346568,...,77.369414,60.17031,80.118532,46.444581,54.793774,59.445817,57.534966,2012-10-01 14:00:00,10,1


In [19]:
## Creating the app


sizes = [300 for i in range(27)]
choices=tmp_F['datetime'].to_dict()

app = JupyterDash(__name__)

## Setting the layout
app.layout = html.Div(
children=[
    ## This adds our RadioItems
    dcc.Slider(1, 2, 1, 
               value=0,
                 marks=choices,
                                      ## specifying the initial selection
        id='date-select')  ,
    dcc.Graph(id='plot')
]
)

## setting the callback
@app.callback(
    ## Our Output, 
    ## This selects the last html.div then changes its 'children' according to 
    ## the update_output_div function
    Output(component_id='plot', component_property='figure'),
    
    ## Our Input
    ## selects the dropdown
    ## and takes whatever the selected value was
    Input(component_id='date-select', component_property='value')
)

def make_plot(date):  ## Our callback function where we make the plotly figure
    ## Makes the scatter plot


    fig = px.scatter_geo(lon=locations['Longitude'],
                        lat=locations['Latitude'], 
                        width=1000,
                        height=600, 
                        size=sizes,
                        color=np.array(tmp_F.loc[tmp_F['datetime']==str(choices[date]),'Portland':'Boston'])[0],
                        color_continuous_scale=['darkblue', 'white', 'red'],
                        range_color=[0, 110],
                        opacity=1,
                        # hover_name='Temperature',
                        # hover_data=list(np.array(tmp_F.loc[tmp_F['datetime']=='2017-11-29 21:00:00'].drop(columns='datetime'))[0])
                        ) 

    fig.update_layout({"title":'USA City Locations <br>(Hover for names)',
                    'geo_scope':'usa',
                    })

    return fig

## Runs the app
## Note you must change the port from the earlier port values
app.run_server(mode='inline', port = 8061)



In [None]:
## Creating the app


sizes = [300 for i in range(27)]
choices={
        0: '2014-01-01 00:00:00',
        1: '2014-04-01 00:00:00',
        2: '2014-07-01 00:00:00',
        3: '2014-10-01 00:00:00'
    }

app = JupyterDash(__name__)

## Setting the layout
app.layout = html.Div(
children=[
    ## This adds our RadioItems
    dcc.Slider(0, 3, 1, 
               value=0,
                 marks=choices,
                                      ## specifying the initial selection
        id='date-select')  ,
    dcc.Graph(id='plot')
]
)

## setting the callback
@app.callback(
    ## Our Output, 
    ## This selects the last html.div then changes its 'children' according to 
    ## the update_output_div function
    Output(component_id='plot', component_property='figure'),
    
    ## Our Input
    ## selects the dropdown
    ## and takes whatever the selected value was
    Input(component_id='date-select', component_property='value')
)

def make_plot(date):  ## Our callback function where we make the plotly figure
    ## Makes the scatter plot


    fig = px.scatter_geo(lon=locations['Longitude'],
                        lat=locations['Latitude'], 
                        width=1000,
                        height=600, 
                        size=sizes,
                        color=np.array(tmp_F.loc[tmp_F['datetime']==choices[date],'Portland':'Boston'])[0],
                        color_continuous_scale=['darkblue', 'white', 'red'],
                        range_color=[0, 110],
                        opacity=1,
                        # hover_name='Temperature',
                        # hover_data=list(np.array(tmp_F.loc[tmp_F['datetime']=='2017-11-29 21:00:00'].drop(columns='datetime'))[0])
                        ) 

    fig.update_layout({"title":'USA City Locations <br>(Hover for names)',
                    'geo_scope':'usa',
                    })

    return fig

## Runs the app
## Note you must change the port from the earlier port values
app.run_server(mode='inline', port = 8061)