# KMSO notebook
* this notebook utilizes weather data from KMSO exclusively

For each 24 hour day (midnight tomidnight) we'd like to know the following:
* Temperature and barometric pressure values at sunrise and sunset
* Difference between temp and pressure at sunrise and sunset
* Maximum, minimum and averages for temp and pressure for both of the time frames of sunset to sunrise and sunrise to sunset
* Differences between the max, min and average for both of these time frames.


# Load libraries

In [267]:
# pandas for data structure
import pandas as pd
import datetime
import numpy as np

# Load data

#### Weather Data

* data collected from [Wunderground](https://www.wunderground.com/weather/api/)
* hosted file: [Google Drive](https://drive.google.com/file/d/1eS0gGM14g7iFulUeqz3XwbKb5OtK9aSI/view)

In [107]:
# local file
filename_wunderground = '../data/KMSO-190209.csv'

In [108]:
# load data into dataframes
wund = pd.read_csv(filename_wunderground, parse_dates=['utc_date'])

In [109]:
wund['utc_date'] = wund['utc_date'].dt.tz_localize('utc')

In [110]:
# localize datetime make local_date column
wund['local_date'] = pd.to_datetime(wund.loc[:, 'utc_date']).dt.tz_convert('US/Mountain')

In [111]:
wund = wund.set_index('local_date')

In [112]:
wund = wund[['station_id', 'pressurei', 'pressurem', 'tempi', 'tempm', 'utc_date']]

In [113]:
wund.columns = ['station_id', 'pressi', 'pressm', 'tempi', 'tempm', 'utc_date']

In [114]:
wund.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3463 entries, 2017-07-01 00:53:00-06:00 to 2017-10-31 23:53:00-06:00
Data columns (total 6 columns):
station_id    3463 non-null object
pressi        3463 non-null float64
pressm        3463 non-null float64
tempi         3463 non-null float64
tempm         3463 non-null float64
utc_date      3463 non-null datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(4), object(1)
memory usage: 189.4+ KB


In [115]:
wund.describe()

Unnamed: 0,pressi,pressm,tempi,tempm
count,3463.0,3463.0,3463.0,3463.0
mean,27.146572,1014.063817,44.775253,0.706382
std,170.425072,187.304652,382.391115,380.426406
min,-9999.0,-9999.0,-9999.0,-9999.0
25%,29.92,1013.2,44.35,6.85
50%,30.03,1016.8,59.0,15.0
75%,30.14,1020.5,73.0,22.8
max,30.81,1043.2,100.0,37.8


#### Sunset Sunrise Data

In [124]:
# Load Sunset Sunrise data
sun_filename = '../data/sunrise_sunset-KMSO_190209.csv'
sun = pd.read_csv(sun_filename, parse_dates=['sunrise', 'sunset'])

In [125]:
# Select a subset of loaded DataFrame
sun = sun[['station_id', 'sunrise', 'sunset']]

In [126]:
# Rename columns
sun.columns = ['station_id', 'sunrise_utc', 'sunset_utc']

In [127]:
# Localize datetime to UTC
sun['sunrise_utc'] = sun['sunrise_utc'].dt.tz_localize('utc')
sun['sunset_utc'] = sun['sunset_utc'].dt.tz_localize('utc')

In [128]:
# Create US/Mountain datetimes
sun['sunrise_local'] = pd.to_datetime(sun.loc[:, 'sunrise_utc']).dt.tz_convert('US/Mountain')
sun['sunset_local'] = pd.to_datetime(sun.loc[:, 'sunset_utc']).dt.tz_convert('US/Mountain')

In [129]:
# Reorder columns
sun = sun[['station_id', 'sunrise_local', 'sunset_local', 'sunrise_utc', 'sunset_utc']]

In [130]:
# Drop rows which start 2017-06-30
sun = sun[sun.sunset_local.dt.month != 6]

In [123]:
# Drop rows which start with 2017-11-01 
sun[sun.sunset_local.dt.month == 11]

Unnamed: 0,station_id,sunrise_local,sunset_local,sunrise_utc,sunset_utc


In [131]:
# Reset index
sun = sun.reset_index(drop=True)

In [133]:
sun.tail(2)

Unnamed: 0,station_id,sunrise_local,sunset_local,sunrise_utc,sunset_utc
121,KMSO,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00,2017-10-30 14:16:57+00:00,2017-10-31 00:22:14+00:00
122,KMSO,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00,2017-10-31 14:18:25+00:00,2017-11-01 00:20:42+00:00


# New DataFrame

In [134]:
# Column names
columns = ['date', 'station_id', 'rise_tempi','set_tempi','rise_pressi','set_pressi', 'sunrise', 'sunset']

In [135]:
calcs = pd.DataFrame(columns=columns)

#### Populate with stations and date range

In [136]:
# Create date range by day
start = datetime.date(2017, 7, 1)
end = datetime.date(2017, 10, 31)
date_index = pd.date_range(start, periods=123, freq='D')

In [137]:
# Append days and unique station ids
for station in sun.station_id.unique():
    for date in date_index:
        calcs = calcs.append({'station_id': station, 'date': date.strftime('%Y-%m-%d')},
                            ignore_index=True)

In [138]:
calcs = calcs.set_index('date')

In [139]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 123 entries, 2017-07-01 to 2017-10-31
Data columns (total 7 columns):
station_id     123 non-null object
rise_tempi     0 non-null object
set_tempi      0 non-null object
rise_pressi    0 non-null object
set_pressi     0 non-null object
sunrise        0 non-null object
sunset         0 non-null object
dtypes: object(7)
memory usage: 7.7+ KB


# Calculations

### Column for values at Sunrise and Sunset
* 'values' refers to temperature and pressure data
* indexed by day
* date will go sunrise to sunset
* columns = ['rise_tempi','set_tempi','rise_pressurei','set_pressurei']
* index = ['2017-07-01', .... '2017-10-31']

* Get closest wund.local_date to sun.sunrise_local
* [query the closest datetime index](https://stackoverflow.com/questions/42264848/pandas-dataframe-how-to-query-the-closest-datetime-index)

In [140]:
def find_closest_weather_sample(station, date):
    wund_station = wund[wund.station_id == station].copy()
    wund_station = wund_station.groupby(wund_station.index).first()
    idx = wund_station.index.get_loc(date, method='nearest')
    return wund_station.iloc[idx]

### Sunrise/Sunset Temp/Pressure

In [142]:
wund.head(1)

Unnamed: 0_level_0,station_id,pressi,pressm,tempi,tempm,utc_date
local_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-07-01 00:53:00-06:00,KMSO,29.96,1014.5,61.0,16.1,2017-07-01 06:53:00+00:00


In [143]:
# Pull date out of index
calcs = calcs.reset_index()

In [146]:
calcs.tail(2)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset
121,2017-10-30,KMSO,30.0,41,30.4,30.21,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00
122,2017-10-31,KMSO,21.9,36,30.13,29.82,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00


In [None]:
row_counter = 0
# Find weather samples for sunrise
for row in sun.iterrows():
    print(row_counter)
    row_counter+=1
    station_id = row[1]['station_id']
    sunrise_local = row[1]['sunrise_local']
    sunset_local = row[1]['sunset_local']

    sunrise_Ymd = sunrise_local.strftime('%Y-%m-%d')
    sunset_Ymd = sunset_local.strftime('%Y-%m-%d')
    
    sunrise_sample = find_closest_weather_sample(station_id, sunrise_local)
    sunset_sample = find_closest_weather_sample(station_id, sunset_local)
    
    sample_rise_temp = sunrise_sample.tempi
    sample_rise_press = sunrise_sample.pressi
    
    sample_set_temp = sunset_sample.tempi
    sample_set_press = sunset_sample.pressi
    
    # Insert data into new dataframe 
    sunrise_mask = (calcs.date == sunrise_Ymd) & (calcs.station_id == station_id)
    calcs.rise_tempi.loc[sunrise_mask] = sample_rise_temp
    calcs.rise_pressi.loc[sunrise_mask] = sample_rise_press
    
    sunset_mask = (calcs.date == sunset_Ymd) & (calcs.station_id == station_id)
    calcs.set_tempi.loc[sunset_mask] = sample_set_temp
    calcs.set_pressi.loc[sunset_mask] = sample_set_press
    print(calcs.loc[(calcs.date == sunset_Ymd) & (calcs.station_id == station_id)])
    calcs['sunrise'].loc[(calcs.date == sunset_Ymd) & (calcs.station_id == station_id)] = sunrise_local
    calcs['sunset'].loc[(calcs.date == sunset_Ymd) & (calcs.station_id == station_id)] = sunset_local
'''
    print(sunrise_local.strftime('%Y-%m-%d'))
    print('**********************************************')
    print('*', station_id)
    print('Sunrise:', sunrise_local, '\n')

    print('* Nearest Sunrise Weather Sample Datetime:')
    print(sunrise_sample)
    print('rise_temp :', sample_rise_temp)
    print('rise_press:', sample_rise_press)
    
    print('\n* Nearest Sunset Weather Sample Datetime:')
    print(sunset_sample)
    print('set_temp  :', sample_set_temp)
    print('set_press :', sample_set_press)

    print('\n* Calcs DataFrame Selected Row')
    print(calcs.loc[(calcs.date == sunrise_local.strftime('%Y-%m-%d')) & 
                     (calcs.station_id == station_id)])
    
    print('**********************************************')
    print('\n')
'''

In [55]:
calcs.head(1)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset
0,2017-07-01,KMSO,53.1,75.9,29.99,29.88,2017-07-01 05:46:08-06:00,2017-07-01 21:33:58-06:00


In [59]:
filename = '../data/2017-sunrise_set-temp_pressure-KMSO_190209_001.csv'
calcs.to_csv(filename, index=False)

### Difference sunrise - sunset, sunset - sunrise values

In [147]:
calcs.tail(2)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset
121,2017-10-30,KMSO,30.0,41,30.4,30.21,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00
122,2017-10-31,KMSO,21.9,36,30.13,29.82,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00


In [148]:
# remember to set -1 to account for no 11/1 value
for row in calcs.iterrows():
    # sunrise to sunset
    station_id = row[1].station_id
    
    # set variables
    rise_set_diff_tempi = row[1].set_tempi - row[1].rise_tempi
    rise_set_diff_pressi = row[1].set_pressi - row[1].rise_pressi
    set_rise_diff_tempi = calcs.loc[row[0]+1].rise_tempi - row[1].set_tempi
    set_rise_diff_pressi = calcs.loc[row[0]+1].rise_pressi - row[1].set_pressi

    # sunrise to sunset
    print(row[1], '\n')
    print('**** Sunrise to Sunset ****')
    print(station_id, row[1].date)
    print('temp diff :', rise_set_diff_tempi)
    print('press diff:', rise_set_diff_pressi)
    
    # sunset to sunrise
    print('\n')
    print(calcs.loc[row[0] + 1])
    print('date:', row[1].date)
    print('\n**** Sunset to Sunrise ****')
    print('temp diff :', set_rise_diff_tempi)
    print('press diff:', set_rise_diff_pressi)

    
    # store values in DataFrame
    calcs.loc[row[0], 'rise_set_diff_tempi'] = rise_set_diff_tempi
    calcs.loc[row[0], 'rise_set_diff_pressi'] = rise_set_diff_pressi
    calcs.loc[row[0], 'set_rise_diff_tempi'] = set_rise_diff_tempi
    calcs.loc[row[0], 'set_rise_diff_pressi'] = set_rise_diff_pressi



date                          2017-07-01
station_id                          KMSO
rise_tempi                          53.1
set_tempi                           75.9
rise_pressi                        29.99
set_pressi                         29.88
sunrise        2017-07-01 05:46:08-06:00
sunset         2017-07-01 21:33:58-06:00
Name: 0, dtype: object 

**** Sunrise to Sunset ****
KMSO 2017-07-01
temp diff : 22.800000000000004
press diff: -0.10999999999999943


date                          2017-07-02
station_id                          KMSO
rise_tempi                            55
set_tempi                             73
rise_pressi                        29.96
set_pressi                         29.86
sunrise        2017-07-02 05:46:47-06:00
sunset         2017-07-02 21:33:42-06:00
Name: 1, dtype: object
date: 2017-07-01

**** Sunset to Sunrise ****
temp diff : -20.900000000000006
press diff: 0.08000000000000185
date                          2017-07-02
station_id                         

Name: 53, dtype: object 

**** Sunrise to Sunset ****
KMSO 2017-08-23
temp diff : 23.0
press diff: -0.14000000000000057


date                                   2017-08-24
station_id                                   KMSO
rise_tempi                                   60.1
set_tempi                                      70
rise_pressi                                 29.88
set_pressi                                  29.83
sunrise                 2017-08-24 06:46:34-06:00
sunset                  2017-08-24 20:29:33-06:00
rise_set_diff_tempi                           NaN
rise_set_diff_pressi                          NaN
set_rise_diff_tempi                           NaN
set_rise_diff_pressi                          NaN
Name: 54, dtype: object
date: 2017-08-23

**** Sunset to Sunrise ****
temp diff : -14.899999999999999
press diff: -0.010000000000001563
date                          2017-08-24
station_id                          KMSO
rise_tempi                          60.1
set_tempi          

date                          2017-10-17
station_id                          KMSO
rise_tempi                            28
set_tempi                             63
rise_pressi                        30.13
set_pressi                          29.9
sunrise        2017-10-17 07:58:09-06:00
sunset         2017-10-17 18:44:06-06:00
Name: 108, dtype: object 

**** Sunrise to Sunset ****
KMSO 2017-10-17
temp diff : 35.0
press diff: -0.23000000000000043


date                                   2017-10-18
station_id                                   KMSO
rise_tempi                                   44.1
set_tempi                                      54
rise_pressi                                 30.04
set_pressi                                  29.89
sunrise                 2017-10-18 07:59:34-06:00
sunset                  2017-10-18 18:42:18-06:00
rise_set_diff_tempi                           NaN
rise_set_diff_pressi                          NaN
set_rise_diff_tempi                           NaN

KeyError: 'the label [123] is not in the [index]'

In [149]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 12 columns):
date                    123 non-null object
station_id              123 non-null object
rise_tempi              123 non-null object
set_tempi               123 non-null object
rise_pressi             123 non-null object
set_pressi              123 non-null object
sunrise                 123 non-null object
sunset                  123 non-null object
rise_set_diff_tempi     122 non-null float64
rise_set_diff_pressi    122 non-null float64
set_rise_diff_tempi     122 non-null float64
set_rise_diff_pressi    122 non-null float64
dtypes: float64(4), object(8)
memory usage: 11.6+ KB


In [150]:
calcs.tail(2)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,set_rise_diff_tempi,set_rise_diff_pressi
121,2017-10-30,KMSO,30.0,41,30.4,30.21,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00,11.0,-0.19,-19.1,-0.08
122,2017-10-31,KMSO,21.9,36,30.13,29.82,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00,,,,


### Max, min, ave : sunrise to sunset

In [151]:
for row in calcs.iterrows(): 
    # set variables
    station_id = row[1].station_id
    sunrise_dt = row[1].sunrise
    sunset_dt = row[1].sunset
    
    # select date range
    mask = (wund.index > sunrise_dt) & (wund.index <= sunset_dt) & (wund.station_id == station_id)

    ## max
    rise_set_max_tempi = wund[mask].tempi.max()
    rise_set_max_pressi = wund[mask].pressi.max() 
    
    ## min
    rise_set_min_tempi = wund[mask].tempi.min()
    rise_set_min_pressi = wund[mask].pressi.min()
    
    ## mean
    rise_set_mean_tempi = wund[mask].tempi.mean()
    rise_set_mean_pressi = wund[mask].pressi.mean()
    
    # store values in DataFrame
    calcs.loc[row[0], 'rise_set_max_tempi'] = rise_set_max_tempi
    calcs.loc[row[0], 'rise_set_max_pressi'] = rise_set_max_pressi
    calcs.loc[row[0], 'rise_set_min_tempi'] = rise_set_min_tempi
    calcs.loc[row[0], 'rise_set_min_pressi'] = rise_set_min_pressi
    calcs.loc[row[0], 'rise_set_mean_tempi'] = rise_set_mean_tempi
    calcs.loc[row[0], 'rise_set_mean_pressi'] = rise_set_mean_pressi
'''
    print(row[0])
    print(row[1].station_id)
    print(row[1].date)
    print(row[1].sunrise, row[1].sunset)
    print('\n', row[1], '\n')
    print('Max temp: ', rise_set_max_tempi)
    print('Max press:', rise_set_max_pressi)
    print('Min temp: ', rise_set_min_tempi)
    print('Min press:', rise_set_min_pressi)
    print('Mean temp :', rise_set_mean_tempi)
    print('Mean press:', rise_set_mean_pressi)
    print('\n', wund[mask].describe(), '\n')
'''

"\n    print(row[0])\n    print(row[1].station_id)\n    print(row[1].date)\n    print(row[1].sunrise, row[1].sunset)\n    print('\n', row[1], '\n')\n    print('Max temp: ', rise_set_max_tempi)\n    print('Max press:', rise_set_max_pressi)\n    print('Min temp: ', rise_set_min_tempi)\n    print('Min press:', rise_set_min_pressi)\n    print('Mean temp :', rise_set_mean_tempi)\n    print('Mean press:', rise_set_mean_pressi)\n    print('\n', wund[mask].describe(), '\n')\n"

In [152]:
calcs.head()

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,set_rise_diff_tempi,set_rise_diff_pressi,rise_set_max_tempi,rise_set_max_pressi,rise_set_min_tempi,rise_set_min_pressi,rise_set_mean_tempi,rise_set_mean_pressi
0,2017-07-01,KMSO,53.1,75.9,29.99,29.88,2017-07-01 05:46:08-06:00,2017-07-01 21:33:58-06:00,22.8,-0.11,-20.9,0.08,90.0,30.0,52.0,29.84,76.06875,29.913125
1,2017-07-02,KMSO,55.0,73.0,29.96,29.86,2017-07-02 05:46:47-06:00,2017-07-02 21:33:42-06:00,18.0,-0.1,-17.1,0.09,90.0,29.99,55.0,29.83,77.6875,29.91
2,2017-07-03,KMSO,55.9,75.0,29.95,29.95,2017-07-03 05:47:27-06:00,2017-07-03 21:33:23-06:00,19.1,0.0,-23.9,0.11,90.0,29.97,55.9,29.89,77.88125,29.92875
3,2017-07-04,KMSO,51.1,73.0,30.06,29.92,2017-07-04 05:48:10-06:00,2017-07-04 21:33:01-06:00,21.9,-0.14,-16.0,0.09,91.9,30.08,48.9,29.9,75.7375,29.98875
4,2017-07-05,KMSO,57.0,82.9,30.01,29.95,2017-07-05 05:48:54-06:00,2017-07-05 21:32:37-06:00,25.9,-0.06,-25.0,0.15,98.1,30.06,57.0,29.89,81.6125,29.976875


### Max, min, ave : sunset to sunrise

In [153]:
for row in calcs.iterrows(): 
    # set variables
    station_id = row[1].station_id
    sunrise_dt = calcs.loc[row[0]+1].sunrise
    sunset_dt = row[1].sunset
    
    # select date range
    mask = (wund.index > sunset_dt) & (wund.index <= sunrise_dt) & (wund.station_id == station_id)

    set_rise_mean_pressi = wund[mask].pressi.mean()

    # max
    set_rise_max_tempi = wund[mask].tempi.max()
    set_rise_max_pressi = wund[mask].pressi.max()
    
    # min
    set_rise_min_tempi = wund[mask].tempi.min()
    set_rise_min_pressi = wund[mask].pressi.min()
    
    # mean
    set_rise_mean_tempi = wund[mask].tempi.mean()

    # store values in DataFrame
    calcs.loc[row[0], 'set_rise_max_tempi'] = set_rise_max_tempi
    calcs.loc[row[0], 'set_rise_max_pressi'] = set_rise_max_pressi
    
    calcs.loc[row[0], 'set_rise_min_tempi'] = set_rise_min_tempi
    calcs.loc[row[0], 'set_rise_min_pressi'] = set_rise_min_pressi
    
    calcs.loc[row[0], 'set_rise_mean_tempi'] = set_rise_mean_tempi
    calcs.loc[row[0], 'set_rise_mean_pressi'] = set_rise_mean_pressi

'''    print(row[0])
    print(row[1].station_id)
    print(row[1].date)
    print(sunset_dt, sunrise_dt)
    print('\n', row[1], '\n')
    print('Max temp: ', set_rise_max_tempi)
    print('Max press:', set_rise_max_pressi)
    print('Min temp: ', set_rise_min_tempi)
    print('Min press:', set_rise_min_pressi)
    print('Mean temp :', set_rise_mean_tempi)
    print('Mean press:', set_rise_mean_pressi)
    print('\n', wund[mask].describe(), '\n')'''

KeyError: 'the label [123] is not in the [index]'

In [154]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 24 columns):
date                    123 non-null object
station_id              123 non-null object
rise_tempi              123 non-null object
set_tempi               123 non-null object
rise_pressi             123 non-null object
set_pressi              123 non-null object
sunrise                 123 non-null object
sunset                  123 non-null object
rise_set_diff_tempi     122 non-null float64
rise_set_diff_pressi    122 non-null float64
set_rise_diff_tempi     122 non-null float64
set_rise_diff_pressi    122 non-null float64
rise_set_max_tempi      123 non-null float64
rise_set_max_pressi     123 non-null float64
rise_set_min_tempi      123 non-null float64
rise_set_min_pressi     123 non-null float64
rise_set_mean_tempi     123 non-null float64
rise_set_mean_pressi    123 non-null float64
set_rise_max_tempi      122 non-null float64
set_rise_max_pressi     122 non-null float64
set

In [155]:
calcs.head()

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,rise_set_min_tempi,rise_set_min_pressi,rise_set_mean_tempi,rise_set_mean_pressi,set_rise_max_tempi,set_rise_max_pressi,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi
0,2017-07-01,KMSO,53.1,75.9,29.99,29.88,2017-07-01 05:46:08-06:00,2017-07-01 21:33:58-06:00,22.8,-0.11,...,52.0,29.84,76.06875,29.913125,75.9,29.94,57.0,29.88,65.1375,29.90375
1,2017-07-02,KMSO,55.0,73.0,29.96,29.86,2017-07-02 05:46:47-06:00,2017-07-02 21:33:42-06:00,18.0,-0.1,...,55.0,29.83,77.6875,29.91,73.0,29.94,55.0,29.86,63.6625,29.90125
2,2017-07-03,KMSO,55.9,75.0,29.95,29.95,2017-07-03 05:47:27-06:00,2017-07-03 21:33:23-06:00,19.1,0.0,...,55.9,29.89,77.88125,29.92875,75.0,30.05,51.1,29.95,59.8875,30.0075
3,2017-07-04,KMSO,51.1,73.0,30.06,29.92,2017-07-04 05:48:10-06:00,2017-07-04 21:33:01-06:00,21.9,-0.14,...,48.9,29.9,75.7375,29.98875,73.0,29.99,57.0,29.92,63.875,29.9575
4,2017-07-05,KMSO,57.0,82.9,30.01,29.95,2017-07-05 05:48:54-06:00,2017-07-05 21:32:37-06:00,25.9,-0.06,...,57.0,29.89,81.6125,29.976875,82.9,30.08,59.0,29.95,68.5125,30.0225


In [98]:
wund['2017-10-30']

Unnamed: 0_level_0,station_id,pressi,pressm,tempi,tempm,utc_date
local_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-30 00:53:00-06:00,KMSO,30.39,1029.1,37.9,3.3,2017-10-30 06:53:00+00:00
2017-10-30 01:53:00-06:00,KMSO,30.4,1029.5,36.0,2.2,2017-10-30 07:53:00+00:00
2017-10-30 02:53:00-06:00,KMSO,30.43,1030.3,34.0,1.1,2017-10-30 08:53:00+00:00
2017-10-30 03:53:00-06:00,KMSO,30.44,1030.7,34.0,1.1,2017-10-30 09:53:00+00:00
2017-10-30 04:53:00-06:00,KMSO,30.45,1030.9,33.1,0.6,2017-10-30 10:53:00+00:00
2017-10-30 05:53:00-06:00,KMSO,30.45,1030.9,30.9,-0.6,2017-10-30 11:53:00+00:00
2017-10-30 06:53:00-06:00,KMSO,30.44,1030.6,27.0,-2.8,2017-10-30 12:53:00+00:00
2017-10-30 07:53:00-06:00,KMSO,30.4,1029.5,30.0,-1.1,2017-10-30 13:53:00+00:00
2017-10-30 08:53:00-06:00,KMSO,30.4,1029.5,33.1,0.6,2017-10-30 14:53:00+00:00
2017-10-30 09:53:00-06:00,KMSO,30.37,1028.4,37.0,2.8,2017-10-30 15:53:00+00:00


### Difference max, min, ave sunset to sunrise : sunrise to sunset

In [349]:
# remember to set -1 to account for no 11/1 value
for row in calcs.iterrows():
    # sunrise to sunset
    station_id = row[1].station_id
    
    rise_set_set_rise_mean_diff_tempi = row[1].rise_set_mean_tempi - row[1].set_rise_mean_tempi
    rise_set_set_rise_mean_diff_pressi = row[1].rise_set_mean_pressi - row[1].set_rise_mean_pressi

    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_tempi'] = rise_set_set_rise_mean_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_pressi'] = rise_set_set_rise_mean_diff_pressi
    

    # set variables
    rise_set_set_rise_max_diff_tempi = row[1].rise_set_max_tempi - row[1].set_rise_max_tempi
    rise_set_set_rise_max_diff_pressi = row[1].rise_set_max_pressi - row[1].set_rise_max_pressi
    rise_set_set_rise_min_diff_tempi = row[1].rise_set_min_tempi - row[1].set_rise_min_tempi
    rise_set_set_rise_min_diff_pressi = row[1].rise_set_min_pressi - row[1].set_rise_min_pressi
    rise_set_set_rise_mean_diff_tempi = row[1].rise_set_mean_tempi - row[1].set_rise_mean_tempi
    rise_set_set_rise_mean_diff_pressi = row[1].rise_set_mean_pressi - row[1].set_rise_mean_pressi

    # store values in DataFrame
    calcs.loc[row[0], 'rise_set_set_rise_max_diff_tempi'] = rise_set_set_rise_max_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_max_diff_pressi'] = rise_set_set_rise_max_diff_pressi
    calcs.loc[row[0], 'rise_set_set_rise_min_diff_tempi'] = rise_set_set_rise_min_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_min_diff_pressi'] = rise_set_set_rise_min_diff_pressi
    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_tempi'] = rise_set_set_rise_mean_diff_tempi

In [158]:
calcs.head(3)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,rise_set_mean_tempi,rise_set_mean_pressi,set_rise_max_tempi,set_rise_max_pressi,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi
0,2017-07-01,KMSO,53.1,75.9,29.99,29.88,2017-07-01 05:46:08-06:00,2017-07-01 21:33:58-06:00,22.8,-0.11,...,76.06875,29.913125,75.9,29.94,57.0,29.88,65.1375,29.90375,10.93125,0.009375
1,2017-07-02,KMSO,55.0,73.0,29.96,29.86,2017-07-02 05:46:47-06:00,2017-07-02 21:33:42-06:00,18.0,-0.1,...,77.6875,29.91,73.0,29.94,55.0,29.86,63.6625,29.90125,14.025,0.00875
2,2017-07-03,KMSO,55.9,75.0,29.95,29.95,2017-07-03 05:47:27-06:00,2017-07-03 21:33:23-06:00,19.1,0.0,...,77.88125,29.92875,75.0,30.05,51.1,29.95,59.8875,30.0075,17.99375,-0.07875


In [162]:
calcs.tail()

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,rise_set_mean_tempi,rise_set_mean_pressi,set_rise_max_tempi,set_rise_max_pressi,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi
118,2017-10-27,KMSO,27.0,48.0,30.48,30.35,2017-10-27 08:12:33-06:00,2017-10-27 18:26:58-06:00,21.0,-0.13,...,43.6,30.414,48.0,30.39,-9999.0,30.32,-494.021053,30.359474,537.621053,0.054526
119,2017-10-28,KMSO,-9999.0,48.9,30.39,30.08,2017-10-28 08:14:01-06:00,2017-10-28 18:25:22-06:00,10047.9,-0.31,...,43.009091,30.244545,48.9,30.11,26.1,30.0,32.141667,30.043333,10.867424,0.201212
120,2017-10-29,KMSO,25.0,43.0,30.05,30.35,2017-10-29 08:15:29-06:00,2017-10-29 18:23:47-06:00,18.0,0.3,...,39.228571,30.174286,44.1,30.45,27.0,30.35,36.714286,30.404286,2.514286,-0.23
121,2017-10-30,KMSO,30.0,41.0,30.4,30.21,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00,11.0,-0.19,...,40.72,30.299,37.9,30.23,19.9,30.13,25.314286,30.193571,15.405714,0.105429
122,2017-10-31,KMSO,21.9,36.0,30.13,29.82,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00,,,...,31.0,29.995,,,,,,,,


In [180]:
calcs.tail()

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi,rise_set_set_rise_max_diff_tempi,rise_set_set_rise_max_diff_pressi,rise_set_set_rise_min_diff_tempi,rise_set_set_rise_min_diff_pressi
118,2017-10-27,KMSO,27.0,48.0,30.48,30.35,2017-10-27 08:12:33-06:00,2017-10-27 18:26:58-06:00,21.0,-0.13,...,-9999.0,30.32,-494.021053,30.359474,537.621053,0.054526,6.0,0.1,10027.9,0.0
119,2017-10-28,KMSO,-9999.0,48.9,30.39,30.08,2017-10-28 08:14:01-06:00,2017-10-28 18:25:22-06:00,10047.9,-0.31,...,26.1,30.0,32.141667,30.043333,10.867424,0.201212,7.0,0.28,0.0,0.08
120,2017-10-29,KMSO,25.0,43.0,30.05,30.35,2017-10-29 08:15:29-06:00,2017-10-29 18:23:47-06:00,18.0,0.3,...,27.0,30.35,36.714286,30.404286,2.514286,-0.23,3.9,-0.13,-2.0,-0.3
121,2017-10-30,KMSO,30.0,41.0,30.4,30.21,2017-10-30 08:16:57-06:00,2017-10-30 18:22:14-06:00,11.0,-0.19,...,19.9,30.13,25.314286,30.193571,15.405714,0.105429,7.1,0.17,13.2,0.08
122,2017-10-31,KMSO,21.9,36.0,30.13,29.82,2017-10-31 08:18:25-06:00,2017-10-31 18:20:42-06:00,,,...,,,,,,,,,,


In [173]:
df.columns.tolist()

['date',
 'station_id',
 'rise_tempi',
 'set_tempi',
 'rise_pressi',
 'set_pressi',
 'sunrise',
 'sunset',
 'rise_set_diff_tempi',
 'rise_set_diff_pressi',
 'set_rise_diff_tempi',
 'set_rise_diff_pressi',
 'rise_set_max_tempi',
 'rise_set_max_pressi',
 'rise_set_min_tempi',
 'rise_set_min_pressi',
 'rise_set_mean_tempi',
 'rise_set_mean_pressi',
 'set_rise_max_tempi',
 'set_rise_max_pressi',
 'set_rise_min_tempi',
 'set_rise_min_pressi',
 'set_rise_mean_tempi',
 'set_rise_mean_pressi',
 'rise_set_set_rise_mean_diff_tempi',
 'rise_set_set_rise_mean_diff_pressi']

In [159]:
filename = '../data/2017-sunrise_set-temp_pressure-190211_KMSO_001.csv'
calcs.to_csv(filename, index=False)

# Notes

* set to rise calculated based on 9/1 set to 9/2 rise value assigned to 9/1
* set extreme values to NaN
* NA in all the columns except the date and location/station name
* there are some NaNs in here which I am working to understand
 * the NaNs exist in max,min,mean columns which work with an array of wunderground sample rows
  * my guess is there is some kind of issues in the array of rows used to generate max, min, mean

In [314]:
filename = '../data/2017-sunrise_set-temp_pressure-190211_KMSO_001.csv'
calcs = pd.read_csv(filename)

In [315]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 26 columns):
date                                  123 non-null object
station_id                            123 non-null object
rise_tempi                            123 non-null float64
set_tempi                             123 non-null float64
rise_pressi                           123 non-null float64
set_pressi                            123 non-null float64
sunrise                               123 non-null object
sunset                                123 non-null object
rise_set_diff_tempi                   122 non-null float64
rise_set_diff_pressi                  122 non-null float64
set_rise_diff_tempi                   122 non-null float64
set_rise_diff_pressi                  122 non-null float64
rise_set_max_tempi                    123 non-null float64
rise_set_max_pressi                   123 non-null float64
rise_set_min_tempi                    123 non-null float64
rise_set_min_p

In [324]:
all_cols = calcs.columns.tolist()

In [328]:
all_cols.remove('date')
all_cols.remove('sunrise')
all_cols.remove('sunset')
all_cols.remove('station_id')

In [353]:
calcs.min()

date                                                 2017-07-01
station_id                                                 KMSO
rise_tempi                                                 21.9
set_tempi                                                    36
rise_pressi                                               29.42
set_pressi                                                29.52
sunrise                               2017-07-01 05:46:08-06:00
sunset                                2017-07-01 21:33:58-06:00
rise_set_diff_tempi                                          -9
rise_set_diff_pressi                                      -0.29
set_rise_diff_tempi                                       -37.1
set_rise_diff_pressi                                      -0.17
rise_set_max_tempi                                           36
rise_set_max_pressi                                       29.72
rise_set_min_tempi                                           23
rise_set_min_pressi                     

In [345]:
calcs.loc[calcs.rise_set_min_tempi < -1000, all_cols]

Unnamed: 0,rise_tempi,set_tempi,rise_pressi,set_pressi,rise_set_diff_tempi,rise_set_diff_pressi,set_rise_diff_tempi,set_rise_diff_pressi,rise_set_max_tempi,rise_set_max_pressi,...,rise_set_mean_tempi,rise_set_mean_pressi,set_rise_max_tempi,set_rise_max_pressi,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi
17,48.9,78.1,30.03,29.88,29.2,-0.15,-20.2,0.11,90.0,30.05,...,-553.43125,-596.856875,78.1,29.99,57.9,29.88,68.777778,29.931111,-622.209028,-626.787986
45,46.9,71.1,29.92,29.86,24.2,-0.06,-26.1,0.15,82.9,29.96,...,-651.014286,29.881429,71.1,29.99,46.9,29.86,56.6,29.926,-707.614286,-0.044571
115,27.0,48.9,30.71,30.5,21.9,-0.21,-23.9,-0.38,55.9,30.8,...,-727.384615,30.662308,48.9,30.51,24.1,30.13,30.780769,30.293846,-758.165385,0.368462
117,39.0,41.0,30.41,30.37,2.0,-0.04,-14.0,0.11,50.0,30.46,...,-959.53,30.401,41.0,30.48,25.0,30.37,32.421429,30.416429,-991.951429,-0.015429


In [354]:
calcs.loc[117, :]

date                                                 2017-10-26
station_id                                                 KMSO
rise_tempi                                                  NaN
set_tempi                                                   NaN
rise_pressi                                                 NaN
set_pressi                                                  NaN
sunrise                               2017-10-26 08:11:06-06:00
sunset                                2017-10-26 18:28:35-06:00
rise_set_diff_tempi                                         NaN
rise_set_diff_pressi                                        NaN
set_rise_diff_tempi                                         NaN
set_rise_diff_pressi                                        NaN
rise_set_max_tempi                                          NaN
rise_set_max_pressi                                         NaN
rise_set_min_tempi                                          NaN
rise_set_min_pressi                     

## Output

In [356]:
filename = '../data/2017-sunrise_set-temp_pressure-190212_KMSO_003.csv'
calcs.to_csv(filename)