For each 24-hour day (midnight to midnight) we'd like to know the following:
* Temperature and barometric pressure values at sunrise and sunset
* Difference in temp and in pressure between sunrise and sunset
* Maximum, minimum and averages for temp and pressure for both of the time frames of sunset to sunrise and sunrise to sunset
* Differences between the max, min and average for both of these time frames.


# Load libraries

In [2]:
# pandas for data structure
import pandas as pd
import datetime

# Load data

#### Weather Data

* data collected from [Wunderground](https://www.wunderground.com/weather/api/)
* hosted file: [Google Drive](https://drive.google.com/file/d/1eS0gGM14g7iFulUeqz3XwbKb5OtK9aSI/view)

In [3]:
# Relative path to the locally stored copy of the Wunderground weather CSV
filename_wunderground = '../data/wunderground-170701_171101-day_night.csv'

In [4]:
# Load the weather samples into a DataFrame, parsing utc_date as datetimes
wund = pd.read_csv(filename_wunderground, parse_dates=['utc_date'])

In [5]:
# Mark the parsed timestamps as UTC (tz_localize expects timezone-naive values)
wund['utc_date'] = wund['utc_date'].dt.tz_localize('utc')

In [6]:
# Add a local_date column holding each UTC timestamp converted to US/Mountain time
wund['local_date'] = wund['utc_date'].dt.tz_convert('US/Mountain')

In [7]:
# Index the samples by local time so they can be matched and sliced by timestamp
wund = wund.set_index('local_date')

In [8]:
# Keep only the station id, pressure, temperature and UTC timestamp columns
wund = wund[['station_id', 'pressurei', 'pressurem', 'tempi', 'tempm', 'utc_date']]

In [9]:
# Shorten the two pressure column names; every other column keeps its name
wund = wund.rename(columns={'pressurei': 'pressi', 'pressurem': 'pressm'})

In [10]:
wund.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 727764 entries, 2017-07-01 00:12:00-06:00 to 2017-10-31 23:56:00-06:00
Data columns (total 6 columns):
station_id    727764 non-null object
pressi        727764 non-null float64
pressm        727764 non-null float64
tempi         727764 non-null float64
tempm         727764 non-null float64
utc_date      727764 non-null datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(4), object(1)
memory usage: 38.9+ MB


In [11]:
wund.tail(2)

Unnamed: 0_level_0,station_id,pressi,pressm,tempi,tempm,utc_date
local_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-31 23:51:00-06:00,KMTVICTO9,29.71,1006.0,50.9,10.5,2017-11-01 05:51:00+00:00
2017-10-31 23:56:00-06:00,KMTVICTO9,29.71,1006.0,50.5,10.3,2017-11-01 05:56:00+00:00


#### Sunset Sunrise Data

In [12]:
# Load the sunrise/sunset table, parsing both time columns as datetimes
sun_filename = '../data/sunrise_sunset-wunderground-utc.csv'
sun = pd.read_csv(sun_filename, parse_dates=['sunrise', 'sunset'])

In [13]:
# Keep only the station id and the two time columns
sun = sun[['station_id', 'sunrise', 'sunset']]

In [14]:
# Tag the two time columns as UTC in their names; station_id keeps its name
sun = sun.rename(columns={'sunrise': 'sunrise_utc', 'sunset': 'sunset_utc'})

In [15]:
# Attach the UTC timezone to both time columns
sun = sun.assign(
    sunrise_utc=sun['sunrise_utc'].dt.tz_localize('utc'),
    sunset_utc=sun['sunset_utc'].dt.tz_localize('utc'),
)

In [16]:
# Add US/Mountain versions of the UTC sunrise and sunset times
sun = sun.assign(
    sunrise_local=sun['sunrise_utc'].dt.tz_convert('US/Mountain'),
    sunset_local=sun['sunset_utc'].dt.tz_convert('US/Mountain'),
)

In [17]:
# Reorder columns: station id first, then the local times, then the UTC times
sun = sun[['station_id', 'sunrise_local', 'sunset_local', 'sunrise_utc', 'sunset_utc']]

In [18]:
# Remove the rows whose local sunset falls in June (the 2017-06-30 entries)
sun = sun.loc[sun['sunset_local'].dt.month != 6]

In [19]:
# Check whether any rows have a local sunset in November (e.g. 2017-11-01).
# The matching rows are only displayed here; nothing is dropped.
sun[sun.sunset_local.dt.month == 11]

Unnamed: 0,station_id,sunrise_local,sunset_local,sunrise_utc,sunset_utc


In [20]:
# Renumber the rows from 0 after the filtering above, discarding the old index
sun = sun.reset_index(drop=True)

In [21]:
sun.head(2)

Unnamed: 0,station_id,sunrise_local,sunset_local,sunrise_utc,sunset_utc
0,KMTCORVA9,2017-07-01 05:48:49-06:00,2017-07-01 21:31:41-06:00,2017-07-01 11:48:49+00:00,2017-07-02 03:31:41+00:00
1,KMTCORVA9,2017-07-02 05:49:27-06:00,2017-07-02 21:31:25-06:00,2017-07-02 11:49:27+00:00,2017-07-03 03:31:25+00:00


# New DataFrame

In [22]:
# Column names of the per-station, per-day results table
columns = ['date', 'station_id', 'rise_tempi','set_tempi','rise_pressi','set_pressi']

In [23]:
# Start from an empty results table that only defines the columns
calcs = pd.DataFrame(columns=columns)

#### Populate with stations and date range

In [24]:
# Create one entry per calendar day of the study period, both ends included.
# The range is derived from start/end so its length cannot drift out of sync
# with the dates (2017-07-01 .. 2017-10-31 is 123 days).
start = datetime.date(2017, 7, 1)
end = datetime.date(2017, 10, 31)
date_index = pd.date_range(start, end, freq='D')

In [25]:
# Add one row per (station, day) pair; the measurement columns stay empty for now.
# The rows are collected first and turned into a frame in a single step:
# DataFrame.append copied the whole frame on every call and is not available
# in pandas 2.x.
station_days = pd.DataFrame(
    [
        {'station_id': station, 'date': day.strftime('%Y-%m-%d')}
        for station in sun.station_id.unique()
        for day in date_index
    ],
    columns=calcs.columns,
)
# Keep any rows calcs already holds, as appending did.
calcs = station_days if calcs.empty else pd.concat([calcs, station_days], ignore_index=True)

In [26]:
# Use the date string as the index of the results table
calcs = calcs.set_index('date')

In [27]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4059 entries, 2017-07-01 to 2017-10-31
Data columns (total 5 columns):
station_id     4059 non-null object
rise_tempi     0 non-null object
set_tempi      0 non-null object
rise_pressi    0 non-null object
set_pressi     0 non-null object
dtypes: object(5)
memory usage: 190.3+ KB


# Calculations

### Column for values at Sunrise and Sunset
* 'values' refers to temperature and pressure data
* indexed by day
* date will go sunrise to sunset
* columns = ['rise_tempi','set_tempi','rise_pressi','set_pressi']
* index = ['2017-07-01', .... '2017-10-31']

* Get closest wund.local_date to sun.sunrise_local
* [query the closest datetime index](https://stackoverflow.com/questions/42264848/pandas-dataframe-how-to-query-the-closest-datetime-index)

In [743]:
def find_closest_weather_sample(station, date):
    """Return the weather sample of ``station`` taken closest in time to ``date``.

    Reads the module-level ``wund`` DataFrame, which must have a
    ``station_id`` column and a datetime index.

    Parameters
    ----------
    station : str
        Station id to look up in ``wund.station_id``.
    date : datetime-like
        Target time. A timezone-naive value is interpreted in the timezone of
        the ``wund`` index.

    Returns
    -------
    pandas.Series
        The row of the nearest sample (first row when a timestamp is duplicated).

    Raises
    ------
    KeyError
        If ``wund`` holds no samples for ``station``.
    """
    wund_station = wund[wund.station_id == station]
    if wund_station.empty:
        raise KeyError('no weather samples for station {!r}'.format(station))

    # Nearest-neighbour lookup needs a unique index: collapse samples that
    # share a timestamp, keeping the first value of each column.
    wund_station = wund_station.groupby(wund_station.index).first()

    # Bring the target into the timezone of the index before comparing.
    target = pd.Timestamp(date)
    index_tz = getattr(wund_station.index, 'tz', None)
    if index_tz is not None:
        if target.tzinfo is None:
            target = target.tz_localize(index_tz)
        else:
            target = target.tz_convert(index_tz)

    # Index.get_loc(..., method='nearest') no longer exists in pandas 2.x;
    # get_indexer performs the same nearest lookup for a list of targets.
    idx = wund_station.index.get_indexer([target], method='nearest')[0]
    return wund_station.iloc[idx]

### Sunrise/Sunset Temp/Pressure

In [468]:
wund.head(1)

Unnamed: 0_level_0,station_id,pressi,pressm,tempi,tempm,utc_date
local_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-07-01 00:12:00-06:00,KMTCORVA9,26.0,880.4,58.8,14.9,2017-07-01 06:12:00+00:00


In [462]:
calcs.head(1)

Unnamed: 0_level_0,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-01,KMTCORVA9,,,,


In [637]:
# Move date out of the index back into a regular column, leaving a 0..n-1 index
calcs = calcs.reset_index()

In [None]:
# Fill calcs with the temperature and pressure measured closest to each
# sunrise and sunset, plus the sunrise/sunset timestamps themselves.

# The timestamp columns are not part of the initial column list, so create
# them before rows are written into them.
for column in ('sunrise', 'sunset'):
    if column not in calcs.columns:
        calcs[column] = None

for row_counter, (_, sun_row) in enumerate(sun.iterrows()):
    print(row_counter)  # progress indicator, the loop is slow
    station_id = sun_row['station_id']
    sunrise_local = sun_row['sunrise_local']
    sunset_local = sun_row['sunset_local']

    # calcs.date holds 'YYYY-MM-DD' strings, so match on the same format
    sunrise_Ymd = sunrise_local.strftime('%Y-%m-%d')
    sunset_Ymd = sunset_local.strftime('%Y-%m-%d')

    sunrise_sample = find_closest_weather_sample(station_id, sunrise_local)
    sunset_sample = find_closest_weather_sample(station_id, sunset_local)

    same_station = calcs.station_id == station_id
    sunrise_mask = (calcs.date == sunrise_Ymd) & same_station
    sunset_mask = (calcs.date == sunset_Ymd) & same_station

    # Write through calcs.loc[mask, column] in one step: assigning via
    # calcs.column.loc[mask] may modify a temporary copy instead of calcs.
    # Each value is stored on the row of its own local date.
    calcs.loc[sunrise_mask, 'rise_tempi'] = sunrise_sample.tempi
    calcs.loc[sunrise_mask, 'rise_pressi'] = sunrise_sample.pressi
    calcs.loc[sunrise_mask, 'sunrise'] = sunrise_local

    calcs.loc[sunset_mask, 'set_tempi'] = sunset_sample.tempi
    calcs.loc[sunset_mask, 'set_pressi'] = sunset_sample.pressi
    calcs.loc[sunset_mask, 'sunset'] = sunset_local

In [769]:
calcs[(calcs.date == '2017-07-01') & (calcs.station_id == 'KMTCORVA9')]

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi
0,2017-07-01,KMTCORVA9,51.1,71.6,26,25.94


In [783]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4059 entries, 0 to 4058
Data columns (total 8 columns):
date           4059 non-null object
station_id     4059 non-null object
rise_tempi     4059 non-null object
set_tempi      4059 non-null object
rise_pressi    4059 non-null object
set_pressi     4059 non-null object
sunrise        4059 non-null object
sunset         4059 non-null object
dtypes: object(8)
memory usage: 253.8+ KB


In [800]:
calcs.head(2)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset
0,2017-07-01,KMTCORVA9,51.1,71.6,26.0,25.94,2017-07-01 05:48:49-06:00,2017-07-01 21:31:41-06:00
1,2017-07-02,KMTCORVA9,57.2,90.1,25.99,25.93,2017-07-02 05:49:27-06:00,2017-07-02 21:31:25-06:00


In [776]:
# Save the sunrise/sunset temperature and pressure values (index not written)
filename = '../data/2017-sunrise_set-temp_pressure-190124_002.csv'
calcs.to_csv(filename, index=False)

### Difference sunrise - sunset, sunset - sunrise values

In [993]:
calcs.tail(2)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,set_rise_diff_tempi,set_rise_diff_pressi,rise_set_max_tempi,rise_set_max_pressi,rise_set_min_tempi,rise_set_min_pressi,rise_set_mean_tempi,rise_set_mean_pressi
4057,2017-10-30,KMTVICTO9,26.8,33.1,28.38,28.23,2017-10-30 08:16:52-06:00,2017-10-30 18:24:17-06:00,6.3,-0.15,-7.2,-0.11,50.0,28.33,37.9,28.15,43.75614,28.230789
4058,2017-10-31,KMTVICTO9,25.9,47.3,28.12,27.88,2017-10-31 08:18:19-06:00,2017-10-31 18:22:47-06:00,,,,,40.1,28.39,26.4,28.23,35.02973,28.316126


In [1020]:
# Debug run: the slice below selects only the second-to-last row of calcs.
# Iterate over calcs.iterrows() and re-enable the stores to process every day.
for idx, day in calcs[-2:-1].iterrows():
    station_id = day.station_id

    # The sunset -> sunrise change needs the next morning of the SAME station.
    # calcs is built station by station, so the following row can belong to a
    # different station, and the very last row has no following row at all
    # (there is no 11/1 value). In both cases there is nothing to compare with.
    # NOTE: relies on calcs having a 0..n-1 integer index.
    next_idx = idx + 1
    next_day = calcs.loc[next_idx] if next_idx in calcs.index else None
    if next_day is not None and next_day.station_id != station_id:
        next_day = None

    # sunrise to sunset: change during the day
    rise_set_diff_tempi = day.set_tempi - day.rise_tempi
    rise_set_diff_pressi = day.set_pressi - day.rise_pressi

    # sunset to sunrise: change over the following night
    if next_day is None:
        set_rise_diff_tempi = float('nan')
        set_rise_diff_pressi = float('nan')
    else:
        set_rise_diff_tempi = next_day.rise_tempi - day.set_tempi
        set_rise_diff_pressi = next_day.rise_pressi - day.set_pressi

    # store values in DataFrame (disabled while debugging)
#   calcs.loc[idx, 'rise_set_diff_tempi'] = rise_set_diff_tempi
#   calcs.loc[idx, 'rise_set_diff_pressi'] = rise_set_diff_pressi
#   calcs.loc[idx, 'set_rise_diff_tempi'] = set_rise_diff_tempi
#   calcs.loc[idx, 'set_rise_diff_pressi'] = set_rise_diff_pressi

    # sunrise to sunset
    print(day, '\n')
    print('**** Sunrise to Sunset ****')
    print(station_id, day.date)
    print('temp diff :', rise_set_diff_tempi)
    print('press diff:', rise_set_diff_pressi)

    # sunset to sunrise
    print('\n')
    print(next_day)
    print('date:', day.date)
    print('\n**** Sunset to Sunrise ****')
    print('temp diff :', set_rise_diff_tempi)
    print('press diff:', set_rise_diff_pressi)


date                                   2017-10-30
station_id                              KMTVICTO9
rise_tempi                                   26.8
set_tempi                                    33.1
rise_pressi                                 28.38
set_pressi                                  28.23
sunrise                 2017-10-30 08:16:52-06:00
sunset                  2017-10-30 18:24:17-06:00
rise_set_diff_tempi                           6.3
rise_set_diff_pressi                        -0.15
set_rise_diff_tempi                          -7.2
set_rise_diff_pressi                        -0.11
rise_set_max_tempi                           53.6
rise_set_max_pressi                         28.11
rise_set_min_tempi                           26.4
rise_set_min_pressi                         27.89
rise_set_mean_tempi                       43.3186
rise_set_mean_pressi                      28.0155
Name: 4057, dtype: object 

**** Sunrise to Sunset ****
KMTVICTO9 2017-10-30
temp diff : 6.300000000

In [943]:
# Save the table again after adding the difference columns (index not written)
filename = '../data/2017-sunrise_set-temp_pressure-190124_003.csv'
calcs.to_csv(filename, index=False)

### Max, min, ave : sunrise to sunset

In [1005]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4059 entries, 0 to 4058
Data columns (total 18 columns):
date                    4059 non-null object
station_id              4059 non-null object
rise_tempi              4059 non-null object
set_tempi               4059 non-null object
rise_pressi             4059 non-null object
set_pressi              4059 non-null object
sunrise                 4059 non-null object
sunset                  4059 non-null object
rise_set_diff_tempi     4058 non-null float64
rise_set_diff_pressi    4058 non-null float64
set_rise_diff_tempi     4058 non-null float64
set_rise_diff_pressi    4058 non-null float64
rise_set_max_tempi      3723 non-null float64
rise_set_max_pressi     3723 non-null float64
rise_set_min_tempi      3723 non-null float64
rise_set_min_pressi     3723 non-null float64
rise_set_mean_tempi     3723 non-null float64
rise_set_mean_pressi    3723 non-null float64
dtypes: float64(10), object(8)
memory usage: 570.9+ KB


In [None]:
# Daytime statistics for a small sample of rows (12..15); results are printed only.
for idx, day in calcs[12:16].iterrows():
    # set variables
    station_id = day.station_id
    sunrise_dt = day.sunrise
    sunset_dt = day.sunset

    # samples of this station taken after sunrise, up to and including sunset
    mask = (wund.index > sunrise_dt) & (wund.index <= sunset_dt) & (wund.station_id == station_id)
    daytime = wund[mask]

    ## max
    rise_set_max_tempi = daytime.tempi.max()
    rise_set_max_pressi = daytime.pressi.max()

    ## min
    rise_set_min_tempi = daytime.tempi.min()
    rise_set_min_pressi = daytime.pressi.min()

    ## mean
    rise_set_mean_tempi = daytime.tempi.mean()
    rise_set_mean_pressi = daytime.pressi.mean()

    # store values in DataFrame (disabled)
#    calcs.loc[idx, 'rise_set_max_tempi'] = rise_set_max_tempi
#    calcs.loc[idx, 'rise_set_max_pressi'] = rise_set_max_pressi
#    calcs.loc[idx, 'rise_set_min_tempi'] = rise_set_min_tempi
#    calcs.loc[idx, 'rise_set_min_pressi'] = rise_set_min_pressi
#    calcs.loc[idx, 'rise_set_mean_tempi'] = rise_set_mean_tempi
#    calcs.loc[idx, 'rise_set_mean_pressi'] = rise_set_mean_pressi

    print(idx)
    print(day.station_id)
    print(day.date)
    print(day.sunrise, day.sunset)
    print('\n', day, '\n')
    print('Max temp: ', rise_set_max_tempi)
    print('Max press:', rise_set_max_pressi)
    print('Min temp: ', rise_set_min_tempi)
    print('Min press:', rise_set_min_pressi)
    print('Mean temp :', rise_set_mean_tempi)
    print('Mean press:', rise_set_mean_pressi)
    print('\n', daytime.describe(), '\n')


### Max, min, ave : sunset to sunrise

In [35]:
# 190201: partial re-run to fix the set_rise_mean_pressi error.
# Reload the previously saved results instead of recomputing everything.
# NOTE(review): no parse_dates is given, so sunrise/sunset presumably come
# back as strings rather than timestamps - confirm before comparing them.
filename = '../data/2017-sunrise_set-temp_pressure-190131_006.csv'
calcs = pd.read_csv(filename)

In [61]:
calcs.loc[0:2]

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_max_diff_tempi,rise_set_set_rise_max_diff_pressi,rise_set_set_rise_min_diff_tempi,rise_set_set_rise_min_diff_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi
0,2017-07-01,KMTCORVA9,51.1,71.6,26.0,25.94,2017-07-01 05:48:49-06:00,2017-07-01 21:31:41-06:00,20.5,-0.06,...,55.6,25.94,62.315625,25.95875,19.1,0.03,0.5,-0.01,13.31258,0.031506
1,2017-07-02,KMTCORVA9,57.2,90.1,25.99,25.93,2017-07-02 05:49:27-06:00,2017-07-02 21:31:25-06:00,32.9,-0.06,...,,,,,,,,,,
2,2017-07-03,KMTCORVA9,83.5,70.0,26.0,25.99,2017-07-03 05:50:07-06:00,2017-07-03 21:31:07-06:00,-13.5,-0.01,...,49.6,26.0,58.1125,26.045,23.7,-0.06,6.5,-0.07,17.515705,-0.054744


In [60]:
calcs.iloc[-3:, :]

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_max_diff_tempi,rise_set_set_rise_max_diff_pressi,rise_set_set_rise_min_diff_tempi,rise_set_set_rise_min_diff_pressi,rise_set_set_rise_mean_diff_tempi,rise_set_set_rise_mean_diff_pressi
3933,2017-10-29,KMTVICTO9,38.1,39.6,28.15,28.33,2017-10-29 08:15:26-06:00,2017-10-29 18:25:49-06:00,1.5,0.18,...,26.8,28.33,33.200637,28.393949,12.0,-0.05,7.2,-0.16,11.703749,-0.104037
3934,2017-10-30,KMTVICTO9,26.8,33.1,28.38,28.23,2017-10-30 08:16:52-06:00,2017-10-30 18:24:17-06:00,6.3,-0.15,...,23.5,28.12,26.847403,28.195714,20.9,-0.15,2.9,-0.23,16.471181,-0.180228
3935,2017-10-31,KMTVICTO9,25.9,47.3,28.12,27.88,2017-10-31 08:18:19-06:00,2017-10-31 18:22:47-06:00,,,...,,,,,,,,,,


In [54]:
# Night-time (sunset -> next sunrise) mean pressure, stored on the sunset's row.
# Partial re-run: only set_rise_mean_pressi is recomputed; the other
# set_rise_* statistics are kept below as disabled code for reference.

# Split the samples per station once instead of filtering all of wund by
# station on every row.
wund_by_station = {station: samples for station, samples in wund.groupby('station_id')}

# The last row has no following row, hence calcs[:-1].
for idx, day in calcs[:-1].iterrows():
    # set variables
    station_id = day.station_id
    next_day = calcs.loc[idx + 1]
    sunset_dt = day.sunset
    sunrise_dt = next_day.sunrise

    station_samples = wund_by_station.get(station_id)

    # The following row is only "the next morning" when it belongs to the same
    # station; a station's last day has no night to summarise.
    if station_samples is None or next_day.station_id != station_id:
        calcs.loc[idx, 'set_rise_mean_pressi'] = float('nan')
        continue

    # select date range: after sunset, up to and including the next sunrise
    mask = (station_samples.index > sunset_dt) & (station_samples.index <= sunrise_dt)
    night = station_samples[mask]

    set_rise_mean_pressi = night.pressi.mean()

    calcs.loc[idx, 'set_rise_mean_pressi'] = set_rise_mean_pressi

    # Disabled for the partial re-run: the remaining night-time statistics.
    # # max
    # set_rise_max_tempi = night.tempi.max()
    # set_rise_max_pressi = night.pressi.max()
    #
    # # min
    # set_rise_min_tempi = night.tempi.min()
    # set_rise_min_pressi = night.pressi.min()
    #
    # # mean
    # set_rise_mean_tempi = night.tempi.mean()
    #
    # # store values in DataFrame
    # calcs.loc[idx, 'set_rise_max_tempi'] = set_rise_max_tempi
    # calcs.loc[idx, 'set_rise_max_pressi'] = set_rise_max_pressi
    # calcs.loc[idx, 'set_rise_min_tempi'] = set_rise_min_tempi
    # calcs.loc[idx, 'set_rise_min_pressi'] = set_rise_min_pressi
    # calcs.loc[idx, 'set_rise_mean_tempi'] = set_rise_mean_tempi

"\n    print(row[0])\n    print(row[1].station_id)\n    print(row[1].date)\n    print(sunset_dt, sunrise_dt)\n    print('\n', row[1], '\n')\n    print('Max temp: ', set_rise_max_tempi)\n    print('Max press:', set_rise_max_pressi)\n    print('Min temp: ', set_rise_min_tempi)\n    print('Min press:', set_rise_min_pressi)\n    print('Mean temp :', set_rise_mean_tempi)\n    print('Mean press:', set_rise_mean_pressi)\n    print('\n', wund[mask].describe(), '\n')\n"

In [59]:
calcs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3936 entries, 0 to 3935
Data columns (total 30 columns):
date                                  3936 non-null object
station_id                            3936 non-null object
rise_tempi                            3936 non-null float64
set_tempi                             3936 non-null float64
rise_pressi                           3936 non-null float64
set_pressi                            3936 non-null float64
sunrise                               3936 non-null object
sunset                                3936 non-null object
rise_set_diff_tempi                   3935 non-null float64
rise_set_diff_pressi                  3935 non-null float64
set_rise_diff_tempi                   3935 non-null float64
set_rise_diff_pressi                  3935 non-null float64
rise_set_max_tempi                    3702 non-null float64
rise_set_max_pressi                   3702 non-null float64
rise_set_min_tempi                    3702 non-null float

### Difference max, min, ave sunset to sunrise : sunrise to sunset

In [58]:
# remember to set -1 to account for no 11/1 value
for row in calcs.iterrows():
    # sunrise to sunset
    station_id = row[1].station_id
    
    rise_set_set_rise_mean_diff_tempi = row[1].rise_set_mean_tempi - row[1].set_rise_mean_tempi
    rise_set_set_rise_mean_diff_pressi = row[1].rise_set_mean_pressi - row[1].set_rise_mean_pressi

    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_tempi'] = rise_set_set_rise_mean_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_pressi'] = rise_set_set_rise_mean_diff_pressi
    
'''
    # set variables
    rise_set_set_rise_max_diff_tempi = row[1].rise_set_max_tempi - row[1].set_rise_max_tempi
    rise_set_set_rise_max_diff_pressi = row[1].rise_set_max_pressi - row[1].set_rise_max_pressi
    rise_set_set_rise_min_diff_tempi = row[1].rise_set_min_tempi - row[1].set_rise_min_tempi
    rise_set_set_rise_min_diff_pressi = row[1].rise_set_min_pressi - row[1].set_rise_min_pressi
    rise_set_set_rise_mean_diff_tempi = row[1].rise_set_mean_tempi - row[1].set_rise_mean_tempi
    rise_set_set_rise_mean_diff_pressi = row[1].rise_set_mean_pressi - row[1].set_rise_mean_pressi

    # store values in DataFrame
    calcs.loc[row[0], 'rise_set_set_rise_max_diff_tempi'] = rise_set_set_rise_max_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_max_diff_pressi'] = rise_set_set_rise_max_diff_pressi
    calcs.loc[row[0], 'rise_set_set_rise_min_diff_tempi'] = rise_set_set_rise_min_diff_tempi
    calcs.loc[row[0], 'rise_set_set_rise_min_diff_pressi'] = rise_set_set_rise_min_diff_pressi
    calcs.loc[row[0], 'rise_set_set_rise_mean_diff_tempi'] = rise_set_set_rise_mean_diff_tempi
'''    

"\n    # set variables\n    rise_set_set_rise_max_diff_tempi = row[1].rise_set_max_tempi - row[1].set_rise_max_tempi\n    rise_set_set_rise_max_diff_pressi = row[1].rise_set_max_pressi - row[1].set_rise_max_pressi\n    rise_set_set_rise_min_diff_tempi = row[1].rise_set_min_tempi - row[1].set_rise_min_tempi\n    rise_set_set_rise_min_diff_pressi = row[1].rise_set_min_pressi - row[1].set_rise_min_pressi\n    rise_set_set_rise_mean_diff_tempi = row[1].rise_set_mean_tempi - row[1].set_rise_mean_tempi\n    rise_set_set_rise_mean_diff_pressi = row[1].rise_set_mean_pressi - row[1].set_rise_mean_pressi\n\n    # store values in DataFrame\n    calcs.loc[row[0], 'rise_set_set_rise_max_diff_tempi'] = rise_set_set_rise_max_diff_tempi\n    calcs.loc[row[0], 'rise_set_set_rise_max_diff_pressi'] = rise_set_set_rise_max_diff_pressi\n    calcs.loc[row[0], 'rise_set_set_rise_min_diff_tempi'] = rise_set_set_rise_min_diff_tempi\n    calcs.loc[row[0], 'rise_set_set_rise_min_diff_pressi'] = rise_set_set_rise

In [1068]:
calcs.head(1)

Unnamed: 0,date,station_id,rise_tempi,set_tempi,rise_pressi,set_pressi,sunrise,sunset,rise_set_diff_tempi,rise_set_diff_pressi,...,set_rise_max_pressi,set_rise_min_tempi,set_rise_min_pressi,set_rise_mean_tempi,set_rise_mean_pressi,rise_set_set_rise_max_diff_tempi,rise_set_set_rise_max_diff_pressi,rise_set_set_rise_min_diff_tempi,rise_set_set_rise_min_diff_pressi,rise_set_set_rise_mean_diff_tempi
0,2017-07-01,KMTCORVA9,51.1,71.6,26,25.94,2017-07-01 05:48:49-06:00,2017-07-01 21:31:41-06:00,20.5,-0.06,...,25.99,55.6,25.94,62.315625,62.315625,19.1,0.03,0.5,-0.01,13.31258


In [1]:
calcs.info()

NameError: name 'calcs' is not defined

In [1066]:
# Save the table including the max/min/mean columns (index not written)
filename = '../data/2017-sunrise_set-temp_pressure-190125_004.csv'
calcs.to_csv(filename, index=False)

# Notes

* Sunset-to-sunrise values are stored on the date of the sunset: e.g. the values from the 9/1 sunset to the 9/2 sunrise are assigned to 9/1
* there are some NaNs in here which I am still working to understand
 * the NaNs appear in the max, min and mean columns, which are computed from an array of wunderground sample rows
  * my guess is that there is some kind of issue in the array of rows used to generate the max, min and mean