In [78]:
import requests
import pandas as pd
import json
import datetime
import numpy as np

# for position API
import http.client, urllib.parse

In [79]:
country_input = (input("Enter a Country using a 2-letter (US) or 3-letter (USA) country code: "))
query_input = (input("Enter a location (City, Zip Code, etc.): "))

print(
    country_input,
    query_input
)

Enter a Country using a 2-letter (US) or 3-letter (USA) country code: 
Enter a location (City, Zip Code, etc.): 
 


In [80]:
# this function gets latitude and longitude for a given city and country
# from positionstack.com
# variables could use renaming: country_input means country, query_input means city

def get_latt_long(country_input, query_input):
    conn = http.client.HTTPConnection('api.positionstack.com')
    params = urllib.parse.urlencode({
        'access_key': '1fbf7de4ce05274e9b9005659970c429',
        # Variables get live user input above.
        'country': country_input,
        #'region': region_input,
        'query': query_input,
        # "limit" of 1 ensures only 1 Lat/Lon will be passed below. 
        'limit': 1
    })
    conn.request('GET', '/v1/forward?{}'.format(params))
    res = conn.getresponse()
    geo = res.read()
    geo_str = (geo.decode('utf-8'))
    # Convert string to json (which is a dictionary).
    geo_json = json.loads(geo_str)
    # extract latitude
    geo_json_lat = geo_json['data']
    var_lat = []
    for x in geo_json_lat:
        var_lat.append((x['latitude']))
    # extract longitude
    geo_json_lon = geo_json['data']
    var_lon = []
    for x in geo_json_lon:
        var_lon.append((x['longitude']))
    return (var_lat[0], var_lon[0])


In [81]:
# testing: this should return (35.695126, 139.75391)
# get_latt_long('jp', 'tokyo')

In [82]:
# this function builds the url for the API call to retrieve HOURLY weather data 
# from https://archive-api.open-meteo.com/v1/era5
# for given lattitue/longitude coordinates, for the given years;

def get_hourly_weather_url(latitude, longitude, start_year, end_year):
    url_snip_hr = {}
    # Break URL into snippets to be assembled below, hourly:
    url_snip_hr[1] = "https://archive-api.open-meteo.com/v1/era5?latitude=" 
    url_snip_hr[2]= str(latitude)
    url_snip_hr[3] = "&longitude="
    url_snip_hr[4] = str(longitude)
    url_snip_hr[5] = "&start_date="
    url_snip_hr[6] = str(start_year)
    url_snip_hr[7] = "-01-01&end_date="  # month and day of start_year
    url_snip_hr[8] = str(end_year)
    url_snip_hr[9] = "-12-31&hourly="   # elements, hourly
    url_snip_hr[10] = "temperature_2m,"   # data element
    url_snip_hr[11] = "relativehumidity_2m,"   # data element
    url_snip_hr[12] = "rain,"   # data element
    url_snip_hr[13] = "snowfall,"   # data element
    url_snip_hr[14] = "cloudcover,"   # data element
    url_snip_hr[15] = "windspeed_10m,"   # data element
    url_snip_hr[16] = "winddirection_10m"   # data element
    url_snip_hr[17] = "&timezone=America%2FLos_Angeles"   # time zone
    url_snip_hr[18] = "&temperature_unit=fahrenheit"   # temp unit
    url_snip_hr[19] = "&windspeed_unit=mph"   # windspeed unit
    url_snip_hr[20] = "&precipitation_unit=inch"   # precip unit
    weather_url_hr = ""
    for i in range(20):
        weather_url_hr += url_snip_hr[i+1]
    return weather_url_hr   

In [83]:
# this function builds the url for the API call to retrieve DAILY weather data 
# from https://archive-api.open-meteo.com/v1/era5
# for given lattitue/longitude coordinates, for the given years;

def get_daily_weather_url(latitude, longitude, start_year, end_year):
    url_snip_dy = {}
    # Break URL into snippets to be assembled below, hourly:
    url_snip_dy[1] = "https://archive-api.open-meteo.com/v1/era5?latitude=" 
    url_snip_dy[2] = str(latitude)   # latitude, input from previous json
    url_snip_dy[3] = "&longitude="
    url_snip_dy[4] = str(longitude)   # longitude, input from previous json
    url_snip_dy[5] = "&start_date="
    url_snip_dy[6] = str(start_year)
    url_snip_dy[7] = "-01-01&end_date="  # month and day of start_year
    url_snip_dy[8] = str(end_year)
    url_snip_dy[9] = "-12-31&daily="   # elements, daily
    url_snip_dy[10] = "temperature_2m_max,"   # data element
    url_snip_dy[11] = "temperature_2m_min,"   # data element
    url_snip_dy[12] = "rain_sum,"   # data element
    url_snip_dy[13] = "snowfall_sum,"   # data element
    url_snip_dy[14] = "precipitation_hours"   # data element
    url_snip_dy[15] = "&timezone=America%2FLos_Angeles"   # time zone
    url_snip_dy[16] = "&temperature_unit=fahrenheit"   # temp unit
    url_snip_dy[17] = "&windspeed_unit=mph"   # windspeed unit
    url_snip_dy[18] = "&precipitation_unit=inch"   # precip unit
    weather_url_dy = ""
    for i in range(18):
        weather_url_dy += url_snip_dy[i+1]
    return weather_url_dy

In [84]:
# daily_or_hourly should be a string
#  ### needs better error-handling ###

def get_weather_url(latitude, longitude, start_year, end_year, daily_or_hourly):
    if daily_or_hourly == 'daily':
        return get_daily_weather_url(latitude, longitude, start_year, end_year)
    elif daily_or_hourly== 'hourly':
        return get_hourly_weather_url(latitude, longitude, start_year, end_year)
    else:
        return 'error'

In [90]:
# # testing: 'print' makes them clickable!! so go look at the data :)

# print( get_weather_url(35.695126, 139.75391, 2010, 2019,'daily') )
# print( get_weather_url(35.695126, 139.75391, 2010, 2019,'hourly') )


In [86]:
# this function retrieves weather data for given lattitue/longitude coordinates
# from https://archive-api.open-meteo.com/v1/era5
# for the given years; 
# e.g. if start_year=2010 and end_year=2020, 11 years of data are retrieved,
# starting 2010-01-01 and ending 2020-12-31, inclusive
#   ### valid years ###
# again, daily_or_hourly must be one of the two strings

def get_weather(latitude, longitude, start_year, end_year, daily_or_hourly):
    url = get_weather_url(latitude, longitude, start_year, end_year, daily_or_hourly)
    # Data comes in as one long string:
    weather_hr_str = requests.get(url).text
    # So convert string to dictionary.
    weather_hr_json = json.loads(weather_hr_str)
    # now turn dictionary into dataframe
    weather_raw = pd.DataFrame.from_records(weather_hr_json[daily_or_hourly])
    # convert the provided ISO string 'time' into a 'pure_date' in python datetime format
    # for aggregating and joining with daily data
    weather_raw["pure_date"] = weather_raw['time'].map(lambda x: 
                                                        datetime.datetime.fromisoformat(x[0:10]))
    # forget the string with the hour
    weather_raw.drop('time', axis=1, inplace=True)
    return weather_raw

In [87]:
# testing this should complete within a second or five, without errors
#  ### testing best practices: how do i display expected output?? 
#  ### which is a better test: test_df.info() or test_df.describe()?

daily_test_df = get_weather(35.695126, 139.75391, 2010, 2019, 'daily')
daily_test_df.info()

hourly_test_df = get_weather(35.695126, 139.75391, 2010, 2019, 'hourly')
hourly_test_df.info()

# note that wind_direction in hourly is null when windspeed is 0
# we do nothing with wind direction, so don't bother fixing it

# gaierror means check your internet connection

# these dataframes are used in later testing!

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652 entries, 0 to 3651
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   precipitation_hours  3652 non-null   float64       
 1   rain_sum             3652 non-null   float64       
 2   snowfall_sum         3652 non-null   float64       
 3   temperature_2m_max   3652 non-null   float64       
 4   temperature_2m_min   3652 non-null   float64       
 5   pure_date            3652 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(5)
memory usage: 171.3 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87648 entries, 0 to 87647
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   cloudcover           87648 non-null  int64         
 1   rain                 87648 non-null  float64       
 2   relativehumidity_2m  87648 non-null 

In [35]:
# this auxillary function will be used to aggreagte hourly data into daily
# for example, getting the temperature of 6th hottest hour answers questions like:
# Were there at least 6 hours above 80F? and Were there at least 18 hours below 32F?

def enth(x, n):
    return x.sort_values().iloc[n]

In [36]:
# this function takes in the hourly and daily weather dataframes made by get_weather
#  ### behaviour if different time periods ###
# the returned dataframe is indexed by pure_date, so same number of rows as daily input
# the returned dataframe has all the columns of the daily input,
# plus a bunch of aggregates of data from hourly
# yes, max_wind = wind_high and temp_high is also a rename; someday, we fix this inefficiency

def agg_hourly_and_daily(hourly_df, daily_df):
    ## BEWARE arguments are passed by reference, don't mess with them ! ##
    output_df = hourly_df.groupby('pure_date').agg(
    # historical statistics (adjectives): humid_avg, wind_high, cloud_avg, temp_6
        humid_avg=('relativehumidity_2m', np.mean),
        wind_high = ('windspeed_10m', np.max),
        cloud_avg=('cloudcover', np.mean),
        temp_6= ('temperature_2m', lambda x: enth(x,18)),
    # machine learning: avg_humidity, median_wind, max_wind, cloud_4, cloud_12, cloud_20
        avg_humidity=('relativehumidity_2m', np.mean),
        median_wind = ('windspeed_10m', np.median),
        max_wind = ('windspeed_10m', np.max),
        cloud_4 = ('cloudcover', lambda x: enth(x,3)),
        cloud_12 = ('cloudcover', lambda x: enth(x,11)),
        cloud_20 = ('cloudcover', lambda x: enth(x,19)) )
    output_df = output_df.join(daily_df.set_index('pure_date'))
    output_df['temp_high'] = output_df['temperature_2m_max']
    # i want 'pure_date' to go back to being a regular column
    output_df.reset_index(inplace=True)
    return output_df

In [37]:
# testing, using the hourly_test_df from previous test
#  ### again, how to display expected output? ###

clean_df = agg_hourly_and_daily(hourly_test_df, daily_test_df)
clean_df.info()
clean_df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652 entries, 0 to 3651
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   pure_date            3652 non-null   datetime64[ns]
 1   humid_avg            3652 non-null   float64       
 2   wind_high            3652 non-null   float64       
 3   cloud_avg            3652 non-null   float64       
 4   temp_6               3652 non-null   float64       
 5   avg_humidity         3652 non-null   float64       
 6   median_wind          3652 non-null   float64       
 7   max_wind             3652 non-null   float64       
 8   cloud_4              3652 non-null   int64         
 9   cloud_12             3652 non-null   int64         
 10  cloud_20             3652 non-null   int64         
 11  precipitation_hours  3652 non-null   float64       
 12  rain_sum             3652 non-null   float64       
 13  snowfall_sum         3652 non-nul

Unnamed: 0,humid_avg,wind_high,cloud_avg,temp_6,avg_humidity,median_wind,max_wind,cloud_4,cloud_12,cloud_20,precipitation_hours,rain_sum,snowfall_sum,temperature_2m_max,temperature_2m_min,temp_high
count,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0,3652.0
mean,71.337692,13.292333,45.558621,64.94299,71.337692,7.172193,13.292333,24.085706,44.033954,64.3954,4.058598,0.153442,0.054493,68.38023,55.000821,68.38023
std,11.054197,5.407639,29.659068,14.746371,11.054197,3.363566,5.407639,29.077274,35.016543,34.064647,5.922985,0.398454,0.707603,14.402073,15.819658,14.402073
min,29.208333,4.0,0.0,33.7,29.208333,1.8,4.0,0.0,0.0,0.0,0.0,0.0,0.0,36.2,17.5,36.2
25%,64.541667,9.4,19.697917,51.2,64.541667,4.9,9.4,1.0,13.0,32.0,0.0,0.0,0.0,55.4,40.4,55.4
50%,73.25,12.1,42.166667,65.6,73.25,6.3,12.1,13.0,35.0,71.0,1.0,0.0,0.0,69.0,55.9,69.0
75%,79.625,16.0,69.625,76.9,79.625,8.4625,16.0,35.0,77.0,100.0,7.0,0.11,0.0,80.1,68.7,80.1
max,96.583333,52.3,100.0,97.4,96.583333,25.8,52.3,100.0,100.0,100.0,24.0,7.7,23.94,102.5,84.3,102.5


## Getting adjectives for historical summary

Now we 
 - filter by user-requested dates;
 - get boolean columns for adjectives, and 
 - compute rates/likelihoods for each adjective.

The next function takes in a date interval, generates the same intervals in previous years, and returns the starts and ends of those intervals, starting with the original.

In [38]:
# start_date, end_date should be datetime; 
# years >= 0 should integer; if years=0, ([start_date*], [end_date*]) is returned
# (*) with feb-29 removed


def backdate(start_date, end_date, years):
    # replace start_date feb-29 by feb-28
    if (start_date.month == 2 & start_date.day == 29):
        start_date = start_date - datetime.timedelta(days = 1)
    # replace end_date feb-29 by mar-01
    if (end_date.month == 2 & end_date.day == 29):
        end_date = end_date + datetime.timedelta(days = 1)
    starts = [start_date]
    ends = [end_date]
    for i in range(years):
        start_date = start_date.replace(year = start_date.year -1)
        end_date = end_date.replace(year = end_date.year -1)
        starts.append(start_date)
        ends.append(end_date)
    return (starts, ends)
    

In [39]:
# testing: expected output
# ([datetime.datetime(2023, 2, 25, 0, 0),
#   datetime.datetime(2022, 2, 25, 0, 0),
#   datetime.datetime(2021, 2, 25, 0, 0),
#   datetime.datetime(2020, 2, 25, 0, 0)],
#  [datetime.datetime(2023, 3, 5, 0, 0),
#   datetime.datetime(2022, 3, 5, 0, 0),
#   datetime.datetime(2021, 3, 5, 0, 0),
#   datetime.datetime(2020, 3, 5, 0, 0)])

# first two are used in further testing
star = datetime.datetime.fromisoformat('2023-02-25')
endd = datetime.datetime.fromisoformat('2023-03-05')

starts_ends = backdate(star, endd, 3)
starts_ends

([datetime.datetime(2023, 2, 25, 0, 0),
  datetime.datetime(2022, 2, 25, 0, 0),
  datetime.datetime(2021, 2, 25, 0, 0),
  datetime.datetime(2020, 2, 25, 0, 0)],
 [datetime.datetime(2023, 3, 5, 0, 0),
  datetime.datetime(2022, 3, 5, 0, 0),
  datetime.datetime(2021, 3, 5, 0, 0),
  datetime.datetime(2020, 3, 5, 0, 0)])

The next function ingests a dataframe with a column 'pure_date', along with a date interval, and the number of years to go back by; and returns the rows of the input dataframe where the pure_date falls within one of the intervals.

In [40]:
def date_period_filter(daf, start_date, end_date, years):
    starts_ends = backdate(start_date, end_date, years)
    # for each backdated interval, test whether pure_date falls in it, then
    # add up the booleans, get 0 if all false, more otherwise
    mask = sum( (daf['pure_date'] >= starts_ends[0][i]) &
               (daf['pure_date'] <= starts_ends[1][i]) for i in range(years) )
    # if interval is too big, backdated intervals may overlap, but we don't want 2 as a value
    # also, returns it to boolean from integer
    mask = (mask > 0)
    return daf[mask].copy()
    

In [41]:
# testing, used below 
little_df = date_period_filter(clean_df, star, endd, 6)
little_df

Unnamed: 0,pure_date,humid_avg,wind_high,cloud_avg,temp_6,avg_humidity,median_wind,max_wind,cloud_4,cloud_12,cloud_20,precipitation_hours,rain_sum,snowfall_sum,temperature_2m_max,temperature_2m_min,temp_high
2977,2018-02-25,69.125,10.7,85.416667,45.2,69.125,8.0,10.7,31,100,100,7.0,0.09,0.0,50.0,38.3,50.0
2978,2018-02-26,67.333333,10.6,50.75,47.3,67.333333,4.6,10.6,19,48,88,0.0,0.0,0.0,51.9,38.3,51.9
2979,2018-02-27,71.416667,9.3,29.458333,52.9,71.416667,5.1,9.3,6,27,52,0.0,0.0,0.0,56.2,34.1,56.2
2980,2018-02-28,69.375,26.0,71.5,59.6,69.375,9.55,26.0,8,100,100,10.0,1.39,0.0,67.0,46.9,67.0
2981,2018-03-01,50.958333,15.4,0.666667,55.4,50.958333,5.3,15.4,0,0,0,0.0,0.0,0.0,62.0,38.3,62.0
2982,2018-03-02,55.583333,10.6,8.125,53.9,55.583333,6.2,10.6,0,2,18,0.0,0.0,0.0,58.7,34.4,58.7
2983,2018-03-03,69.291667,20.5,22.791667,61.6,69.291667,3.05,20.5,0,14,64,0.0,0.0,0.0,66.3,42.0,66.3
2984,2018-03-04,82.416667,24.4,50.875,62.5,82.416667,10.25,24.4,0,39,100,3.0,0.04,0.0,63.8,49.9,63.8
2985,2018-03-05,71.875,24.6,91.916667,57.3,71.875,10.4,24.6,76,100,100,11.0,0.24,0.0,63.3,46.3,63.3
3342,2019-02-25,71.041667,7.7,81.166667,51.6,71.041667,5.85,7.7,41,100,100,5.0,0.02,0.0,58.9,47.5,58.9


## Step 2: getting adjectives for historical summary statistics.

In [42]:
# ideally, the definitions of adjectives should be stored separately and passed to this function,
# for easier future adjustment; but good enough for now

def add_bool_col_for_adj(daf):
    daf_with_bool = daf.copy()
    
    # temp
    # freezing = >19hours with <32F: Use "temp_6" (6th highest temp of day) as cutoff.
    # cold = high <60F
    # hot = >6hours with >80F
    # warm = leftovers
    daf_with_bool['freezing'] = (daf_with_bool['temp_6'] <= 32)
    daf_with_bool['cold'] = (daf_with_bool['temp_high'] <= 60) & (daf_with_bool['freezing'] == 0)
    daf_with_bool['hot'] = (daf_with_bool['temp_6'] >= 80)
    daf_with_bool['warm'] = (daf_with_bool['freezing'] == 0) & (daf_with_bool['cold'] == 0) & (daf_with_bool['hot'] == 0)

    # clouds
    # average hourly "percent cloud cover"
    # clear = <=33%
    # cloudy = >= 67%
    # partly_cloudy = leftovers
    daf_with_bool['clear'] = (daf_with_bool['cloud_avg'] <= 30)
    daf_with_bool['cloudy'] = (daf_with_bool['cloud_avg'] >= 60)
    daf_with_bool['partly_cloudy'] = (daf_with_bool['clear'] == 0) & (daf_with_bool['cloudy'] == 0)

    # rain
    # not_rainy = 0 hours, or  <2.5 mm total rain (Laurina changing to 0.5 inches)
    # very_rainy = >=6 hour and >=10mm (Laurina changing to 2 inches)
    # lightly_rainy = leftovers
    daf_with_bool['not_rainy'] = (daf_with_bool['rain_sum'] <= 0.5) | (daf_with_bool['precipitation_hours'] == 0)
    daf_with_bool['very_rainy'] = (daf_with_bool['rain_sum'] >= 2) | (daf_with_bool['precipitation_hours'] >= 6)
    daf_with_bool['lightly_rainy'] = (daf_with_bool['not_rainy'] == 0) & (daf_with_bool['very_rainy'] == 0)

    # snow
    # data used: total snow for the day
    # not_snowy = 0
    # very_snowy = >6"
    # lightly_snowy = >0
    # (unlike rain, we ignore hours)   
    daf_with_bool['not_snowy'] = (daf_with_bool['snowfall_sum'] == 0)
    daf_with_bool['very_snowy'] = (daf_with_bool['snowfall_sum'] >= 6)
    daf_with_bool['lightly_snowy'] = (daf_with_bool['not_snowy'] == 0) & (daf_with_bool['very_snowy'] == 0)

    # wind
    # windy = maximum	> ?? mph
    # maybe 8? mayb 12  
    daf_with_bool['windy'] = (daf_with_bool['wind_high'] >= 12)

    # humidity
    # average hourlies, then:
    # low_humidity = <30%
    # high_humidity = >60%
    # medium_humidity = leftovers
    daf_with_bool['low_humidity'] = (daf_with_bool['humid_avg'] <= 30)
    daf_with_bool['high_humidity'] = (daf_with_bool['humid_avg'] >= 60)
    daf_with_bool['medium_humidity'] = (daf_with_bool['low_humidity'] == 0) & (daf_with_bool['high_humidity'] == 0)

    return daf_with_bool
    

In [43]:
#testing

little_with_bool = add_bool_col_for_adj(little_df)
little_with_bool

# note that a day-long drizzle is both very_rainy and not_rainy

Unnamed: 0,pure_date,humid_avg,wind_high,cloud_avg,temp_6,avg_humidity,median_wind,max_wind,cloud_4,cloud_12,...,not_rainy,very_rainy,lightly_rainy,not_snowy,very_snowy,lightly_snowy,windy,low_humidity,high_humidity,medium_humidity
2977,2018-02-25,69.125,10.7,85.416667,45.2,69.125,8.0,10.7,31,100,...,True,True,False,True,False,False,False,False,True,False
2978,2018-02-26,67.333333,10.6,50.75,47.3,67.333333,4.6,10.6,19,48,...,True,False,False,True,False,False,False,False,True,False
2979,2018-02-27,71.416667,9.3,29.458333,52.9,71.416667,5.1,9.3,6,27,...,True,False,False,True,False,False,False,False,True,False
2980,2018-02-28,69.375,26.0,71.5,59.6,69.375,9.55,26.0,8,100,...,False,True,False,True,False,False,True,False,True,False
2981,2018-03-01,50.958333,15.4,0.666667,55.4,50.958333,5.3,15.4,0,0,...,True,False,False,True,False,False,True,False,False,True
2982,2018-03-02,55.583333,10.6,8.125,53.9,55.583333,6.2,10.6,0,2,...,True,False,False,True,False,False,False,False,False,True
2983,2018-03-03,69.291667,20.5,22.791667,61.6,69.291667,3.05,20.5,0,14,...,True,False,False,True,False,False,True,False,True,False
2984,2018-03-04,82.416667,24.4,50.875,62.5,82.416667,10.25,24.4,0,39,...,True,False,False,True,False,False,True,False,True,False
2985,2018-03-05,71.875,24.6,91.916667,57.3,71.875,10.4,24.6,76,100,...,True,True,False,True,False,False,True,False,True,False
3342,2019-02-25,71.041667,7.7,81.166667,51.6,71.041667,5.85,7.7,41,100,...,True,False,False,True,False,False,False,False,True,False


Aggregate boolean columns into statistics.

In [44]:
# location is likely to consist of the coutry, city, state info that goes 

def bool_to_stats(daf):
    # list of column names to be aggregated
    bool_cols = ['freezing', 'cold', 'warm', 'hot',
                'not_rainy', 'lightly_rainy', 'very_rainy',
                'not_snowy', 'lightly_snowy', 'very_snowy',
                'windy',
                'low_humidity', 'medium_humidity', 'high_humidity',
                'clear', 'partly_cloudy', 'cloudy']
    # denominators for rate/likelyhood
    row_count = len(daf)
    # initialize storage
    stats_dict = {}
    # aggregate
    for stat in bool_cols:
        stats_dict[stat] = daf[stat].sum() / row_count
    return stats_dict



In [45]:
# testing

bool_to_stats(little_with_bool)

{'freezing': 0.0,
 'cold': 0.7222222222222222,
 'warm': 0.2777777777777778,
 'hot': 0.0,
 'not_rainy': 0.8333333333333334,
 'lightly_rainy': 0.0,
 'very_rainy': 0.4444444444444444,
 'not_snowy': 1.0,
 'lightly_snowy': 0.0,
 'very_snowy': 0.0,
 'windy': 0.4444444444444444,
 'low_humidity': 0.0,
 'medium_humidity': 0.16666666666666666,
 'high_humidity': 0.8333333333333334,
 'clear': 0.2777777777777778,
 'partly_cloudy': 0.16666666666666666,
 'cloudy': 0.5555555555555556}

In [None]:
bob = [1,3]

In [None]:
sum([i==2 for i in bob])

In [None]:
b = pd.DataFrame({'id': [1, 2, 9, 8],
                  'is': [1, 3, 5, 8],
                  'val1': ['p', 'q', 'r', 's']})

In [None]:
b

In [None]:
b.set_index('id', inplace=True)

In [None]:
b['id'] = ( b['id'] > 0)

In [None]:
b

In [None]:
b['id'] = b['id'].map(lambda x: max(x, 1)

In [None]:
b.reset_index(inplace=True)

In [None]:
b

In [None]:
start_dat = [1,2,5]
end_dat = [3,5,8]

In [None]:
i=1
joe = (b['id'] >= start_dat[i]) & (b['id'] <= end_dat[i])

In [None]:
joe

In [None]:
joe+joe

In [None]:
sum([joe, joe])

In [None]:
cat = sum([(b['id'] >= start_dat[i]) & (b['id'] <= end_dat[i]) for i in range(2)])

In [None]:
cat

In [None]:
for i in range(0):
    print("cat")