In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut

In [2]:
PATH = "/home/jovyan/persist/modsimpy/data/"

In [3]:
temperature_file_name = "temperature.csv"
pressure_file_name = "pressure.csv"
humidity_file_name = "humidity.csv"
weather_type = "weather_description.csv"

In [6]:
class TimeSequence(pd.Series):
    def __init__(self, *args, **kwargs):
        if args or kwargs:
            super().__init__(*args, **kwargs)
        else:
            super().__init__([], dtype=np.float64)
            
    def set(self, **kwargs):
        """
        Updating Series
        """
        for name, value in kwargs.items():
            self[name] = value
        

In [7]:
class WeatherTransition(pd.DataFrame):
        def __init__(self, *args, **kwargs):
            if args or kwargs:
                super().__init__(*args, **kwargs)
            else:
                super().__init__([], dtype=np.float64)

In [6]:
pd_temperature = pd.read_csv(PATH+pressure_file_name)

In [9]:
pd_temperature.head()

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Philadelphia,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem
0,2012-10-01 12:00:00,,,,,,,,,,...,,,,,,,1011.0,,,
1,2012-10-01 13:00:00,,1024.0,1009.0,1027.0,1013.0,1013.0,1018.0,1013.0,1024.0,...,1014.0,1012.0,1001.0,1014.0,984.0,1012.0,1010.0,1013.0,1013.0,990.0
2,2012-10-01 14:00:00,,1024.0,1009.0,1027.0,1013.0,1013.0,1018.0,1013.0,1024.0,...,1014.0,1012.0,986.0,1014.0,984.0,1012.0,1010.0,1013.0,1013.0,990.0
3,2012-10-01 15:00:00,,1024.0,1009.0,1028.0,1013.0,1013.0,1018.0,1013.0,1024.0,...,1014.0,1012.0,945.0,1014.0,984.0,1012.0,1010.0,1013.0,1013.0,990.0
4,2012-10-01 16:00:00,,1024.0,1009.0,1028.0,1013.0,1013.0,1018.0,1013.0,1024.0,...,1014.0,1012.0,904.0,1014.0,984.0,1012.0,1010.0,1013.0,1013.0,990.0


In [21]:
city_name = list(pd_temperature.columns)

city_name.remove('datetime')

In [23]:
pd_temperature["monthday"] = pd_temperature["datetime"].apply(lambda x: x[5:10])

In [24]:
city_models = TimeSequence()
for city in city_name:
    print("City Picked "+ city)
    pd_city_daily_temperature = pd_temperature.groupby("monthday")[city].apply(list)
    ts = TimeSequence()
    for month_day in pd_city_daily_temperature.index:
        #print("Month Day "+ month_day)
        kde = KernelDensity(bandwidth=1.0, kernel='gaussian')
        x = np.array(pd_city_daily_temperature[month_day])
        x = x[~np.isnan(x)].astype(int)
        kde.fit(x[:,None])
        ts[month_day] = kde
    city_models[city] = ts

City Picked Vancouver
City Picked Portland
City Picked San Francisco
City Picked Seattle
City Picked Los Angeles
City Picked San Diego
City Picked Las Vegas
City Picked Phoenix
City Picked Albuquerque
City Picked Denver
City Picked San Antonio
City Picked Dallas
City Picked Houston
City Picked Kansas City
City Picked Minneapolis
City Picked Saint Louis
City Picked Chicago
City Picked Nashville
City Picked Indianapolis
City Picked Atlanta
City Picked Detroit
City Picked Jacksonville
City Picked Charlotte
City Picked Miami
City Picked Pittsburgh
City Picked Toronto
City Picked Philadelphia
City Picked New York
City Picked Montreal
City Picked Boston
City Picked Beersheba
City Picked Tel Aviv District
City Picked Eilat
City Picked Haifa
City Picked Nahariyya
City Picked Jerusalem


In [25]:
city_models["Jerusalem"]["01-18"].sample()

array([[1008.77332604]])

In [325]:
#random.choice(city_models["Jerusalem"])

KernelDensity(algorithm='auto', atol=0, bandwidth=1.0, breadth_first=True,
       kernel='gaussian', leaf_size=40, metric='euclidean',
       metric_params=None, rtol=0)

In [26]:
city_models["Jerusalem"]["07-18"].sample()

array([[988.34622564]])

In [88]:
[ i for i in city_models["Jerusalem"].keys() ]
None

## Weather Type State Machine

In [92]:
weather_mapping = {
    
    'mist': 'Foggy',
    'broken clouds': 'Cloudy',
    'sky is clear': 'Clearsky',
    'light rain': 'Rainy',
    'few clouds': 'Cloudy',
    'fog': 'Foggy',
    'overcast clouds': 'Cloudy',
    'light intensity shower rain': 'Rainy',
    'moderate rain': 'Rainy',
    'light intensity drizzle': 'Rainy',
    'scattered clouds': 'Cloudy',
    'proximity shower rain': 'Rainy',
    'heavy intensity rain': 'Rainy',
    'heavy snow': 'Snow',
    'shower rain': 'Rainy',
    'snow': 'Snow',
    'heavy shower snow': 'Snowstorm',
    'light intensity drizzle rain': 'Rainy',
    'light snow': 'Snow',
    'very heavy rain': 'Rainy',
    'smoke': 'Smog',
    'thunderstorm with heavy rain': 'Thunderstorm',
    'light shower snow': 'Snow',
    'thunderstorm': 'Thunderstorm',
    'thunderstorm with light rain': 'Thunderstorm',
    'haze': 'Smog',
    'dust': 'Smog',
    'volcanic ash': 'Smog',
    'heavy intensity shower rain': 'Rainy',
    'thunderstorm with rain': 'Thunderstorm',
    'sleet': 'Snow',
    'light rain and snow': 'Snow',
    'drizzle': 'Rainy',
    'shower snow': 'Snowstorm',
    'light shower sleet': 'Snow',
    'proximity thunderstorm': 'Thunderstorm',
    'ragged thunderstorm': 'Thunderstorm',
    'freezing rain': 'Snow',
    'heavy intensity drizzle': 'Rainy',
    'proximity thunderstorm with rain': 'Thunderstorm',
    'proximity thunderstorm with drizzle': 'Thunderstorm',
    'thunderstorm with drizzle': 'Thunderstorm',
    'thunderstorm with light drizzle': 'Thunderstorm',
    'thunderstorm with heavy drizzle': 'Thunderstorm',
    'heavy thunderstorm': 'Thunderstorm',
    'squalls': 'Windy',
    'proximity sand/dust whirls': 'Sandstorm',
    'proximity moderate rain': 'Rainy',
    'sand': 'Sandstorm',
    'sand/dust whirls': 'Sandstorm',
    'tornado': 'Tornado',
    'shower drizzle': 'Rainy',
    'rain and snow': 'Snow',
    'ragged shower rain': 'Rain'
}

#pd_weather["Vancouver"] = pd_weather["Vancouver"].apply(lambda x: weather_mapping[x])

#citylist = list(pd_weather.columns)

#citylist.remove('datetime')

In [4]:
pd_weather = pd.read_csv(PATH+weather_type)
pd_weather = pd_weather.dropna()

In [5]:
pd_weather.head()

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Philadelphia,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem
1,2012-10-01 13:00:00,mist,scattered clouds,light rain,sky is clear,mist,sky is clear,sky is clear,sky is clear,sky is clear,...,broken clouds,few clouds,overcast clouds,sky is clear,sky is clear,sky is clear,haze,sky is clear,sky is clear,sky is clear
2,2012-10-01 14:00:00,broken clouds,scattered clouds,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,...,broken clouds,few clouds,sky is clear,few clouds,sky is clear,sky is clear,broken clouds,overcast clouds,sky is clear,overcast clouds
3,2012-10-01 15:00:00,broken clouds,scattered clouds,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,...,broken clouds,few clouds,sky is clear,few clouds,overcast clouds,sky is clear,broken clouds,overcast clouds,overcast clouds,overcast clouds
4,2012-10-01 16:00:00,broken clouds,scattered clouds,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,...,broken clouds,few clouds,sky is clear,few clouds,overcast clouds,sky is clear,broken clouds,overcast clouds,overcast clouds,overcast clouds
5,2012-10-01 17:00:00,broken clouds,scattered clouds,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,sky is clear,...,broken clouds,few clouds,sky is clear,few clouds,overcast clouds,sky is clear,broken clouds,overcast clouds,overcast clouds,overcast clouds


In [98]:
citylist = list(pd_weather.columns)

citylist.remove('datetime')

In [99]:
for cityname in citylist:
    pd_weather[cityname] = pd_weather[cityname].apply(lambda x: weather_mapping[x])

In [184]:
pd_weather.head()

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Philadelphia,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem
1,2012-10-01 13:00:00,Foggy,Cloudy,Rainy,Clearsky,Foggy,Clearsky,Clearsky,Clearsky,Clearsky,...,Cloudy,Cloudy,Cloudy,Clearsky,Clearsky,Clearsky,Smog,Clearsky,Clearsky,Clearsky
2,2012-10-01 14:00:00,Cloudy,Cloudy,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,...,Cloudy,Cloudy,Clearsky,Cloudy,Clearsky,Clearsky,Cloudy,Cloudy,Clearsky,Cloudy
3,2012-10-01 15:00:00,Cloudy,Cloudy,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,...,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Cloudy,Cloudy
4,2012-10-01 16:00:00,Cloudy,Cloudy,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,...,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Cloudy,Cloudy
5,2012-10-01 17:00:00,Cloudy,Cloudy,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,Clearsky,...,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Clearsky,Cloudy,Cloudy,Cloudy,Cloudy


In [174]:
#hmap = {key:cnt for cnt,key in enumerate(set(weather_mapping.values()))}
#hmap

{'Tornado': 0,
 'Foggy': 1,
 'Cloudy': 2,
 'Snowstorm': 3,
 'Rain': 4,
 'Windy': 5,
 'Smog': 6,
 'Sandstorm': 7,
 'Clearsky': 8,
 'Thunderstorm': 9,
 'Snow': 10,
 'Rainy': 11}

In [197]:
pd_weather_vancouver = pd_weather[["datetime","Portland"]]

In [198]:
#pd_weather_vancouver.head()
pd_weather_vancouver["month_day"] = pd_weather_vancouver["datetime"].apply(lambda x: x[5:10])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [199]:
vv_prev = pd_weather_vancouver["Portland"].values

In [200]:
state_change = {}
for curr,ncurr in zip(vv_prev,vv_prev[1:]):
    state_change.setdefault(curr, {})
    state_change[curr].setdefault(ncurr, 0)
    state_change[curr][ncurr] += 1

In [201]:
hmap = {key:cnt for cnt,key in enumerate(state_change.keys())}

In [202]:
hmap

{'Cloudy': 0,
 'Clearsky': 1,
 'Foggy': 2,
 'Smog': 3,
 'Rainy': 4,
 'Snow': 5,
 'Thunderstorm': 6}

In [203]:
weather_type_info = {}
for state in state_change:
    weather_type_info.setdefault(state,[0]*len(hmap.keys()))
    for transit_state,transit_value in state_change[state].items():
        weather_type_info[state][hmap[transit_state]] = transit_value

In [204]:
weather_type_info

{'Cloudy': [12989, 1367, 673, 44, 1425, 21, 11],
 'Clearsky': [1368, 9904, 211, 23, 172, 5, 2],
 'Foggy': [757, 174, 2789, 53, 838, 23, 1],
 'Smog': [63, 24, 32, 530, 19, 0, 0],
 'Rainy': [1336, 209, 897, 18, 8306, 6, 12],
 'Snow': [12, 6, 31, 0, 6, 69, 0],
 'Thunderstorm': [4, 2, 2, 0, 18, 0, 7]}

In [205]:
weather_type_info.keys()

dict_keys(['Cloudy', 'Clearsky', 'Foggy', 'Smog', 'Rainy', 'Snow', 'Thunderstorm'])

In [206]:
df = WeatherTransition(weather_type_info, index=weather_type_info.keys(), columns = hmap.keys())

In [207]:
#df = pd.DataFrame(weather_type_info, index=weather_type_info.keys(), columns = hmap.keys())

df["marginal_previous"] = df.sum(axis=1)

df.head()

tser = df.sum(axis=0)
tser

df.loc["marginal_next"] = tser
#state_change

df

#df = df / 44459

Unnamed: 0,Cloudy,Clearsky,Foggy,Smog,Rainy,Snow,Thunderstorm,marginal_previous
Cloudy,12989,1368,757,63,1336,12,4,16529
Clearsky,1367,9904,174,24,209,6,2,11686
Foggy,673,211,2789,32,897,31,2,4635
Smog,44,23,53,530,18,0,0,668
Rainy,1425,172,838,19,8306,6,18,10784
Snow,21,5,23,0,6,69,0,124
Thunderstorm,11,2,1,0,12,0,7,33
marginal_next,16530,11685,4635,668,10784,124,33,44459


In [208]:
largest_value = df["marginal_previous"].max()
df = df / largest_value

In [209]:
for col_name in df.columns:
    if col_name != "marginal_previous":
        df[col_name] = df[col_name] / df["marginal_previous"]


In [210]:
df

Unnamed: 0,Cloudy,Clearsky,Foggy,Smog,Rainy,Snow,Thunderstorm,marginal_previous
Cloudy,0.785831,0.082764,0.045798,0.003811,0.080828,0.000726,0.000242,0.371781
Clearsky,0.116978,0.84751,0.01489,0.002054,0.017885,0.000513,0.000171,0.262849
Foggy,0.1452,0.045523,0.601726,0.006904,0.193528,0.006688,0.000431,0.104253
Smog,0.065868,0.034431,0.079341,0.793413,0.026946,0.0,0.0,0.015025
Rainy,0.13214,0.01595,0.077708,0.001762,0.770215,0.000556,0.001669,0.242561
Snow,0.169355,0.040323,0.185484,0.0,0.048387,0.556452,0.0,0.002789
Thunderstorm,0.333333,0.060606,0.030303,0.0,0.363636,0.0,0.212121,0.000742
marginal_next,0.371803,0.262826,0.104253,0.015025,0.242561,0.002789,0.000742,1.0


In [143]:
initial_weather_condition = 'Clearsky'
df.loc['Clearsky']

Foggy                0.022804
Cloudy               0.105740
Clearsky             0.822335
Rainy                0.041000
Snow                 0.004295
Snowstorm            0.000078
Smog                 0.003514
Thunderstorm         0.000234
marginal_previous    0.288018
Name: Clearsky, dtype: float64

In [144]:
random.choices(population=df.columns, weights=df.loc[initial_weather_condition].values, k=1)

['Clearsky']

In [129]:
df1 = pd_weather_vancouver.pivot_table(index='month_day', columns='Vancouver', aggfunc='size', fill_value=0)

df1["marginal_day"] = df1.sum(axis=1)
df1.loc["marginal_wtype"] = df1.sum(axis=0)
largest_value = df1["marginal_day"].max()


df1 = df1 / largest_value
for col_name in df1.columns:
    if col_name != "marginal_day":
        df1[col_name] = df1[col_name] / df1["marginal_day"]
df1.head()
#df["month"] = df.index.apply(lambda x : x[5:7])

Vancouver,Clearsky,Cloudy,Foggy,Rainy,Smog,Snow,Snowstorm,Thunderstorm,marginal_day
month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
01-01,0.466667,0.216667,0.1,0.116667,0.0,0.1,0.0,0.0,0.002699
01-02,0.225,0.233333,0.241667,0.183333,0.0,0.116667,0.0,0.0,0.002699
01-03,0.283333,0.341667,0.158333,0.141667,0.0,0.075,0.0,0.0,0.002699
01-04,0.35,0.266667,0.05,0.25,0.0,0.083333,0.0,0.0,0.002699
01-05,0.25,0.083333,0.166667,0.308333,0.0,0.175,0.016667,0.0,0.002699


In [130]:
df1.loc["01-05"].values

array([0.25      , 0.08333333, 0.16666667, 0.30833333, 0.        ,
       0.175     , 0.01666667, 0.        , 0.00269906])

In [134]:
random.choices(population=df1.columns, weights=df1.loc["01-05"].values, k=1)

['Clearsky']

## City Attributes

In [77]:
city_attributes = "city_attributes.csv"
pd_city_attr = pd.read_csv(PATH+city_attributes)

In [78]:
pd_city_attr.head()

Unnamed: 0,City,Country,Latitude,Longitude
0,Vancouver,Canada,49.24966,-123.119339
1,Portland,United States,45.523449,-122.676208
2,San Francisco,United States,37.774929,-122.419418
3,Seattle,United States,47.606209,-122.332069
4,Los Angeles,United States,34.052231,-118.243683


In [34]:
pd_city_attr = pd_city_attr.set_index("City")

In [38]:
pd_city_attr.index

Index(['Vancouver', 'Portland', 'San Francisco', 'Seattle', 'Los Angeles',
       'San Diego', 'Las Vegas', 'Phoenix', 'Albuquerque', 'Denver',
       'San Antonio', 'Dallas', 'Houston', 'Kansas City', 'Minneapolis',
       'Saint Louis', 'Chicago', 'Nashville', 'Indianapolis', 'Atlanta',
       'Detroit', 'Jacksonville', 'Charlotte', 'Miami', 'Pittsburgh',
       'Toronto', 'Philadelphia', 'New York', 'Montreal', 'Boston',
       'Beersheba', 'Tel Aviv District', 'Eilat', 'Haifa', 'Nahariyya',
       'Jerusalem'],
      dtype='object', name='City')

In [41]:
pd_city_attr.loc['Vancouver']['Latitude']

49.24966

## Time Zone

In [42]:
import datetime
import pytz

utc_now = pytz.utc.localize(datetime.datetime.utcnow())
pst_now = utc_now.astimezone(pytz.timezone("America/Los_Angeles"))

In [43]:
utc_now

datetime.datetime(2019, 1, 20, 8, 12, 42, 290008, tzinfo=<UTC>)

In [214]:
pst_now

datetime.datetime(2019, 1, 20, 10, 12, 42, 290008, tzinfo=<DstTzInfo 'Asia/Jerusalem' IST+2:00:00 STD>)

In [217]:
pst_now.strftime("%m-%d")

'01-20'

In [145]:
pst_now.isoformat()

'2019-01-20T10:12:42.290008+02:00'

In [45]:
z = "Asia/Jerusalem"
pst_now = utc_now.astimezone(pytz.timezone(z))
pst_now

datetime.datetime(2019, 1, 20, 10, 12, 42, 290008, tzinfo=<DstTzInfo 'Asia/Jerusalem' IST+2:00:00 STD>)

In [61]:
from tzwhere import tzwhere
tz = tzwhere.tzwhere(forceTZ=True)

In [50]:
pd_city_attr.loc['Vancouver']

Country       Canada
Latitude     49.2497
Longitude   -123.119
Name: Vancouver, dtype: object

In [62]:
print(tz.tzNameAt(pd_city_attr.loc['Vancouver']['Latitude'], pd_city_attr.loc['Vancouver']['Longitude']))

America/Vancouver


In [79]:
def zones(latitude,longitude):
    return tz.tzNameAt(latitude,longitude)

pd_city_attr["Zone"] = pd_city_attr.apply(lambda x : tz.tzNameAt(x["Latitude"],x["Longitude"],forceTZ=True), axis =1)

In [80]:
pd_city_attr.head()

Unnamed: 0,City,Country,Latitude,Longitude,Zone
0,Vancouver,Canada,49.24966,-123.119339,America/Vancouver
1,Portland,United States,45.523449,-122.676208,America/Los_Angeles
2,San Francisco,United States,37.774929,-122.419418,America/Los_Angeles
3,Seattle,United States,47.606209,-122.332069,America/Los_Angeles
4,Los Angeles,United States,34.052231,-118.243683,America/Los_Angeles


In [81]:
pd_city_attr["Zone"].unique()

array(['America/Vancouver', 'America/Los_Angeles', 'America/Phoenix',
       'America/Denver', 'America/Chicago',
       'America/Indiana/Indianapolis', 'America/New_York',
       'America/Detroit', 'America/Toronto', 'America/Montreal',
       'Asia/Jerusalem'], dtype=object)

In [82]:
city_attributes_zone = "city_attributes_zone.csv"
pd_city_attr.to_csv(PATH+city_attributes_zone,encoding='utf-8', index=True)

In [83]:
pd_city_attr = pd.read_csv(PATH+city_attributes_zone)

In [84]:
pd_city_attr.head()

Unnamed: 0.1,Unnamed: 0,City,Country,Latitude,Longitude,Zone
0,0,Vancouver,Canada,49.24966,-123.119339,America/Vancouver
1,1,Portland,United States,45.523449,-122.676208,America/Los_Angeles
2,2,San Francisco,United States,37.774929,-122.419418,America/Los_Angeles
3,3,Seattle,United States,47.606209,-122.332069,America/Los_Angeles
4,4,Los Angeles,United States,34.052231,-118.243683,America/Los_Angeles


In [164]:
np.array(pd_city_attr["City"])

array(['Vancouver', 'Portland', 'San Francisco', 'Seattle', 'Los Angeles',
       'San Diego', 'Las Vegas', 'Phoenix', 'Albuquerque', 'Denver',
       'San Antonio', 'Dallas', 'Houston', 'Kansas City', 'Minneapolis',
       'Saint Louis', 'Chicago', 'Nashville', 'Indianapolis', 'Atlanta',
       'Detroit', 'Jacksonville', 'Charlotte', 'Miami', 'Pittsburgh',
       'Toronto', 'Philadelphia', 'New York', 'Montreal', 'Boston',
       'Beersheba', 'Tel Aviv District', 'Eilat', 'Haifa', 'Nahariyya',
       'Jerusalem'], dtype=object)

In [150]:
class B:
    def __init__(self):
        self.y = 1
        
    @property
    def y(self):
        return self.y

In [159]:
class A:
            
    def __init__(self, x):
        #b = B()
        self.x = x
    
    def func(self):
        self.x = 1
        return self.x + b.y
    
    @property
    def xt(self):
        return self.x

In [160]:
a = A(10)

In [161]:
a.xt

10