In [36]:
import pickle
import pandas as pd
import numpy as np

In [37]:
file = open('models/rf.sav', 'rb')
svc = pickle.load(file)
print (svc)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)


In [38]:
def to_categorical(data, cat_vars):
    for var in cat_vars:
        cat_list='var'+'_'+var
        cat_list = pd.get_dummies(data[var], prefix=var)
        data1=data.join(cat_list)
        data=data1
    data = data.drop(cat_vars, axis=1)
    return data

def round(df):
    return df.apply(lambda x: np.rint(x))

def miles(meter):
    return round(meter/1609.344)

In [57]:
data = pd.read_csv('data.csv', index_col=0)
import sklearn.utils
data = sklearn.utils.shuffle(data)

labels = np.ravel(data[['type']],order='C')
data.drop(['userId', 'lat', 'lon', 'location', 'device', 'zip', 'zipcode', 'pressure', 'windDeg'],axis=1, inplace=True)
for index,rows in data.iterrows():
    if ("cloud" in data.loc[index, 'weatherCond'].lower() or "overcast" in data.loc[index, 'weatherCond'].lower()):
        data.loc[index, 'weatherCond'] = 'Clouds'
        
    if ("snow" in data.loc[index, 'weatherCond'].lower()):
        data.loc[index, 'weatherCond'] = 'Snow'
        
    if ("rain" in data.loc[index, 'weatherCond'].lower() or "drizzle" in data.loc[index, 'weatherCond'].lower() or "mist" in data.loc[index, 'weatherCond'].lower()):
        data.loc[index, 'weatherCond'] = 'Rain'
        
    if ("none" in data.loc[index, 'weatherCond'].lower()):
        data.loc[index, 'weatherCond'] = 'Clear'
    
data['visibility'] = miles(data['visibility'])
data['temp'] = round(data['temp'])
data['windSpeed'] = round(data['windSpeed'])


cat_vars=['weatherCond']
for var in cat_vars:
    cat_list='var'+'_'+var
    cat_list = pd.get_dummies(data[var], prefix=var)
    data1=data.join(cat_list)
    data=data1
data = data.drop(['weatherCond'], axis=1)


from datetime import datetime
from dateutil import parser

day_of_week = lambda x: parser.parse(x).weekday()
is_weekend = lambda x: parser.parse(x).weekday() > 5
month = lambda x: parser.parse(x).month


data['day_of_week'] =  data['date'].map(day_of_week)
data['is_weekend'] =  data['date'].map(is_weekend)
data = to_categorical(data, cat_vars=['is_weekend'])


data['month'] =  data['date'].map(month)


def season_of_date(date_str):
    month =  parser.parse(date_str).month
    return (
        "spring" if month >=2 and month <=5
        else
        "summer" if month >=6 and month <=8
        else
        "fall" if month >=9 and month <=11
        else
        "winter"
    )

# Assuming df has a date column of type `datetime`
data['season'] = data['date'].map(season_of_date)
data = to_categorical(data, cat_vars=['season'])


def get_part_of_day(hour):
    return (
        "morning" if 5 <= hour <= 11
        else
        "afternoon" if 12 <= hour <= 17
        else
        "evening" if 18 <= hour <= 22
        else
        "night"
    )

part_of_day = lambda x: get_part_of_day(parser.parse(x).hour)
data['part_of_day'] =  data['date'].map(part_of_day)
data = to_categorical(data, cat_vars=['part_of_day'])

# data = data.drop(['type'], axis=1)
data = data.drop(['date'], axis=1)


data.head()

Unnamed: 0,clouds,humidity,temp,type,visibility,windSpeed,weatherCond_Clear,weatherCond_Clouds,weatherCond_Fog,weatherCond_Haze,...,is_weekend_True,month,season_fall,season_spring,season_summer,season_winter,part_of_day_afternoon,part_of_day_evening,part_of_day_morning,part_of_day_night
F07AC5CD-8F11-4A88-9448-91C28B2748E9,20,42,50.0,1,10.0,6.0,0,1,0,0,...,0,2,0,1,0,0,0,0,0,1
615C478C-774C-4255-A39D-5629DFE43311,1,76,56.0,1,10.0,3.0,1,0,0,0,...,0,9,1,0,0,0,1,0,0,0
B7F5C3D4-39D3-4FA6-A8BC-48F78720FB46,1,94,72.0,0,10.0,3.0,1,0,0,0,...,0,4,0,1,0,0,0,0,1,0
0D4E685E-C368-4073-AD37-7AA4ED30049F,90,100,47.0,1,3.0,13.0,0,0,0,0,...,0,11,1,0,0,0,0,1,0,0
936B6F50-C045-4F03-BB12-9A75F5AA5BD1,90,57,75.0,0,10.0,5.0,0,1,0,0,...,0,6,0,0,1,0,0,0,0,1


In [58]:

numpy_matrix = data.as_matrix()
numpy_matrix[0]

  


array([20., 42., 50.,  1., 10.,  6.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
        0.,  1.,  1.,  0.,  2.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.])

In [59]:
def predict():
    sample = data.sample()
    label = sample['type']
    sample.drop(['type'],axis=1, inplace=True)
    numpy_matrix = sample.as_matrix()
    values =  sample.values.tolist()

    return sample, label, values, svc.predict(values)
    

In [60]:
sample, label, values, predict = predict()
print("label", label)
print("values", values)
print("predict", predict)



label 579C9191-6EFB-477C-BBC9-4C534A5E4091    0
Name: type, dtype: int64
values [[40.0, 61.0, 74.0, 10.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 5.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]]
predict [0]


  """
