In [2]:
import pickle
import pandas as pd
import numpy as np

In [3]:
# Load the pre-trained classifier. The context manager guarantees the file
# handle is closed even if unpickling raises.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
with open('models/logreg.sav', 'rb') as model_file:
    svc = pickle.load(model_file)
print(svc)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)


In [4]:
def to_categorical(data, cat_vars):
    """One-hot encode the listed columns of a DataFrame.

    For every name in ``cat_vars`` the indicator columns produced by
    ``pd.get_dummies`` (prefixed with the column name) are joined onto the
    frame; afterwards the original categorical columns are dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns named in ``cat_vars``.
    cat_vars : list of str
        Names of the columns to expand.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified.
    """
    for column in cat_vars:
        indicators = pd.get_dummies(data[column], prefix=column)
        data = data.join(indicators)
    return data.drop(cat_vars, axis=1)

def round(df):
    """Round values to the nearest integer using ``numpy.rint``.

    ``df`` must provide an ``apply`` method (a pandas Series or DataFrame);
    the rounding function is handed to that method and its result returned.
    """
    # NOTE: this definition shadows Python's built-in round() for every
    # cell that runs after it.
    def _nearest_integer(value):
        return np.rint(value)

    return df.apply(_nearest_integer)

def miles(meter):
    """Convert a distance in metres to whole miles.

    The input is divided by 1609.344 (metres per mile) and the quotient is
    passed to this notebook's ``round`` helper.
    """
    metres_per_mile = 1609.344
    distance_in_miles = meter / metres_per_mile
    return round(distance_in_miles)

In [9]:
# Load the raw observations; the first CSV column becomes the index.
data = pd.read_csv('data.csv', index_col=0)
import sklearn.utils
# NOTE(review): shuffle() is called without random_state, so the row order
# differs from run to run.
data = sklearn.utils.shuffle(data)

# Flat array of the target column, captured before the feature columns change.
labels = np.ravel(data[['type']],order='C')

# Discard the columns that are not used as model features.
data = data.drop(
    ['userId', 'lat', 'lon', 'location', 'device', 'zip', 'zipcode', 'pressure', 'windDeg'],
    axis=1,
)

# Collapse free-text weather descriptions into a few canonical labels.
# Groups are listed in priority order and the first matching group wins,
# which reproduces the outcome of the former sequence of row-wise `if`s.
# Working on whole columns avoids a Python-level loop over rows and does not
# rely on the index labels being unique.
weather_groups = [
    ('Clouds', ('cloud', 'overcast')),
    ('Snow', ('snow',)),
    ('Rain', ('rain', 'drizzle', 'mist')),
    ('Clear', ('none',)),
]
weather = data['weatherCond']
weather_lower = weather.str.lower()
pending = np.ones(len(data), dtype=bool)  # rows not yet assigned a label
for canonical, keywords in weather_groups:
    # Keywords are plain letters, so joining them with '|' is a safe regex.
    matched = weather_lower.str.contains('|'.join(keywords), na=False).values & pending
    weather = weather.mask(matched, canonical)
    pending &= ~matched
data['weatherCond'] = weather

# Visibility is converted from metres to whole miles; temperature and wind
# speed are rounded to the nearest integer.
data['visibility'] = miles(data['visibility'])
data['temp'] = round(data['temp'])
data['windSpeed'] = round(data['windSpeed'])


# One-hot encode the normalised weather label with the shared helper instead
# of repeating its loop inline.
cat_vars=['weatherCond']
data = to_categorical(data, cat_vars)


from datetime import datetime
from dateutil import parser

def day_of_week(x):
    """Return the weekday index of date string ``x`` (Monday=0 ... Sunday=6)."""
    return parser.parse(x).weekday()


def is_weekend(x):
    """Return True when date string ``x`` falls on a Saturday or Sunday."""
    # weekday() numbers Monday as 0, so Saturday is 5 and Sunday is 6.
    # The previous test (`> 5`) only matched Sunday.
    # NOTE(review): if the pickled model was fitted on features built with the
    # old definition, retrain it so training and scoring features agree.
    return parser.parse(x).weekday() >= 5


def month(x):
    """Return the month number (1-12) of date string ``x``."""
    return parser.parse(x).month


data['day_of_week'] =  data['date'].map(day_of_week)
data['is_weekend'] =  data['date'].map(is_weekend)



def season_of_date(date_str):
    """Return the season name for a date string.

    The string is parsed with ``parser.parse`` and its month is mapped as:
    2-5 -> "spring", 6-8 -> "summer", 9-11 -> "fall", anything else
    (12 and 1) -> "winter".
    """
    # NOTE(review): February is classified as spring, leaving winter with only
    # two months - confirm this is intentional.
    month_number = parser.parse(date_str).month
    if 2 <= month_number <= 5:
        return "spring"
    if 6 <= month_number <= 8:
        return "summer"
    if 9 <= month_number <= 11:
        return "fall"
    return "winter"

# Each value of data['date'] is handed to season_of_date, which parses it with
# dateutil's parser.parse; the resulting label is then one-hot encoded into
# season_* columns and the intermediate 'season' column is dropped.
data['season'] = data['date'].map(season_of_date)
data = to_categorical(data, cat_vars=['season'])


def get_part_of_day(hour):
    """Return the part-of-day label for an hour value.

    5-11 -> "morning", 12-17 -> "afternoon", 18-22 -> "evening"; any other
    value -> "night".
    """
    if 5 <= hour <= 11:
        return "morning"
    if 12 <= hour <= 17:
        return "afternoon"
    if 18 <= hour <= 22:
        return "evening"
    return "night"

def part_of_day(x):
    """Map a date string to its part-of-day label via get_part_of_day."""
    return get_part_of_day(parser.parse(x).hour)


# Attach the part-of-day label, one-hot encode it, then discard the raw
# 'date' column. The 'type' column is not dropped here; predict() reads the
# label from it.
data = to_categorical(
    data.assign(part_of_day=data['date'].map(part_of_day)),
    cat_vars=['part_of_day']
).drop(['date'], axis=1)


data.head()

Unnamed: 0,clouds,humidity,temp,type,visibility,windSpeed,weatherCond_Clear,weatherCond_Clouds,weatherCond_Fog,weatherCond_Haze,...,day_of_week,is_weekend,season_fall,season_spring,season_summer,season_winter,part_of_day_afternoon,part_of_day_evening,part_of_day_morning,part_of_day_night
50ACE9B4-E6D5-45B2-95FF-3F3D2E21AC9F,90,87,58.0,0,1.0,3.0,0,0,0,0,...,0,False,0,1,0,0,1,0,0,0
FD54DC94-1015-4989-840A-5EF354405913,90,73,69.0,1,20.0,11.0,1,0,0,0,...,6,True,0,1,0,0,0,1,0,0
8E8F81AE-2DFE-4915-8BCA-152738FBEF51,1,25,26.0,0,10.0,2.0,1,0,0,0,...,2,False,0,0,0,1,0,1,0,0
6eb5f050-9f8a-11e7-b2a9-8d3ab17c1352,1,100,62.0,0,6.0,3.0,0,1,0,0,...,4,False,1,0,0,0,0,0,1,0
B21714A4-BD63-4BA2-9296-65C466681513,1,83,70.0,0,10.0,2.0,1,0,0,0,...,2,False,0,1,0,0,0,0,0,1


In [10]:

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same ndarray representation of the frame.
numpy_matrix = data.values
numpy_matrix[0]

  


array([90, 87, 58.0, 0, 1.0, 3.0, 0, 0, 0, 0, 1, 0, 0, 0, 0, False, 0, 1,
       0, 0, 1, 0, 0, 0], dtype=object)

In [11]:
def predict():
    """Score one randomly drawn row of the global ``data`` frame with ``svc``.

    The row is drawn with ``data.sample()`` (unseeded, so it differs between
    calls). Its ``type`` column is split off as the label and the remaining
    columns are passed to ``svc.predict``.

    Returns
    -------
    tuple
        ``(sample, label, values, prediction)`` where ``sample`` is the
        one-row frame without the ``type`` column, ``label`` is that row's
        ``type`` entry as a Series, ``values`` is the feature row as a nested
        list, and ``prediction`` is whatever ``svc.predict(values)`` returns.
    """
    row = data.sample()
    label = row['type']
    # Build a new frame rather than dropping in place on the sampled row.
    # The former as_matrix() call was removed: its result was never used and
    # the method no longer exists in current pandas.
    sample = row.drop(['type'], axis=1)
    values = sample.values.tolist()

    return sample, label, values, svc.predict(values)
    

In [12]:
# Bind the model output to `prediction`, not `predict`: reusing the function's
# name would replace the function with its result, so running this cell a
# second time would fail.
sample, label, values, prediction = predict()
print("label", label)
print("values", values)
print("predict", prediction)



label 113784CB-2A42-4557-B3C7-0544EF3819C7    1
Name: type, dtype: int64
values [[75, 87, 61.0, 10.0, 5.0, 0, 0, 0, 0, 1, 0, 0, 0, 3, False, 1, 0, 0, 0, 1, 0, 0, 0]]
predict [1]


  """
