## Initial data setup

Import some libraries and modules and pull in and clean the initial datasets generated from extract and transform modules. 

In [6]:
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np
import pandas as pd
import random
# Import modules
import extract
import transform
import load

#Grab the top markets to analyze. pulled ad hoc
markets = pd.read_csv('top_75_markets.csv')

#Pull the main data frame
d_ci = extract.extract_all_lazy()['d_ci']

# Register transform functions here, create them in the transform.py
column_operations = {'ANSWER_num_rooms':transform.answer_num_rooms,
                     'ANSWER_ann_op_rev':transform.answer_ann_op_rev,
                     'ANSWER_ann_revenue':transform.answer_ann_revenue,
                     'ANSWER_num_employees_pos':transform.answer_num_employees_pos,
                     'create_Executive':transform.create_Executive,
                     'ANSWER_cur_base_pay':transform.answer_cur_base_pay
                     #'create_RevPAR':transform.create_RevPAR
                    }

#Create the Modeling dataset
clean_data  = transform.clean_model(d_ci, column_operations)
d_ci_v2 = clean_data['data']
metadata = clean_data['metadata']

create_Executive
ANSWER_ann_revenue
ANSWER_num_employees_pos
ANSWER_ann_op_rev
ANSWER_cur_base_pay
ANSWER_num_rooms


In [7]:
#Grab the top markets to analyze. pulled ad hoc
markets = pd.read_csv('top_75_markets.csv')
markets = markets['0']

## Analysis Setup

Modeling will be performed here resulting in assigned clusters and more

In [8]:
# Placefolder function to generate fake 'analysis' to mimic real datasets
def random_clusters(dataframe):
    dataframe['clusters'] = dataframe['ANSWER_ann_food_bev'].apply(lambda x: random.randrange(3))
    return dataframe

d_ci_v3 = random_clusters(d_ci_v2)

print_frame = False

# analysis data frame
adata = load.kwik_analytics(d_ci_v3, print_frame, markets)

In [9]:
adata.keys()
jobs_data = adata['jobs']
hotels_data = adata['hotels']

In [10]:
num_jobs = jobs_data.groupby(['POSITION']).count().sort_values(by='CITYMARKET', ascending=False).reset_index()
num_jobs_v1 = num_jobs[['POSITION','CITYMARKET']]
top_jobs = num_jobs_v1['POSITION'].head(10).tolist()

## Charting Setup

Initially setup constants needed to filter datasets. Before each chart create text box which states dependencies. Use this to create a dashboard or work with dash. 

In [11]:
#Establish inputs that would be taken in to explore in the datasets
CITYMARKET = 'San Diego'
POSITION = 'Front Desk Agent' #top_jobs

dimensions = {
            'PROPERTY_CODE',
              'PROPERTY_NAME',
              'INDUSTRY_NAME',
                   'ADDRESS1',
                   'ADDRESS2',
                       'CITY',
                      'STATE',
                        'ZIP',
                 'CITYMARKET',
               'BINGLATITUDE',
              'BINGLONGITUDE',
                 'SURVEYTYPE',
                   'JOB_CODE',
                   'POSITION' 
             }


In [None]:
# Line Chart of Percentiles

viz_data = jobs_data[jobs_data['CITYMARKET'] == CITYMARKET]
viz_data = viz_data[viz_data['POSITION'] == POSITION]

x_axis_labels = ['min','25','50','75','max']
y0 = viz_data[viz_data['clusters'] == 0][x_axis_labels].values[0]
y1 = viz_data[viz_data['clusters'] == 1][x_axis_labels].values[0]
y2 = viz_data[viz_data['clusters'] == 2][x_axis_labels].values[0]

# Create a trace
trace0 = go.Scatter(
    x = x_axis_labels,
    y = y0
)

trace1 = go.Scatter(
    x = x_axis_labels,
    y = y1
)

trace2 = go.Scatter(
    x = x_axis_labels,
    y = y2
)

data = [trace0, trace1, trace2]

py.iplot(data, filename='basic-line')

In [19]:
data = d_ci_v3[d_ci_v3['CITYMARKET'] == CITYMARKET]


Index([                   u'PROPERTY_CODE',
                          u'PROPERTY_NAME',
                    u'ANSWER_ann_food_bev',
                      u'ANSWER_ann_op_rev',
                     u'ANSWER_ann_revenue',
               u'ANSWER_bonus_rooms_quota',
                        u'ANSWER_bt_inc_e',
                       u'ANSWER_bt_inc_ne',
                   u'ANSWER_eff_wage_date',
                  u'ANSWER_flex_hire_wage',
       u'ANSWER_housekeeper_turnover_rate',
                     u'ANSWER_hr_set_wage',
                u'ANSWER_night_shift_prem',
                       u'ANSWER_num_rooms',
               u'ANSWER_paid_rollaway_bed',
                  u'ANSWER_perc_full_time',
                   u'ANSWER_perc_op_labor',
               u'ANSWER_rooms_quota_shift',
                   u'ANSWER_sq_ft_meeting',
                  u'ANSWER_turnover_rater',
                          u'INDUSTRY_NAME',
                               u'ADDRESS1',
                               u

In [36]:
scl = [ [0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],[0.5,"rgb(70, 100, 245)"],\
    [0.6,"rgb(90, 120, 245)"],[0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"] ]

data['ANSWER_num_employees_pos'] = data['ANSWER_num_employees_pos'].astype(int)
data['marker_size'] = (data['ANSWER_num_employees_pos']-data['ANSWER_num_employees_pos'].min())/(data['ANSWER_num_employees_pos'].max()-data['ANSWER_num_employees_pos'].min())

chart_data = [ dict(
        type = 'scattergeo',
        locationmode = 'USA-California',
        lat = data['BINGLATITUDE'],
        lon = data['BINGLONGITUDE'],
        #text = df['text'],
        mode = 'markers',
        marker = dict(
            size = data['marker_size']*25,
            opacity = 0.8,
            reversescale = True,
            autocolorscale = False,
            symbol = 'circle',
            line = dict(
                width=1,
                color='rgba(102, 102, 102)'
            ),
            colorscale = scl,
            cmin = 0,
            color = data['ANSWER_num_employees_pos'],
            cmax = data['ANSWER_num_employees_pos'].max(),
            colorbar=dict(
                title="Test"
            )
        ))]

layout = dict(
        title = 'Testing',
        colorbar = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = "rgb(250, 250, 250)",
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 0.5,
            subunitwidth = 0.5
        ),
    )

fig = dict( data=chart_data, layout=layout )
py.iplot( fig, validate=False, filename='d3-airports' )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [32]:
data['marker_size']

776      0.007634
777      0.129771
778      0.000000
779      0.000000
780      0.000000
781      0.000000
782      0.000000
783      0.091603
784      0.015267
785      0.022901
786      0.053435
787      0.076336
788      0.007634
789      0.000000
790      0.335878
791      0.778626
792      0.061069
793      0.000000
794      0.000000
795      0.000000
796      0.015267
797      0.091603
798      0.183206
799      0.022901
800      0.007634
801      0.076336
802      0.068702
803      0.007634
804      0.000000
805      0.847328
           ...   
65946    0.061069
65947    0.000000
65948    0.000000
65949    0.000000
65950    0.000000
65951    0.030534
65952    0.000000
65953    0.000000
68444    0.000000
68445    0.000000
68446    0.007634
68447    0.030534
68448    0.000000
68449    0.022901
68450    0.000000
68451    0.099237
68452    0.022901
68453    0.038168
68454    0.030534
68455    0.038168
68456    0.000000
68457    0.007634
68458    0.000000
68459    0.007634
68460    0

In [None]:
# Market Geo Chart
# Lat and Longs of hotels bubble chart of size of employees in the position




# Market Clustering Chart
# 3 dimensional plot of num_employees/revpar/meeting_space




# Bar charts