In [1]:
%matplotlib inline

from sqlalchemy import create_engine
from sqlalchemy_utils import create_database, database_exists
import psycopg2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

filenames = ['all_rideshares', 'license']
files = [f'other_data/{n}.csv' for n in filenames]

Ridesharing
-------------

In [20]:
def extract_service(x):
    if 'uber' in x.lower():
        return 'UBER'
    elif 'lyft' in x.lower():
        return 'LYFT'
    elif 'via' in x.lower():
        return 'VIA'
    elif 'juno' in x.lower():
        return 'JUNO'
    elif 'gett' in x.lower():
        return 'GETT'
    else:
        return 'OTHER'

# cols = ['DBA', 'Years', 'Week Number', 'Pickup Start Date',
#         'Pickup End Date', 'Total Dispatched Trips']
rideshare = pd.read_csv(files[0], header=0)
rideshare['service'] = rideshare.DBA.map(extract_service)
rideshare.drop('DBA', axis=1, inplace=True)
# columns = ['year', 'week_number', 'pickup_start_date',
#            'pickup_end_date', 'total_trips', 'service']
# rideshare.columns = columns
# rideshare = rideshare[np.roll(columns, shift=1)]
rideshare.head()

Unnamed: 0,Base License Number,Wave Number,Base Name,Years,Week Number,Pickup Start Date,Pickup End Date,Total Dispatched Trips,Unique Dispatched Vehicle,service
0,B02395,3,ABATAR INC,2015,37,09/06/2015,09/12/2015,1,1,UBER
1,B02395,3,ABATAR INC,2015,38,09/13/2015,09/19/2015,5,1,UBER
2,B02395,3,ABATAR INC,2015,39,09/20/2015,09/26/2015,3,1,UBER
3,B02395,3,ABATAR INC,2015,44,10/25/2015,10/31/2015,44,4,UBER
4,B02395,3,ABATAR INC,2015,45,11/01/2015,11/07/2015,29,5,UBER


In [3]:
pd.DatetimeIndex(start='2015-01-01', end='2017-12-31', freq='w')

DatetimeIndex(['2015-01-04', '2015-01-11', '2015-01-18', '2015-01-25',
               '2015-02-01', '2015-02-08', '2015-02-15', '2015-02-22',
               '2015-03-01', '2015-03-08',
               ...
               '2017-10-29', '2017-11-05', '2017-11-12', '2017-11-19',
               '2017-11-26', '2017-12-03', '2017-12-10', '2017-12-17',
               '2017-12-24', '2017-12-31'],
              dtype='datetime64[ns]', length=157, freq='W-SUN')

In [4]:
time_frame = pd.DataFrame(index=pd.DatetimeIndex(start='2015-01-01', end='2017-12-31', freq='w'),
                          data={'week': 1})
time_frame['year'] = time_frame.index.year
time_frame['week_number'] = time_frame.index.week
time_frame.drop('week', axis=1, inplace=True)
time_frame.head()

Unnamed: 0,year,week_number
2015-01-04,2015,1
2015-01-11,2015,2
2015-01-18,2015,3
2015-01-25,2015,4
2015-02-01,2015,5


In [5]:
summed = rideshare.groupby(['service', 'year', 'week_number']).sum()
summed.reset_index(inplace=True)
summed.head()
full = time_frame.join(summed.set_index(['year', 'week_number']),
                       on=['year', 'week_number'], how='outer')
full.reset_index(inplace=True)
full.head()

Unnamed: 0,index,year,week_number,service,total_trips
0,2015-01-04,2015,1,UBER,132820.0
1,2015-01-11,2015,2,UBER,356286.0
2,2015-01-18,2015,3,UBER,386848.0
3,2015-01-25,2015,4,UBER,385412.0
4,2015-02-01,2015,5,UBER,369692.0


In [6]:
engine = create_engine(f'postgres://mikemoran@localhost/transit')
if not database_exists(engine.url):
    create_database(engine.url)

In [7]:
full.to_sql('ridesharing', engine, if_exists='replace')

Business Licenses
---------------------

In [10]:
cols = ['License Type', 'Application or Renewal', 'Business Name',
        'Status', 'Start Date', 'License Category',
        'Street', 'Street 2', 'Zip']
business = pd.read_csv(files[1], header=0, usecols=cols)
# business = business[business.status == 'Issued']
business.head()

Unnamed: 0,License Type,Application or Renewal,Business Name,Status,Start Date,License Category,Street,Street 2,Zip
0,Business,Renewal,PEYKO TZENOV,Issued,01/09/2017,Home Improvement Contractor,RESERVOIR OVAL E,,10467
1,Business,Renewal,T-MOBILE NORTHEAST LLC,Issued,12/28/2016,Electronics Store,FULTON ST,,11201
2,Business,Renewal,LUCAS ELECTRONICS INC,Issued,12/31/2016,Electronics Store,MANHATTAN AVE,,11222
3,Business,Renewal,DEKALB NEWSSTAND CORP,Issued,01/02/2017,Cigarette Retail Dealer,KNICKERBOCKER AVE,,11237
4,Individual,Renewal,JUSTINIANO SALDIVAR,Issued,01/09/2017,Home Improvement Salesperson,82ND RD,,11004


In [12]:
business.Status.unique()

array(['Issued', 'Denied', 'Pending', 'Withdrawn'], dtype=object)

In [19]:
issued = business[(business.Status == 'Issued') &
#                   (business.Zip == 10035)]
#                    (business.Zip == 10029)]
#                    (business.Zip == 10128)]
#                    (business.Zip == 10075)]
                   (business.Zip == 10021)]
issued.head()

Unnamed: 0,License Type,Application or Renewal,Business Name,Status,Start Date,License Category,Street,Street 2,Zip
