In [89]:
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize
import os
import glob
from bs4 import BeautifulSoup
from urllib.request import urlopen

## Load Weedmaps dataframe

In [44]:
def load_weedmaps_df(lower_latitude, left_longitude, upper_latitude, right_longitude):
    """Returns a pandas dataframe of marijuana dispensaries within the geographical constraints
    specified in the input. Uses the Weedmaps list of marijuana retailers.
    """
    url = ("https://api-g.weedmaps.com/discovery/v1/listings?filter%"
            "5Bany_retailer_services%5D%5B%5D=doctor&filter%5Bany_retailer_services"
            "%5D%5B%5D=storefront&filter%5Bany_retailer_services%5D%5B%5D=delivery&filter%"
            "5Bbounding_box%5D={},{},{},{}"
            "&page_size=150&size=150").format(lower_latitude, left_longitude, upper_latitude, right_longitude)
    get_stores = requests.get(url, verify = False)
    stores_json = get_stores.json()
    stores_json_norm = json_normalize(stores_json.get('data', {}).get('listings', {}))
    
    weed_stores_df = pd.DataFrame(stores_json_norm)[["name", "id", "address", "city", "state", "zip_code", "latitude", 
                      "longitude", "license_type", "type", "menu_items_count", 
                      "retailer_services", "online_ordering.enabled_for_delivery", 
                      "online_ordering.enabled_for_pickup", "ranking", "rating", 
                      "reviews_count", "web_url"]]
    return weed_stores_df


In [59]:
weed_stores_df = load_weedmaps_df(25, - 120, 35, -100)
weed_stores_df.shape



(150, 18)

In [60]:
weed_stores_df.head()

Unnamed: 0,name,id,address,city,state,zip_code,latitude,longitude,license_type,type,menu_items_count,retailer_services,online_ordering.enabled_for_delivery,online_ordering.enabled_for_pickup,ranking,rating,reviews_count,web_url
0,Ponderosa Releaf,26300,9240 W Northern Ave Ste 103B,Glendale,AZ,85305,33.552681,-112.258388,medical,dispensary,227,[storefront],False,True,4.963562,4.967438,4146,https://weedmaps.com/dispensaries/ponderosa-re...
1,The W 25CAP of Wilmington,46434,1302 North Wilmington Blvd,Wilmington,CA,90744,33.788653,-118.274743,medical,dispensary,194,[storefront],False,False,4.937452,4.945589,6359,https://weedmaps.com/dispensaries/the-w-collec...
2,BONAFIDE COLLECTIVE - Pre ICO,21706,7570 San Fernando Rd.,Burbank,CA,91504,34.209405,-118.35357,recreational,dispensary,367,[storefront],False,False,4.934087,4.94401,3247,https://weedmaps.com/dispensaries/bonafide-gro...
3,Roze City,43964,365 South Rosemead Blvd,PASADENA,CA,91107,34.140204,-118.073685,recreational,dispensary,451,[storefront],False,False,4.920778,4.931495,3626,https://weedmaps.com/dispensaries/nhc-natural-...
4,The Mint Dispensary,50982,5210 S Priest Dr,Guadalupe,AZ,85283,33.377176,-111.963797,medical,dispensary,433,[storefront],False,False,4.913605,4.913838,2746,https://weedmaps.com/dispensaries/the-mint-dis...


## Single Store

In [73]:
## Obtain products and prices from each store. 
get_store = requests.get("https://weedmaps.com/dispensaries/the-crop")
store_parsed = BeautifulSoup(get_store.content, 'lxml')
#store_json = json.loads(store_parsed.text)
#store_json

In [82]:
store_parsed.text



In [86]:
store_parsed_clean = store_parsed.text.replace(';__NEXT_LOADED_PAGES__=[];__NEXT_REGISTER_PAGE=function(r,f){__NEXT_LOADED_PAGES__.push([r, f])}', '')

In [91]:
## json.loads(store_parsed_clean)

## Licences DF

In [118]:
licences_df["License Type"].unique()

array(['Cannabis - Distributor Temporary License',
       'Cannabis - Retailer Temporary License',
       'Cannabis - Retailer Nonstorefront Temporary License',
       'Cannabis - Microbusiness Temporary License',
       'Cannabis - Distributor-Transport Only Temporary License',
       'Cannabis - Event Organizer Temporary License', 'License Type',
       'Cannabis - Testing Laboratory Temporary License',
       'Cannabis - Distributor License', 'Cannabis - Retailer License',
       'Cannabis - Event Organizer License'], dtype=object)

In [100]:
glob.glob("..//data//*")
licences_df = pd.read_csv("..//data//searchResults.csv", encoding = "utf-8")
licences_df.head()

Unnamed: 0,License Number,License Type,Business Owner,Business Contact Information,Business Structure,Premise Address,Status,Issue Date,Expiration Date,Activities,Adult-Use/Medicinal
0,C11-18-0000939-TEMP,Cannabis - Distributor Temporary License,Angel Fernandez,MOVOCAN : Email- medigroupangel@gmail.com : Ph...,Corporation,"CALEXICO, CA 922319530 County: IMPERIAL",Active,07/29/2019,12/31/2018,N/A for this license type,BOTH
1,C10-18-0000287-TEMP,Cannabis - Retailer Temporary License,Angel Fernandez,MOVOCAN : Email- medigroupangel@gmail.com : Ph...,Corporation,"1632 GATEWAY RD CALEXICO, CA 922319530 County:...",Active,07/29/2019,12/31/2018,N/A for this license type,Medicinal
2,C9-18-0000229-TEMP,Cannabis - Retailer Nonstorefront Temporary Li...,Angel Fernandez,MOVOCAN : Email- medigroupangel@gmail.com : Ph...,Corporation,"CALEXICO, CA 922319530 County: IMPERIAL",Active,07/29/2019,12/31/2018,N/A for this license type,BOTH
3,C12-18-0000217-TEMP,Cannabis - Microbusiness Temporary License,Javier Armas,Javier Armas : Javier's Organics : Email- Jav...,Sole Proprietorship,"OAKLAND, CA 946012132 County: ALAMEDA",Active,07/29/2019,12/31/2018,"Level 1 Manufacturer, Retailer Non-Storefront,...",BOTH
4,C9-18-0000230-TEMP,Cannabis - Retailer Nonstorefront Temporary Li...,martha Medina,RGG LLC : Ruby's Green Garden : Email- rubysg...,Limited Liability Company,"OAKLAND, CA 946012132 County: ALAMEDA",Active,07/29/2019,12/31/2018,N/A for this license type,BOTH


In [115]:
licences_df['Business Contact Information'].head()

0    MOVOCAN : Email- medigroupangel@gmail.com : Ph...
1    MOVOCAN : Email- medigroupangel@gmail.com : Ph...
2    MOVOCAN : Email- medigroupangel@gmail.com : Ph...
3    Javier Armas  : Javier's Organics : Email- Jav...
4    RGG LLC  : Ruby's Green Garden : Email- rubysg...
Name: Business Contact Information, dtype: object

In [114]:
licences_df['contact_split'] = licences_df['Business Contact Information'].str.split(':').head()
# licences_df['Business name'] = licences_df['Contact split'].apply(lambda x: x[0])
# licences_df['Business name'].head()
licences_df['contact_split'].head()

0    [MOVOCAN ,  Email- medigroupangel@gmail.com , ...
1    [MOVOCAN ,  Email- medigroupangel@gmail.com , ...
2    [MOVOCAN ,  Email- medigroupangel@gmail.com , ...
3    [Javier Armas  ,  Javier's Organics ,  Email- ...
4    [RGG LLC  ,  Ruby's Green Garden ,  Email- rub...
Name: contact_split, dtype: object