In [128]:
import ipynb
import pandas as pd
import numpy as np
import requests
import api_key
import json
from tqdm import tqdm


from ipynb.fs.full.mmtc_scrape import *

In [5]:
#clean companies df: no cleaning needed
df_companies

Unnamed: 0,Name,Phone,Email,Authorization Status,License Number
0,Ayr Cannabis Dispensary,833-254-4877,Info@libertyhealthsciences.com,Dispensing Authorization,MMTC-2015-0002
1,Cannabist,800-714-9215,info@col-carefl.com,Dispensing Authorization,MMTC-2017-0011
2,"Cookies Florida, Inc.",,,Dispensing Authorization,MMTC-2019-0018
3,Curaleaf,877-303-0741,info.fl@curaleaf.com,Dispensing Authorization,MMTC-2015-0001
4,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003
5,Gold Leaf,,,Dispensing Authorization,MMTC-2019-0019
6,Green Dragon,720-600-9555,support@greendragon.com,Dispensing Authorization,MMTC-2019-0021
7,GrowHealthy,863-223-8882,info@GrowHealthy.com,Dispensing Authorization,MMTC-2016-0007
8,GTI (Rise Dispensaries),305-306-8772,FLinfo@gtigrows.com,Dispensing Authorization,MMTC-2017-0013
9,House of Platinum Cannabis,,,Dispensing Authorization,MMTC-2018-0014


In [6]:
df_locations.count

<bound method DataFrame.count of                      COMPANY                         ADDRESS EMAIL ADDRESS  \
0    Ayr Cannabis Dispensary     6325 N Orange Blossom Trail             -   
1    Ayr Cannabis Dispensary               7390 Aloma Avenue             -   
2    Ayr Cannabis Dispensary            4650 N Alafaya Trail             -   
3    Ayr Cannabis Dispensary             440 N State Road 19             -   
4    Ayr Cannabis Dispensary  6930 Cypress Gardens Boulevard             -   
..                       ...                             ...           ...   
584                 VidaCann        2007 W Kennedy Boulevard             -   
585                 VidaCann              5203 Cortez Road W             -   
586                 VidaCann           1101 S Powerline Road             -   
587                 VidaCann          1663 Georgia Street NE             -   
588                 VidaCann                1027 N Nova Road             -   

    PHONE             CITY ZIP

In [55]:
#clean locations df

#drop empty columns
df_locations = df_locations.drop(['EMAIL ADDRESS', 'PHONE'], axis=1, errors='ignore')

#modify company names to matche the df_companies dataframe
df_locations.replace('MüV', 'MuV', inplace=True)
df_locations.replace('Sunnyside*', 'Sunnyside', inplace=True)

#left join companies onto locations on company name
merged_df = df_locations.merge(right=df_companies, how='left', left_on='COMPANY', right_on='Name')

#provide a detailed request string for geoapify
merged_df['Full Address'] = merged_df['ADDRESS'] + ", " + merged_df['CITY'] + ', FL ' + merged_df['ZIP CODE']

#get only the FLUENT Dispensaries
fluent_dispensaries = merged_df.loc[merged_df['COMPANY'] == 'FLUENT']


fluent_dispensaries.head()
merged_df.count()

COMPANY                 589
ADDRESS                 589
CITY                    589
ZIP CODE                589
COUNTY                  589
Name                    577
Phone                   545
Email                   545
Authorization Status    577
License Number          577
Full Address            589
dtype: int64

In [56]:
# miami_df_test = merged_df.loc[merged_df['CITY'].str.contains("Miami")]
# miami_df_test.count()

In [57]:
#summary analysis: count dispensaries by MMTC
merged_df['COMPANY'].value_counts()


Trulieve                      126
MuV                            70
Ayr Cannabis Dispensary        62
Curaleaf                       60
Surterra Wellness              45
FLUENT                         33
Green Dragon                   33
Sunnyside                      33
VidaCann                       26
Sanctuary Cannabis             19
GrowHealthy                    18
Cannabist                      14
Sunburn                        11
GTI (Rise Dispensaries)         9
Insa                            9
Jungle Boys                     8
The Flowery                     5
House of Platinum Cannabis      4
Cookies                         3
Gold Leaf                       1
Name: COMPANY, dtype: int64

In [58]:
example_df = fluent_dispensaries.loc[fluent_dispensaries['ADDRESS'].str.contains('175 NW')]
example_df

Unnamed: 0,COMPANY,ADDRESS,CITY,ZIP CODE,COUNTY,Name,Phone,Email,Authorization Status,License Number,Full Address
164,FLUENT,175 NW 167th Street,North Miami Beach,33169,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"175 NW 167th Street, North Miami Beach, FL 33169"


In [59]:
#input raw location dataset
input_ = merged_df

# #provide a detailed request string for geoapify
# input_['Full Address'] = input_['ADDRESS'] + ", " + input_['CITY'] + ', FL ' + input_['ZIP CODE']
# # input_ = input_[['ADDRESS']]


#clean input dataset
input_['Full Address'] = input_['Full Address'].str.replace('11245 SW 211 Street, Miami, FL 33189','11245 SW 211th St, Miami, FL  33189, United States')


#define output dataframe
output = pd.DataFrame(columns=['ADDRESS','Latitude', 'Longitude'])

#define failed list
failed_df = pd.DataFrame(columns=['ADDRESS'])
input_.head()

#limit the requests to the city of Miami Only
miami_df = input_.loc[input_['CITY'].str.contains("Miami")]

In [60]:
#geolocate all addresses (get coordinates)

#for every location address,
for row in miami_df.iterrows():
        
    #generate geoapify request url
    # location_id = row[1]['Location ID']
    #location_name = row[1]['BT Location Name']
    request_string = row[1]['Full Address']
    query_url = f"https://api.geoapify.com/v1/geocode/search?text={request_string}&format=json&apiKey={api_key.api_key}"
    
    try:
        #get returned data as json. This code hits the API
        geo_data = requests.get(query_url).json()

        #select desired fields to return to dataframe
        lat = geo_data['results'][0]['lat']
        lon = geo_data['results'][0]['lon']
        # county = geo_data['results'][0]['county']
        # popularity = geo_data['results'][0]['rank']['popularity']
        # importance = geo_data['results'][0]['rank']['importance']

        #append the data to the results dataframe
        current_frame = pd.DataFrame({'ADDRESS':[request_string],'Latitude':[lat], 'Longitude':[lon]})
        frames = [output, current_frame]
        output = pd.concat(frames)
        
        print('Success')
    
    except:
        current_frame = pd.DataFrame({'ADDRESS':[request_string]})
        failed_df = pd.concat([failed_df, current_frame])
        
        print('Failed')
    
#print the head of the dataframe to ensure that data was successfully loaded
output.head()

#print the failed dataframes
failed_df

Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success


Unnamed: 0,ADDRESS


In [127]:
output.count()

merged_df_with_coordinates = miami_df.merge(right=output, right_on='ADDRESS', left_on='Full Address', how='left')
merged_df_with_coordinates.count()
merged_df_with_coordinates.to_csv('./locations_with_coordinates.csv')

#get the fluent dispensaries in miami
fluent_dispensaries_in_miami = merged_df_with_coordinates.loc[merged_df_with_coordinates['COMPANY'] == 'FLUENT']
fluent_dispensaries_in_miami.head()

# merged_df_with_coordinates.dtypes

Unnamed: 0,COMPANY,ADDRESS_x,CITY,ZIP CODE,COUNTY,Name,Phone,Email,Authorization Status,License Number,Full Address,ADDRESS_y,Latitude,Longitude
9,FLUENT,9611 N Kendall Drive,Miami,33176,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"9611 N Kendall Drive, Miami, FL 33176","9611 N Kendall Drive, Miami, FL 33176",25.687627,-80.348817
10,FLUENT,5827 SW 40th Street,Miami,33155,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"5827 SW 40th Street, Miami, FL 33155","5827 SW 40th Street, Miami, FL 33155",25.734175,-80.28949
11,FLUENT,11245 SW 211 Street,Miami,33189,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"11245 SW 211th St, Miami, FL 33189, United St...","11245 SW 211th St, Miami, FL 33189, United St...",25.571142,-80.36925
12,FLUENT,175 NW 167th Street,North Miami Beach,33169,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"175 NW 167th Street, North Miami Beach, FL 33169","175 NW 167th Street, North Miami Beach, FL 33169",25.928299,-80.203588


In [123]:

#run through each fluent dispensary, caclulate the travel time / distance to every other dispensary

def calculate_distance_to_another_dispensary(from_waypoint, to_waypoint):

    #Test case coordinates
    # from_waypoint = [25.928299,-80.203588]
    # to_waypoint = [25.571142,-80.369250]

    #design request query
    url = f"https://api.geoapify.com/v1/routing?waypoints={from_waypoint[0]},{from_waypoint[1]}|{to_waypoint[0]},{to_waypoint[1]}&mode=drive&details=instruction_details&apiKey={api_key.api_key}"

    try:
        result_ = requests.get(url).json()
        distance_in_meters = result_['features'][0]['properties']['distance']
        print('Success')
        
        #convert distance from meters to miles
        distance_in_miles = distance_in_meters * 0.0006213712

        return(distance_in_miles)

    except:

        print(f'Faliure: {url}')



# calculate_distance_to_another_dispensary()




In [106]:
fluent_dispensaries_in_miami

Unnamed: 0,COMPANY,ADDRESS_x,CITY,ZIP CODE,COUNTY,Name,Phone,Email,Authorization Status,License Number,Full Address,ADDRESS_y,Latitude,Longitude
9,FLUENT,9611 N Kendall Drive,Miami,33176,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"9611 N Kendall Drive, Miami, FL 33176","9611 N Kendall Drive, Miami, FL 33176",25.687627,-80.348817
10,FLUENT,5827 SW 40th Street,Miami,33155,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"5827 SW 40th Street, Miami, FL 33155","5827 SW 40th Street, Miami, FL 33155",25.734175,-80.28949
11,FLUENT,11245 SW 211 Street,Miami,33189,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"11245 SW 211th St, Miami, FL 33189, United St...","11245 SW 211th St, Miami, FL 33189, United St...",25.571142,-80.36925
12,FLUENT,175 NW 167th Street,North Miami Beach,33169,Miami-Dade,FLUENT,833-735-8368,info@getfluent.com,Dispensing Authorization,MMTC-2015-0003,"175 NW 167th Street, North Miami Beach, FL 33169","175 NW 167th Street, North Miami Beach, FL 33169",25.928299,-80.203588


In [124]:

#radius in miles
radius = 5
results_dict = {'Full Address':[], 'competitors':[]}

for fluent_dispensary in fluent_dispensaries_in_miami.iterrows():

    number_of_competitors = 0
    
    for competitor in merged_df_with_coordinates.iterrows():

        fluent_coordinates = [fluent_dispensary[1]['Latitude'], fluent_dispensary[1]['Longitude']]
        competitor_coordinates = [competitor[1]['Latitude'], competitor[1]['Longitude']]

        distance_to_competitor = calculate_distance_to_another_dispensary(fluent_coordinates, competitor_coordinates)

        if distance_to_competitor <= radius:
            number_of_competitors += 1
    
    results_dict['Full Address'].append(fluent_dispensary[1]['Full Address'])
    results_dict['competitors'].append(number_of_competitors)

results_dict


Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success


{'Full Address': ['9611 N Kendall Drive, Miami, FL 33176',
  '5827 SW 40th Street, Miami, FL 33155',
  '11245 SW 211th St, Miami, FL  33189, United States',
  '175 NW 167th Street, North Miami Beach, FL 33169'],
 'competitors': [8, 6, 1, 3]}

In [126]:
results = pd.DataFrame(results_dict)
results

Unnamed: 0,Full Address,competitors
0,"9611 N Kendall Drive, Miami, FL 33176",8
1,"5827 SW 40th Street, Miami, FL 33155",6
2,"11245 SW 211th St, Miami, FL 33189, United St...",1
3,"175 NW 167th Street, North Miami Beach, FL 33169",3
