In [None]:
# Data Processing at Scale - Assignment 1 (NoSQL)
# @author  : Kiruthika Ponnan - ASUID: 1227400293

from math import radians, sin, cos, sqrt, atan2
from unqlite import UnQLite
import time

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two points.

    All four arguments are latitudes/longitudes in decimal degrees. The
    haversine formula is evaluated on a sphere of radius 3959 miles.
    """
    earth_radius_miles = 3959

    # Half of each angular difference, in radians.
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2

    lat_term = sin(half_dlat) ** 2
    lon_term = cos(radians(lat1)) * cos(radians(lat2)) * sin(half_dlon) ** 2
    a = lat_term + lon_term

    # Central angle between the two points.
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
    return earth_radius_miles * central_angle


def FindBusinessBasedOnCity(cityToSearch, saveLocation1, collection):
    """Write every business located in ``cityToSearch`` to ``saveLocation1``.

    One line is written per matching record, in the form
    ``name$full_address$city$state``.

    Args:
        cityToSearch: City name; compared for exact equality with each
            record's ``'city'`` value.
        saveLocation1: Path of the output file. It is created or truncated,
            so a search with no matches leaves an empty file.
        collection: Object exposing ``filter(predicate)`` that yields the
            matching records as mappings (e.g. what ``LoadDB`` returns).

    Raises:
        KeyError: If a matching record lacks one of the output fields.
    """
    # A record without a 'city' key simply does not match, instead of
    # aborting the whole query with a KeyError.
    businesses = collection.filter(
        lambda b: 'city' in b and b['city'] == cityToSearch
    )
    # NOTE(review): filter() may hand back None rather than an empty list
    # when nothing matches -- confirm against unqlite-python. `or ()` makes
    # that case produce an empty file instead of a TypeError.
    with open(saveLocation1, 'w') as file:
        for business in businesses or ():
            file.write("{}${}${}${}\n".format(
                business['name'],
                business['full_address'],
                business['city'],
                business['state'],
            ))
    # No delay is needed here: leaving the `with` block has already flushed
    # and closed the file, so readers can open it immediately.

def FindBusinessBasedOnLocation(categoriesToSearch, myLocation, maxDistance, saveLocation2, collection):
    """Write the names of nearby businesses in the given categories.

    A business is selected when it shares at least one category with
    ``categoriesToSearch`` and its haversine distance from ``myLocation``
    is at most ``maxDistance``. One name is written per line.

    Args:
        categoriesToSearch: Iterable of category names to match against
            each record's ``'categories'`` value.
        myLocation: Sequence whose first two items are latitude and
            longitude, passed to ``haversine`` (decimal degrees).
        maxDistance: Inclusive distance limit, in the unit ``haversine``
            returns (miles).
        saveLocation2: Path of the output file. It is created or truncated.
        collection: Object exposing ``all()`` that yields every record as a
            mapping (e.g. what ``LoadDB`` returns).

    Raises:
        KeyError: If a category-matching record lacks ``'latitude'``,
            ``'longitude'`` or ``'name'``.
    """
    # Built once; the search categories do not change between records.
    wanted = set(categoriesToSearch)
    my_lat, my_lon = myLocation[0], myLocation[1]

    nearby_businesses = []
    # NOTE(review): all() may return None for an empty collection -- confirm
    # against unqlite-python. `or ()` turns that into "no records".
    for business in collection.all() or ():
        # Records with a missing or None category list cannot match.
        if wanted.isdisjoint(business.get('categories') or ()):
            continue
        distance = haversine(business['latitude'], business['longitude'], my_lat, my_lon)
        if distance <= maxDistance:
            nearby_businesses.append(business['name'])

    with open(saveLocation2, 'w') as file:
        file.writelines("{}\n".format(name) for name in nearby_businesses)
    # No delay is needed here: leaving the `with` block has already flushed
    # and closed the file, so readers can open it immediately.
   
def LoadDB(fileLocation):
    """Open the UnQLite database at ``fileLocation`` and return its 'data' collection."""
    return UnQLite(fileLocation).collection('data')


# Demo run: open the sample database and dump every record for inspection.
print("Loading database...")
data = LoadDB(fileLocation='sample.db')
for business in data.all():
    print(business)
print("Database loaded successfully!")

# Example query 1: every business in Tempe.
FindBusinessBasedOnCity(
    cityToSearch='Tempe',
    saveLocation1='output_city.txt',
    collection=data,
)

# Example query 2: food-related businesses within a distance of 30 from the
# given latitude/longitude.
FindBusinessBasedOnLocation(
    categoriesToSearch=['Food', 'Specialty Food'],
    myLocation=[33.3482589, -111.9088346],
    maxDistance=30,
    saveLocation2='output_loc.txt',
    collection=data,
)



In [48]:
# Tests
# Expected output lines for Scottsdale, in name$full_address$city$state form.
true_results = [
    '3 Palms$7707 E McDowell Rd, Scottsdale, AZ 85257$Scottsdale$AZ',
    "Bob's Bike Shop$1608 N Miller Rd, Scottsdale, AZ 85257$Scottsdale$AZ",
    'Ronan & Tagart, PLC$8980 E Raintree Dr, Ste 120, Scottsdale, AZ 85260$Scottsdale$AZ',
    "Sangria's$7700 E McCormick Pkwy, Scottsdale, AZ 85258$Scottsdale$AZ",
    'Turf Direct$8350 E Evans Rd, Scottsdale, AZ 85260$Scottsdale$AZ',
]

try:
    FindBusinessBasedOnCity('Scottsdale', 'output_city.txt', data)
except NameError:
    print('The FindBusinessBasedOnCity function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print(e)
    print("The FindBusinessBasedOnCity function is supposed to accept three arguments. Yours does not!")

try:
    # The context manager closes the handle as soon as the lines are read.
    with open('output_city.txt', 'r') as opf:
        lines = opf.readlines()
except FileNotFoundError:
    print("The FindBusinessBasedOnCity function does not write data to the correct location.")
    # Fall back to "no results" so the checks below report a mismatch
    # instead of reading from an undefined or stale file handle.
    lines = []

if len(lines) != 5:
    print("The FindBusinessBasedOnCity function does not find the correct number of results, should be 5.")
lines = [line.strip() for line in lines]
# Order of the output lines is not significant, so compare sorted copies.
if sorted(lines) == sorted(true_results):
    print("Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!")


# Expected output lines for Mesa, in name$full_address$city$state form.
true_results = [
    'Arizona Exterminating Co.$521 E Broadway Rd, Mesa, AZ 85204$Mesa$AZ',
    'Bikram Yoga$1940 W 8th St, Ste 111, Mesa, AZ 85202$Mesa$AZ',
    "Denny's Restaurant$1330 S Power Rd, Mesa, AZ 85206$Mesa$AZ",
    'Diamondback Gymnastics$7211 E Southern Avenue, Mesa, AZ 85209$Mesa$AZ',
    'Southeast Valley Medical Group$1950 S Country Club Dr, Mesa, AZ 85210$Mesa$AZ',
    'Spa Pima$2150 S Power Rd, Mesa, AZ 85209$Mesa$AZ',
    'The Seafood Market$1910 S Gilbert Rd, Mesa, AZ 85204$Mesa$AZ',
]
try:
    FindBusinessBasedOnCity('Mesa', 'output_city.txt', data)
except NameError:
    print('The FindBusinessBasedOnCity function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print(e)
    print("The FindBusinessBasedOnCity function is supposed to accept three arguments. Yours does not!")
try:
    # The context manager closes the handle as soon as the lines are read.
    with open('output_city.txt', 'r') as opf:
        lines = opf.readlines()
except FileNotFoundError:
    print("The FindBusinessBasedOnCity function does not write data to the correct location.")
    # Fall back to "no results" so the checks below report a mismatch
    # instead of reading from an undefined or stale file handle.
    lines = []
if len(lines) != 7:
    print("The FindBusinessBasedOnCity function does not find the correct number of results, should be 7.")
lines = [line.strip() for line in lines]
# Order of the output lines is not significant, so compare sorted copies.
if sorted(lines) == sorted(true_results):
    print("Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!")


# Expected business names for category 'Gardeners' with a distance limit of 20.
true_results = ['Turf Direct']
try:
    FindBusinessBasedOnLocation(
        ['Gardeners'], [33.3482589, -111.9088346], 20, 'output_loc.txt', data)
except NameError:
    print('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError:
    print("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")
try:
    # The context manager closes the handle as soon as the lines are read.
    with open('output_loc.txt', 'r') as opf:
        lines = opf.readlines()
except FileNotFoundError:
    print("The FindBusinessBasedOnLocation function does not write data to the correct location.")
    # Fall back to "no results" so the checks below report a mismatch
    # instead of reading from an undefined or stale file handle.
    lines = []
if len(lines) != 1:
    print("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 1.")
lines = [line.strip() for line in lines]
# Order of the output lines is not significant, so compare sorted copies.
if sorted(lines) == sorted(true_results):
    print("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

# Expected business names for category 'Bakeries' with a distance limit of 15.
true_results = ['Nothing Bundt Cakes', 'P.croissants']
try:
    FindBusinessBasedOnLocation(
        ['Bakeries'], [33.3482589, -111.9088346], 15, 'output_loc.txt', data)
except NameError:
    print('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError:
    print("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")
try:
    # The context manager closes the handle as soon as the lines are read.
    with open('output_loc.txt', 'r') as opf:
        lines = opf.readlines()
except FileNotFoundError:
    print("The FindBusinessBasedOnLocation function does not write data to the correct location.")
    # Fall back to "no results" so the checks below report a mismatch
    # instead of reading from an undefined or stale file handle.
    lines = []
if len(lines) != 2:
    print("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 2.")
lines = [line.strip() for line in lines]
# Order of the output lines is not significant, so compare sorted copies.
if sorted(lines) == sorted(true_results):
    print("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

# Expected business names for categories 'Food' / 'Specialty Food' with a
# distance limit of 30.
true_results = ['Nothing Bundt Cakes', 'Olive Creations',
                'P.croissants', 'The Seafood Market']
try:
    FindBusinessBasedOnLocation(['Food', 'Specialty Food'], [
                                33.3482589, -111.9088346], 30, 'output_loc.txt', data)
except NameError:
    print('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError:
    print("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")
try:
    # The context manager closes the handle as soon as the lines are read.
    with open('output_loc.txt', 'r') as opf:
        lines = opf.readlines()
except FileNotFoundError:
    print("The FindBusinessBasedOnLocation function does not write data to the correct location.")
    # Fall back to "no results" so the checks below report a mismatch
    # instead of reading from an undefined or stale file handle.
    lines = []
if len(lines) != 4:
    print("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 4.")
lines = [line.strip() for line in lines]
# Order of the output lines is not significant, so compare sorted copies.
if sorted(lines) == sorted(true_results):
    print("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")


Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!
Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!
Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.
Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.
Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.
