# Generating Businesses Data .CSV

* Pull request
* Parse and filter?
* Review prior functions?

## Imports

In [7]:
import requests
import pandas as pd
import json
import csv
import numpy as np

with open(r'C:\Users\bmcca\.secret\yelp_api.json') as f:
    keys = json.load(f)

client_id = keys['id']
yelp_key = keys['key']

## Creating the Pull Request

In [8]:
def yelp_request_offset(term, location, yelp_key, offset=0, verbose=False):
    '''Adapted from Yelp API Lab: https://github.com/BenJMcCarty/dsc-yelp-api-lab/tree/solution'''
    
    url = 'https://api.yelp.com/v3/businesses/search'

    headers = {
            'Authorization': 'Bearer {}'.format(yelp_key),
        }

    url_params = {
                    'term': term.replace(' ', '+'),
                    'location': location.replace(' ', '+'),
                    'limit': 50,
                    'offset': offset
                        }
    
    response = requests.get(url, headers=headers, params=url_params)
    
    if verbose == True:
        print(response)
        print(type(response.text))
        print(response.text[:1000])
        
    return response.json()

In [9]:
def parse_data(list_of_data):
    '''Adapted from Tyrell's code'''  

    # Create empty list to store results
    
    parsed_data = []
    
    # Loop through each business in the list of businesses
    # Add specific k:v pairs to a dictionary
    
    for business in list_of_data:
        if 'price' not in business:
            business['price'] = np.nan
            
            # Verify that the "price" key is in the selected business dict
            
        details = {'name': business['name'],
                     'location': ' '.join(business['location']['display_address']),
                     'id': business['id'],
                     'alias': business['categories'][0]['alias'],
                     'title': business['categories'][0]['title'],
                     'rating': business['rating'],
                     'review_count': business['review_count'],
                     'price': business['price'],
                     'latitude': business['coordinates']['latitude'],
                     'longitude': business['coordinates']['longitude']
                    }
        # Add the new dictionary to the previous list
        
        parsed_data.append(details)
    
    # Create a DataFrame from the resulting list
    
    df_parsed_data = pd.DataFrame(parsed_data)

    
    return df_parsed_data

In [88]:
def get_full_data(term, location, yelp_key, file_name = 'data/wineries_raw.csv'):
    '''Requests all results from Yelp API; saves as a .csv; and returns a DataFrame.'''

    # Create a .csv to store results
    blank_df = pd.DataFrame()
    blank_df.to_csv(file_name)
    
    # Process first request to Yelp API and calculate number of pages 
    results = yelp_request_offset(term, location, yelp_key, offset=0, 
                                  verbose=False)
    num_pages = results['total']//50+1
    
    # Print out confirmation feedback
    print(f'For {term} and {location}: ')
    print(f"    Total number of results: {results['total']}.")
    print(f'    Total number of pages: {num_pages}.')
    
    # Create offset for further results and create empty list
    offset = 0

    # Retrieves remaining pages
    for num in range(num_pages-1):
        try:
            # Process API request
            results = yelp_request_offset(term, location, yelp_key,
                                          offset=offset, verbose=False)
            
            # From results, take values from "Businesses" key and save
            parsed_results = parse_data(results['businesses'])
          
            # Save resulting DF to .csv from top
            parsed_results.to_csv(file_name, mode='a', index = False)
            
            # Increase offset to move to next "page" of data
            offset += 50
            
        except:
            # If error, print where the error happens
            print(f'Error on page {num}.')
            # Then save the results so far to the .csv
            parsed_results.to_csv(file_name, mode='a', index = False)


    return parsed_results

## Cleaning Data

In [89]:
def sort_by_aliases(raw_data = 'data/wineries_raw.csv'):

    # Read in businesses
    df2 = pd.read_csv(raw_data, header = 1)

    # Create new DF filtering alias and title columns
    df2_alias = df2.loc[:,['alias', 'title']]

    # Identify top 2 aliases 
    df2_alias_count = df2_alias.groupby('alias').count().sort_values(['title'],\
                                                            ascending=False)[:2]

    # Note: initially tried top 3, but it returned distributors, not wineries

    df2_alias_count.reset_index(inplace=True)
    
    print("Top two aliases: ")
    print(df2_alias_count)

    # display them as a list
    aliases_top_2 = df2_alias_count['alias'].tolist()

    # Selecting rows based on condition and saving

    df3 = df2[df2['alias'].isin(aliases_top_2)]

    df3.to_csv('data/wineries_filtered_alias.csv', index = False)

    
    return "Saved to 'data/wineries_filtered_alias.csv'"

## Testing Functions

### Get Data

In [90]:
get_full_data('winery','San Diego', yelp_key)

For winery and San Diego: 
    Total number of results: 262.
    Total number of pages: 6.


In [91]:
df_test = pd.read_csv('data/wineries_raw.csv', header = 1)
df_test

Unnamed: 0,name,location,id,alias,title,rating,review_count,price,latitude,longitude
0,The Winery Restaurant & Wine Bar,"4301 La Jolla Village Dr Ste 2040 San Diego, C...",76ADW8x8J_69qbtsc5F-2g,bars,Bars,4.0,492,$$,32.8724284,-117.2137748
1,Bernardo Winery,"13330 Paseo Del Verano Norte San Diego, CA 92128",DknnpiG1p4OoM1maFshzXA,winetastingroom,Wine Tasting Room,4.5,626,$$,33.0328,-117.04646
2,Baja Winery Tours,"4629 Cass St San Diego, CA 92109",vVaNDvLrCCE_Cw_DyPnBpA,winetours,Wine Tours,5.0,66,,32.7989164,-117.2521107
3,Callaway Vineyard & Winery,"517 4th Ave Ste 101 San Diego, CA 92101",Cn2_bpTngghYW1ej4zreZg,winetastingroom,Wine Tasting Room,5.0,100,$$,32.7107506117294,-117.160917759246
4,Négociant Winery,"2419 El Cajon Blvd San Diego, CA 92104",Cc1sQWRWgGyMCjzX2mmMQQ,winetastingroom,Wine Tasting Room,4.5,103,$$,32.75488,-117.13828
...,...,...,...,...,...,...,...,...,...,...
195,San Diego Limobuses,"3333 Midway Dr Ste 206 San Diego, CA 92110",SCaFGyzrTGTI6aQHhLxbgA,limos,Limos,3.0,46,,32.75006,-117.21138
196,Alpine Discount Liquor,"2223 Alpine Blvd Alpine, CA 91901",-ARx5ShNxJgjyahKnikTnA,beer_and_wine,"Beer, Wine & Spirits",3.5,3,$,32.8352874,-116.7659099
197,Village Wine & Spirits,"1552 Encinitas Blvd Encinitas, CA 92024",XkGnb-YxP5MK_ok1X011RA,beer_and_wine,"Beer, Wine & Spirits",4.0,24,$$,33.0458964,-117.2555835
198,The Destination Wedding Group,"Escondido, CA 92033",lJwxe_fjdt-e8xPrDq63fA,wedding_planning,Wedding Planning,5.0,18,,33.12347,-117.08652


### Clean Data

In [92]:
df_test_clean = sort_by_aliases(raw_data = 'data/wineries_raw.csv')
df_test_clean

Top two aliases: 
             alias  title
0         wineries     49
1  winetastingroom     33


"Saved to 'data/wineries_filtered_alias.csv'"

In [109]:
df_test2 = pd.read_csv('data/wineries_filtered_alias.csv')
df_test2

Unnamed: 0,name,location,id,alias,title,rating,review_count,price,latitude,longitude
0,Bernardo Winery,"13330 Paseo Del Verano Norte San Diego, CA 92128",DknnpiG1p4OoM1maFshzXA,winetastingroom,Wine Tasting Room,4.5,626,$$,33.032800,-117.046460
1,Callaway Vineyard & Winery,"517 4th Ave Ste 101 San Diego, CA 92101",Cn2_bpTngghYW1ej4zreZg,winetastingroom,Wine Tasting Room,5.0,100,$$,32.710751,-117.160918
2,Négociant Winery,"2419 El Cajon Blvd San Diego, CA 92104",Cc1sQWRWgGyMCjzX2mmMQQ,winetastingroom,Wine Tasting Room,4.5,103,$$,32.754880,-117.138280
3,San Pasqual Winery - Seaport Village,"805 W Harbor Dr San Diego, CA 92101",gMW1RvyLu90RSQAY9UrIHw,winetastingroom,Wine Tasting Room,4.5,138,$$,32.708732,-117.168195
4,Domaine Artefact Vineyard & Winery,"15404 Highland Valley Rd Escondido, CA 92025",WqVbxY77Ag96X90LultCUw,wineries,Wineries,5.0,96,$$,33.068170,-117.001600
...,...,...,...,...,...,...,...,...,...,...
77,Roll OutThe Barrell Charity Event by Meritage,"162 S Rancho Santa Fe Rd Encinitas, CA 92024",wyLm9fIoamN-VALcu3nUVg,wineries,Wineries,4.0,1,,33.037121,-117.238654
78,Licores Kentucky,Calle Puerto y 3ra S/N Col. Centro 22000 Tijua...,B7gID-M2EsdpthrTcwTNYA,wineries,Wineries,5.0,1,,32.534236,-117.034976
79,Barrica 9,Av. Revolución 1265 Col. Zona Centro 22000 Tij...,HxTqmzT4G43iAKXrB3pqQg,winetastingroom,Wine Tasting Room,4.5,7,$$,32.530430,-117.036500
80,"RL Liquid Assets, Inc","5909 Sea Lion Pl Ste G Carlsbad, CA 92010",-STecUUsS69EMSE7PxwPwA,wineries,Wineries,3.0,2,,33.134743,-117.248093


In [94]:
# list_test = df_test2.id
# list_test[list_test.str.startswith("-")]

In [95]:
# for item in df_test2['id']:
#     if item.startswith("-") == True:
#         item = None

In [138]:
def fix_csv_issue(df_var_name):
    for item in df_var_name['id']:
        if item.startswith("-") == True:
            df_var_name['id'].replace(to_replace=item, value = 0, inplace=True)
#     df_var_name = df_var_name['id'].dropna(inplace=True)
    
    return df_var_name

In [139]:
fix_csv_issue(df_test2)

Unnamed: 0,name,location,id,alias,title,rating,review_count,price,latitude,longitude
0,Bernardo Winery,"13330 Paseo Del Verano Norte San Diego, CA 92128",DknnpiG1p4OoM1maFshzXA,winetastingroom,Wine Tasting Room,4.5,626,$$,33.032800,-117.046460
1,Callaway Vineyard & Winery,"517 4th Ave Ste 101 San Diego, CA 92101",Cn2_bpTngghYW1ej4zreZg,winetastingroom,Wine Tasting Room,5.0,100,$$,32.710751,-117.160918
2,Négociant Winery,"2419 El Cajon Blvd San Diego, CA 92104",Cc1sQWRWgGyMCjzX2mmMQQ,winetastingroom,Wine Tasting Room,4.5,103,$$,32.754880,-117.138280
3,San Pasqual Winery - Seaport Village,"805 W Harbor Dr San Diego, CA 92101",gMW1RvyLu90RSQAY9UrIHw,winetastingroom,Wine Tasting Room,4.5,138,$$,32.708732,-117.168195
4,Domaine Artefact Vineyard & Winery,"15404 Highland Valley Rd Escondido, CA 92025",WqVbxY77Ag96X90LultCUw,wineries,Wineries,5.0,96,$$,33.068170,-117.001600
...,...,...,...,...,...,...,...,...,...,...
77,Roll OutThe Barrell Charity Event by Meritage,"162 S Rancho Santa Fe Rd Encinitas, CA 92024",wyLm9fIoamN-VALcu3nUVg,wineries,Wineries,4.0,1,,33.037121,-117.238654
78,Licores Kentucky,Calle Puerto y 3ra S/N Col. Centro 22000 Tijua...,B7gID-M2EsdpthrTcwTNYA,wineries,Wineries,5.0,1,,32.534236,-117.034976
79,Barrica 9,Av. Revolución 1265 Col. Zona Centro 22000 Tij...,HxTqmzT4G43iAKXrB3pqQg,winetastingroom,Wine Tasting Room,4.5,7,$$,32.530430,-117.036500
80,"RL Liquid Assets, Inc","5909 Sea Lion Pl Ste G Carlsbad, CA 92010",-STecUUsS69EMSE7PxwPwA,wineries,Wineries,3.0,2,,33.134743,-117.248093


In [104]:
df_test2[~df_test2['id'].isna()]

Unnamed: 0,name,location,id,alias,title,rating,review_count,price,latitude,longitude
0,Bernardo Winery,"13330 Paseo Del Verano Norte San Diego, CA 92128",DknnpiG1p4OoM1maFshzXA,winetastingroom,Wine Tasting Room,4.5,626,$$,33.032800,-117.046460
1,Callaway Vineyard & Winery,"517 4th Ave Ste 101 San Diego, CA 92101",Cn2_bpTngghYW1ej4zreZg,winetastingroom,Wine Tasting Room,5.0,100,$$,32.710751,-117.160918
2,Négociant Winery,"2419 El Cajon Blvd San Diego, CA 92104",Cc1sQWRWgGyMCjzX2mmMQQ,winetastingroom,Wine Tasting Room,4.5,103,$$,32.754880,-117.138280
3,San Pasqual Winery - Seaport Village,"805 W Harbor Dr San Diego, CA 92101",gMW1RvyLu90RSQAY9UrIHw,winetastingroom,Wine Tasting Room,4.5,138,$$,32.708732,-117.168195
4,Domaine Artefact Vineyard & Winery,"15404 Highland Valley Rd Escondido, CA 92025",WqVbxY77Ag96X90LultCUw,wineries,Wineries,5.0,96,$$,33.068170,-117.001600
...,...,...,...,...,...,...,...,...,...,...
77,Roll OutThe Barrell Charity Event by Meritage,"162 S Rancho Santa Fe Rd Encinitas, CA 92024",wyLm9fIoamN-VALcu3nUVg,wineries,Wineries,4.0,1,,33.037121,-117.238654
78,Licores Kentucky,Calle Puerto y 3ra S/N Col. Centro 22000 Tijua...,B7gID-M2EsdpthrTcwTNYA,wineries,Wineries,5.0,1,,32.534236,-117.034976
79,Barrica 9,Av. Revolución 1265 Col. Zona Centro 22000 Tij...,HxTqmzT4G43iAKXrB3pqQg,winetastingroom,Wine Tasting Room,4.5,7,$$,32.530430,-117.036500
80,"RL Liquid Assets, Inc","5909 Sea Lion Pl Ste G Carlsbad, CA 92010",-STecUUsS69EMSE7PxwPwA,wineries,Wineries,3.0,2,,33.134743,-117.248093


In [108]:
df_test2.dropna(axis=0, inplace=True)
df_test2

Unnamed: 0,name,location,id,alias,title,rating,review_count,price,latitude,longitude
0,Bernardo Winery,"13330 Paseo Del Verano Norte San Diego, CA 92128",DknnpiG1p4OoM1maFshzXA,winetastingroom,Wine Tasting Room,4.5,626,$$,33.0328,-117.04646
1,Callaway Vineyard & Winery,"517 4th Ave Ste 101 San Diego, CA 92101",Cn2_bpTngghYW1ej4zreZg,winetastingroom,Wine Tasting Room,5.0,100,$$,32.710751,-117.160918
2,Négociant Winery,"2419 El Cajon Blvd San Diego, CA 92104",Cc1sQWRWgGyMCjzX2mmMQQ,winetastingroom,Wine Tasting Room,4.5,103,$$,32.75488,-117.13828
3,San Pasqual Winery - Seaport Village,"805 W Harbor Dr San Diego, CA 92101",gMW1RvyLu90RSQAY9UrIHw,winetastingroom,Wine Tasting Room,4.5,138,$$,32.708732,-117.168195
4,Domaine Artefact Vineyard & Winery,"15404 Highland Valley Rd Escondido, CA 92025",WqVbxY77Ag96X90LultCUw,wineries,Wineries,5.0,96,$$,33.06817,-117.0016
5,Carruth Cellars Wine Garden,"2215 Kettner Blvd San Diego, CA 92101",yZp9FdMH6Dmn98mfNInFHw,wineries,Wineries,4.5,196,$$,32.72748,-117.17056
6,Blue Door Urban Winery,"4060 Morena Blvd San Diego, CA 92117",ElE6Nj7iz-tNV4ebV6Clew,wineries,Wineries,4.5,76,$$,32.814091,-117.218254
7,Cordiano Winery,"15732 Highland Valley Rd Escondido, CA 92025",Ub2bJsi7lIOQ9TyIKdHaJw,wineries,Wineries,4.0,547,$$,33.068118,-116.991715
8,Pali Wine Co,"2130 India St San Diego, CA 92101",fyh566YXm5XJ3Ntv_GLghg,winetastingroom,Wine Tasting Room,4.0,173,$$,32.726797,-117.169759
9,FruitCraft - Fermentery & Distillery,"1477 University Ave San Diego, CA 92103",sCET1pLdKNNPBQJyjPOkww,wineries,Wineries,4.5,308,$$,32.748217,-117.149463


## Saving Data