##### This notebook loads the places data sourced from the FourSquare API (FourSq_data.csv).
##### It uses the following columns from that data:
        POI
        Address
        City
        State
##### These columns are passed as parameters to the Yelp business match endpoint to obtain a Yelp business ID for each place.
##### The business ID is then used to get ratings and reviews for that business from the Yelp reviews endpoint.
##### The responses are parsed into a new data frame, df1, which is saved to FourSq_ratings.csv.
    

In [None]:
import pandas as pd
import numpy as np
import json
from get_data_API import get_business_match
from get_data_API import get_reviews
import os
# Read both Yelp API keys from the environment; each is None when its
# variable is not set.
Yelp_key, Yelp_key_MR = (
    os.getenv(variable) for variable in ('Yelp_API', 'Yelp_API(MR)')
)


In [None]:
# Evaluate the imported name on its own so the notebook echoes its repr,
# confirming that get_reviews was imported from get_data_API.
get_reviews

In [None]:
# Read the FourSquare places file into a DataFrame and preview its first rows.
foursq_df = pd.read_csv(filepath_or_buffer='FourSq_data.csv')
foursq_df.head(n=5)

In [None]:
# Convert full province/territory names in the 'State' column to their
# two-letter ISO 3166-2:CA subdivision codes. Values that are not keys of
# the mapping (already abbreviated, unknown, or NaN) are left unchanged.

state_mapping = {
    'Alberta': 'AB',
    'British Columbia': 'BC',
    'Manitoba': 'MB',
    'New Brunswick': 'NB',
    'Newfoundland and Labrador': 'NL',
    'Northwest Territories': 'NT',
    'Nova Scotia': 'NS',
    'Nunavut': 'NU',
    'Ontario': 'ON',
    # The ISO code is 'PE'; 'PEI' is only the colloquial abbreviation.
    'Prince Edward Island': 'PE',
    'Quebec': 'QC',
    'Saskatchewan': 'SK',
    'Yukon': 'YT',
}

# map() yields NaN for unmapped values, so fall back to the original value.
foursq_df['State'] = (
    foursq_df['State'].map(state_mapping).fillna(foursq_df['State'])
)


In [None]:
# Keep only the rows that actually have a POI name (non-NaN 'POI').
has_poi = foursq_df['POI'].notna()
filtered_df = foursq_df.loc[has_poi]
filtered_df.head()

#### Pass the POI, address, city and state from the data frame to get the Yelp business ID

## Use get_business_match function to get business id on yelp to get the ratings for the POI above

In [None]:
# Look up the Yelp business ID for every POI in filtered_df.
#
# Each row contributes exactly one entry to business_id, business_name and
# cities, so the three lists stay aligned with the rows of filtered_df.
# 'N/A' marks POIs for which Yelp returned no match or an error response;
# in that case the FourSquare POI name is kept as the business name.

# Use the key loaded from the 'Yelp_API(MR)' environment variable, falling
# back to the primary key when that variable is not set.
match_api_key = Yelp_key_MR or Yelp_key

business_id = []
business_name = []
cities = []
for index, row in filtered_df.iterrows():
    name = row['POI']
    address = row['Address']
    city = row['City']
    state = row['State']

    # Defaults used when there is no match or the request fails.
    matched_id = 'N/A'
    matched_name = name

    res_fs = get_business_match(name, address, city, state, match_api_key)
    if res_fs.status_code == 200:
        data = res_fs.json()
        # Keep the latest raw response on disk for inspection; the file is
        # overwritten on every iteration.
        with open('business_fs.json', 'w') as f:
            json.dump(data, f)
        # Only the first candidate's id and name are used.
        businesses = data.get('businesses')
        if businesses:
            matched_id = businesses[0]['id']
            matched_name = businesses[0]['name']
    else:
        print(f"Error: {res_fs.status_code}, {res_fs.text}")

    business_id.append(matched_id)
    business_name.append(matched_name)
    cities.append(city)

    print(city)  # progress indicator

In [None]:
# Combine the matched Yelp ids, names and cities into one table and write
# it to fs_business_id.csv.
business_columns = {
    'business_id': business_id,
    'business_name': business_name,
    'city': cities,
}
Business_df1 = pd.DataFrame(business_columns)
Business_df1.to_csv('fs_business_id.csv', index=False)

In [None]:
# Display (rows, columns) after removing exact duplicate rows;
# Business_df1 itself is not modified.
Business_df1.drop_duplicates(keep='first').shape

In [None]:
# Display (rows, columns) of the full business table, duplicates included.
Business_df1.shape

## Use the business_id information to get ratings from get_reviews function 

In [None]:
# Fetch the Yelp reviews for every business in Business_df1.
#
# ratings receives exactly one entry per row, in row order: the parsed
# response when it contains a 'reviews' key, otherwise the string 'N/A'
# (no Yelp match, error response, or response without reviews).
ratings = []
index = None  # keeps the final print valid when Business_df1 is empty
try:
    for index, row in Business_df1.iterrows():
        yelp_id = row['business_id']
        print(index)  # progress indicator

        # No Yelp match was found for this venue, so there is nothing to
        # request; record the placeholder without spending an API call.
        if yelp_id == 'N/A':
            ratings.append('N/A')
            continue

        res_review = get_reviews(yelp_id, Yelp_key)
        if res_review.status_code != 200:
            print(f"Error: {res_review.status_code}, {res_review.text}")
            ratings.append('N/A')
            continue

        review = res_review.json()
        # Keep the payload only when it actually carries reviews.
        ratings.append(review if 'reviews' in review else 'N/A')
finally:
    # Written once, and also when the loop is interrupted or fails, so the
    # file always holds every entry collected so far.
    with open('fs_ratings_yelp.json', 'w') as f:
        json.dump(ratings, f)

print("Stopped at= ", index)

## Parse the JSON file fs_ratings_yelp.json to get a data frame of review and rating details

In [None]:
# Load the saved list of review responses back from disk.
with open('fs_ratings_yelp.json') as ratings_file:
    fs_ratings = json.load(ratings_file)




In [None]:
# Five independent empty lists, one per column of the final reviews table.
review, ratings, venue_name, venue_id, city = ([] for _ in range(5))

In [None]:
# Flatten the saved review payloads into the parallel column lists
# (review, ratings, venue_name, venue_id, city): one row per review.
#
# Each venue is paired with its OWN payload by position, so a venue never
# receives another venue's reviews.
# NOTE(review): this relies on fs_ratings holding one entry per
# Business_df1 row, in the same order - confirm both come from the same run.
#
# A venue with no Yelp match, or with no reviews available, still gets a
# single placeholder row whose review and rating are 'N/A'.
for position, (row_label, row) in enumerate(Business_df1.iterrows()):
    # The saved file may be shorter than the table if fetching stopped early.
    payload = fs_ratings[position] if position < len(fs_ratings) else 'N/A'

    venue_reviews = []
    # Non-dict entries (e.g. the 'N/A' marker) carry no reviews.
    if row['business_id'] != 'N/A' and isinstance(payload, dict):
        venue_reviews = payload.get('reviews') or []

    if not venue_reviews:
        # An empty item makes both lookups below fall back to 'N/A'.
        venue_reviews = [{}]

    for item in venue_reviews:
        review.append(item.get('text', 'N/A'))
        ratings.append(item.get('rating', 'N/A'))
        venue_name.append(row['business_name'])
        venue_id.append(row['business_id'])
        city.append(row['city'])

In [None]:
    
# Assemble the collected columns into the final reviews table and show it.
review_columns = {
    'venue': venue_name,
    'venue_id': venue_id,
    'city': city,
    'reviews': review,
    'ratings': ratings,
}
df1 = pd.DataFrame(data=review_columns)
df1
    

In [None]:
# Remove exact duplicate rows from df1 in place, keeping the first occurrence.
df1.drop_duplicates(keep='first', inplace=True)

In [None]:
# Write the final reviews table to FourSq_ratings.csv without the index column.
df1.to_csv(path_or_buf='FourSq_ratings.csv', index=False)