<h2>Import Libraries</h2>

In [2]:
import pandas as pd
import json
import sqlalchemy
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, ForeignKey, CheckConstraint

<h2>Loading Yelp Dataset Files</h2>

In [3]:
# Load business dataset
with open('yelp_academic_dataset_business.json', 'r', encoding='utf-8') as f:
    business_data = [json.loads(line) for line in f]
business_df = pd.DataFrame(business_data)


In [4]:
# Repeat for other datasets (checkin, user, tip, review)
with open('yelp_academic_dataset_checkin.json', 'r', encoding='utf-8') as f:
    checkin_data = [json.loads(line) for line in f]
checkin_df = pd.DataFrame(checkin_data)


In [5]:
with open('yelp_academic_dataset_user.json', 'r', encoding='utf-8') as f:
    user_data = [json.loads(line) for line in f]
user_df = pd.DataFrame(user_data)


In [6]:
with open('yelp_academic_dataset_tip.json', 'r', encoding='utf-8') as f:
    tip_data = [json.loads(line) for line in f]
tip_df = pd.DataFrame(tip_data)


In [7]:
with open('yelp_academic_dataset_review.json', 'r', encoding='utf-8') as f:
    review_data = [json.loads(line) for line in f]
review_df = pd.DataFrame(review_data)

<h2>Explore The Dataset Files</h2>

In [8]:
print(business_df.head())
print(business_df.info())

              business_id                      name  \
0  Pns2l4eNsfO8kk83dixA6A  Abby Rappoport, LAC, CMQ   
1  mpf3x-BjTdTEA3yCZrAYPw             The UPS Store   
2  tUFrWirKiKi_TAnsVWINQQ                    Target   
3  MTSW4McQd7CbVtyjqoe9mw        St Honore Pastries   
4  mWMc6_wTdE0EUBKIGXDVfA  Perkiomen Valley Brewery   

                           address           city state postal_code  \
0           1616 Chapala St, Ste 2  Santa Barbara    CA       93101   
1  87 Grasso Plaza Shopping Center         Affton    MO       63123   
2             5255 E Broadway Blvd         Tucson    AZ       85711   
3                      935 Race St   Philadelphia    PA       19107   
4                    101 Walnut St     Green Lane    PA       18054   

    latitude   longitude  stars  review_count  is_open  \
0  34.426679 -119.711197    5.0             7        0   
1  38.551126  -90.335695    3.0            15        1   
2  32.223236 -110.880452    3.5            22        0   
3  39.9555

In [9]:
print(checkin_df.head())
print(checkin_df.info())

              business_id                                               date
0  ---kPU91CF4Lq2-WlRu9Lw  2020-03-13 21:10:56, 2020-06-02 22:18:06, 2020...
1  --0iUa4sNDFiZFrAdIWhZQ  2010-09-13 21:43:09, 2011-05-04 23:08:15, 2011...
2  --30_8IhuyMHbSOcNWd6DQ           2013-06-14 23:29:17, 2014-08-13 23:20:22
3  --7PUidqRWpRSpXebiyxTg  2011-02-15 17:12:00, 2011-07-28 02:46:10, 2012...
4  --7jw19RH9JKXgFohspgQw  2014-04-21 20:42:11, 2014-04-28 21:04:46, 2014...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131930 entries, 0 to 131929
Data columns (total 2 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   business_id  131930 non-null  object
 1   date         131930 non-null  object
dtypes: object(2)
memory usage: 2.0+ MB
None


In [10]:
print(user_df.head())
print(user_df.info())

                  user_id    name  review_count        yelping_since  useful  \
0  qVc8ODYU5SZjKXVBgXdI7w  Walker           585  2007-01-25 16:47:26    7217   
1  j14WgRoU_-2ZE1aw1dXrJg  Daniel          4333  2009-01-25 04:35:42   43091   
2  2WnXYQFK0hXEoTxPtV2zvg   Steph           665  2008-07-25 10:41:00    2086   
3  SZDeASXq7o05mMNLshsdIA    Gwen           224  2005-11-29 04:38:33     512   
4  hA5lMy-EnncsH4JoR-hFGQ   Karen            79  2007-01-05 19:40:59      29   

   funny   cool                                              elite  \
0   1259   5994                                               2007   
1  13066  27281  2009,2010,2011,2012,2013,2014,2015,2016,2017,2...   
2   1010   1003                           2009,2010,2011,2012,2013   
3    330    299                                     2009,2010,2011   
4     15      7                                                      

                                             friends  fans  ...  \
0  NSCy54eWehBJyZdG2iE84w, pe42

In [11]:
print(tip_df.head())
print(tip_df.info())

                  user_id             business_id  \
0  AGNUgVwnZUey3gcPCJ76iw  3uLgwr0qeCNMjKenHJwPGQ   
1  NBN4MgHP9D3cw--SnauTkA  QoezRbYQncpRqyrLH6Iqjg   
2  -copOvldyKh1qr-vzkDEvw  MYoRNLb5chwjQe3c_k37Gg   
3  FjMQVZjSqY8syIO-53KFKw  hV-bABTK-glh5wj31ps_Jw   
4  ld0AperBXk1h6UbqmM80zw  _uN0OudeJ3Zl_tf6nxg5ww   

                                                text                 date  \
0                     Avengers time with the ladies.  2012-05-18 02:17:21   
1  They have lots of good deserts and tasty cuban...  2013-02-05 18:35:10   
2             It's open even when you think it isn't  2013-08-18 00:56:08   
3                          Very decent fried chicken  2017-06-27 23:05:38   
4             Appetizers.. platter special for lunch  2012-10-06 19:43:09   

   compliment_count  
0                 0  
1                 0  
2                 0  
3                 0  
4                 0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 908915 entries, 0 to 908914
Data col

In [11]:
print(review_df.head())
print(review_df.info())

                review_id                 user_id             business_id  \
0  KU_O5udG6zpxOg-VcAEodg  mh_-eMZ6K5RLWhZyISBhwA  XQfwVwDr-v0ZS3_CbbE5Xw   
1  BiTunyQ73aT9WBnpR9DZGw  OyoGAe7OKpv6SyGZT5g77Q  7ATYjTIgM3jUlt4UM3IypQ   
2  saUsX_uimxRlCVr67Z4Jig  8g_iMtfSiwikVnbP2etR0A  YjUWPpI6HXG530lwP-fb2A   
3  AqPFMleE6RsU23_auESxiA  _7bHUi9Uuf5__HHc_Q8guQ  kxX2SOes4o-D3ZQBkiMRfA   
4  Sx8TMOWLNuJBWer-0pcmoA  bcjbaE6dDog4jkNY91ncLQ  e4Vwtrqf-wpJfwesgvdgxQ   

   stars  useful  funny  cool  \
0    3.0       0      0     0   
1    5.0       1      0     1   
2    3.0       0      0     0   
3    5.0       1      0     1   
4    4.0       1      0     1   

                                                text                 date  
0  If you decide to eat here, just be aware it is...  2018-07-07 22:09:11  
1  I've taken a lot of spin classes over the year...  2012-01-03 15:28:18  
2  Family diner. Had the buffet. Eclectic assortm...  2014-02-05 20:30:30  
3  Wow!  Yummy, different,  delici

<h2>Handle Missing Values</h2>

In [12]:
business_df.dropna(subset=['business_id', 'name', 'address'], inplace=True)

<h2>Extract Relevant Fields</h2>

In [13]:
business_df = business_df[['business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count', 'categories']]

<h2>Convert Lists to Structured Format</h2>

In [14]:
business_df['categories'] = business_df['categories'].str.split(', ')

<h2>Remove Duplicates</h2>

In [15]:
business_df.drop_duplicates(subset=['business_id'], inplace=True)

<h2>Flatten Nested JSON</h2>

In [16]:
categories_df = business_df.explode('categories')[['business_id', 'categories']]
categories_df.dropna(subset=['categories'], inplace=True)

<center><h1>Design the SQL Database Schema (Using SQLite)</h1></center>

<h2>Create the Database</h2>

In [17]:
import sqlite3

# Connect to SQLite database (creates a file named 'YelpDB.db')
conn = sqlite3.connect('YelpDB.db')
cursor = conn.cursor()

<h2>Define Tables</h2>

In [18]:
# Create businesses table
cursor.execute('''
    CREATE TABLE IF NOT EXISTS businesses (
        business_id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        address TEXT,
        city TEXT,
        state TEXT,
        postal_code TEXT,
        latitude REAL,
        longitude REAL,
        stars REAL CHECK (stars >= 0 AND stars <= 5),
        review_count INTEGER CHECK (review_count >= 0)
    )
''')

# Create categories table
cursor.execute('''
    CREATE TABLE IF NOT EXISTS categories (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        business_id TEXT,
        category TEXT,
        FOREIGN KEY (business_id) REFERENCES businesses(business_id)
    )
''')

# Commit the changes
conn.commit()

<center><h1>Insert Data into the Database (Using SQLite)</h1></center>

<h2>Insert Business Data</h2>

In [None]:
# Prepare business data for insertion
business_data = business_df[['business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count']].to_records(index=False)

# Insert data into businesses table
cursor.executemany('''
    INSERT INTO businesses (business_id, name, address, city, state, postal_code, latitude, longitude, stars, review_count)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', business_data)

# Commit the changes
conn.commit()

<h2>Insert Category Data</h2>

In [20]:
# Prepare category data for insertion
category_data = categories_df[['business_id', 'categories']].to_records(index=False)

# Insert data into categories table
cursor.executemany('''
    INSERT INTO categories (business_id, category)
    VALUES (?, ?)
''', category_data)

# Commit the changes
conn.commit()

<center><h1>Query and Test the Database (Using SQLite)</h1></center>

<h2>Run Queries</h2>

In [21]:
# Query to fetch the first 5 rows from the businesses table
cursor.execute("SELECT * FROM businesses LIMIT 5")
rows = cursor.fetchall()
for row in rows:
    print(row)

('Pns2l4eNsfO8kk83dixA6A', 'Abby Rappoport, LAC, CMQ', '1616 Chapala St, Ste 2', 'Santa Barbara', 'CA', '93101', 34.4266787, -119.7111968, 5.0, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('mpf3x-BjTdTEA3yCZrAYPw', 'The UPS Store', '87 Grasso Plaza Shopping Center', 'Affton', 'MO', '63123', 38.551126, -90.335695, 3.0, b'\x0f\x00\x00\x00\x00\x00\x00\x00')
('tUFrWirKiKi_TAnsVWINQQ', 'Target', '5255 E Broadway Blvd', 'Tucson', 'AZ', '85711', 32.223236, -110.880452, 3.5, b'\x16\x00\x00\x00\x00\x00\x00\x00')
('MTSW4McQd7CbVtyjqoe9mw', 'St Honore Pastries', '935 Race St', 'Philadelphia', 'PA', '19107', 39.9555052, -75.1555641, 4.0, b'P\x00\x00\x00\x00\x00\x00\x00')
('mWMc6_wTdE0EUBKIGXDVfA', 'Perkiomen Valley Brewery', '101 Walnut St', 'Green Lane', 'PA', '18054', 40.3381827, -75.4716585, 4.5, b'\r\x00\x00\x00\x00\x00\x00\x00')


In [22]:
# Query to fetch the business in Green Lane City from the businesses table
cursor.execute("SELECT * FROM businesses WHERE city='Green Lane'")
rows = cursor.fetchall()
for row in rows:
    print(row)

('mWMc6_wTdE0EUBKIGXDVfA', 'Perkiomen Valley Brewery', '101 Walnut St', 'Green Lane', 'PA', '18054', 40.3381827, -75.4716585, 4.5, b'\r\x00\x00\x00\x00\x00\x00\x00')
('3H6LFT4Pgv0ayy8HZPUFHQ', 'Green Lane Veterinary Hospital', '3015 Main St', 'Green Lane', 'PA', '18054', 40.3339635, -75.4608041, 4.5, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('4mYPqTpCjHqx8Ee4TIERXA', 'Green Lane Naturals', '3113 Main St', 'Green Lane', 'PA', '18054', 40.3310103, -75.4572541, 5.0, b'\t\x00\x00\x00\x00\x00\x00\x00')
('CNGE5oRnF79TGImAIC1VAQ', 'Macoby Run Golf Course', '5275 McLean Station Rd', 'Green Lane', 'PA', '18054', 40.3592628, -75.4685297, 4.5, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('e_BFCZSuLTMHUcYfxvQJXg', 'Boulder Woods Campground', '1050 Camp Skymount Rd', 'Green Lane', 'PA', '18054', 40.377114, -75.411172, 2.5, b'\x06\x00\x00\x00\x00\x00\x00\x00')
('y0wLIXL2bbQ5OSTrSxV2Hw', 'Camp Green Lane', '249 Camp Green Lane Rd', 'Green Lane', 'PA', '18054', 40.3284341208, -75.4296274639, 3.5, b'\x06\x00\x00\

In [24]:
# Query to fetch the business where stars rating is 5.0 from the businesses table
cursor.execute("SELECT * FROM businesses WHERE stars='5.0'")
rows = cursor.fetchall()
for row in rows:
    print(row)

('Pns2l4eNsfO8kk83dixA6A', 'Abby Rappoport, LAC, CMQ', '1616 Chapala St, Ste 2', 'Santa Barbara', 'CA', '93101', 34.4266787, -119.7111968, 5.0, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('jaxMSoInw8Poo3XeMJt8lQ', 'Adams Dental', '15 N Missouri Ave', 'Clearwater', 'FL', '33755', 27.966235, -82.787412, 5.0, b'\n\x00\x00\x00\x00\x00\x00\x00')
('PSo_C1Sfa13JHjzVNW6ziQ', 'Indian Walk Veterinary Center', '662 Durham Rd', 'Newtown', 'PA', '18940', 40.2734926, -74.987496, 5.0, b'\x0f\x00\x00\x00\x00\x00\x00\x00')
('fvWn8oXXwbj2l79cochZyw', 'Altitude Trampoline Park - Boise', '1301 N Milwaukee St', 'Boise', 'ID', '83704', 43.6167635, -116.2853821, 5.0, b'\x1e\x00\x00\x00\x00\x00\x00\x00')
('fSCNwMtNNQY9QT69Cj9fiA', 'Sierra Pro Events', '', 'Sparks', 'NV', '89431', 39.5401545, -119.7483949, 5.0, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('lwItZ1Ck3KtpCgG4CPFmpQ', 'Stomel Elliot Attorney-At-Law', '532 Rte 70 W, Fl 2', 'Cherry Hill', 'NJ', '08002', 39.915478, -75.016973, 5.0, b'\x05\x00\x00\x00\x00\x00\x00\

In [25]:
# Query to fetch the business where stars rating is 5.0 and city is Green Lane from the businesses table
cursor.execute("SELECT * FROM businesses WHERE stars='5.0' AND city='Green Lane'")
rows = cursor.fetchall()
for row in rows:
    print(row)

('4mYPqTpCjHqx8Ee4TIERXA', 'Green Lane Naturals', '3113 Main St', 'Green Lane', 'PA', '18054', 40.3310103, -75.4572541, 5.0, b'\t\x00\x00\x00\x00\x00\x00\x00')


In [26]:
# Query to fetch the business where city is Affton from the businesses table
cursor.execute("SELECT * FROM businesses WHERE city='Affton'")
rows = cursor.fetchall()
for row in rows:
    print(row)

('mpf3x-BjTdTEA3yCZrAYPw', 'The UPS Store', '87 Grasso Plaza Shopping Center', 'Affton', 'MO', '63123', 38.551126, -90.335695, 3.0, b'\x0f\x00\x00\x00\x00\x00\x00\x00')
('k0hlBqXX-Bt0vf1op7Jr1w', "Tsevi's Pub And Grill", '8025 Mackenzie Rd', 'Affton', 'MO', '63123', 38.5651648, -90.3210868, 3.0, b'\x13\x00\x00\x00\x00\x00\x00\x00')
('G_tuSnsAKr1lmspyCfdNRw', 'Telle Tire & Auto Centers', '9000 Gravois Rd', 'Affton', 'MO', '63123', 38.554993, -90.312616, 4.5, b'\x07\x00\x00\x00\x00\x00\x00\x00')
('EZNs18I_Jr7acvNkw-zXHQ', 'Appliance Repair Masters', '', 'Affton', 'MO', '63123', 38.5506084, -90.3331719, 2.5, b'\x06\x00\x00\x00\x00\x00\x00\x00')
('y6ulltaXBGkpsYhtG6UfrA', 'Super Smokers BBQ + Cajun - Affton', '9527 Gravois Rd', 'Affton', 'MO', '63123', 38.5534127141, -90.3226012059, 3.5, b'\x0b\x00\x00\x00\x00\x00\x00\x00')
('k2t4FhKQc42DF2_PlKlynQ', 'Total Access Urgent Care', '9538 Gravois Rd', 'Affton', 'MO', '63123', 38.5525114, -90.3230291, 2.5, b'\x0c\x00\x00\x00\x00\x00\x00\x00')
('

In [27]:
# Query to fetch the business where stars rating is 5.0 and city is Affton from the businesses table
cursor.execute("SELECT * FROM businesses WHERE city='Affton' AND stars='5.0'")
rows = cursor.fetchall()
for row in rows:
    print(row)

('yIEpEliKTg5TtNgeuff7UA', 'Mainstay Barber Parlour', '5233 Weber Rd', 'Affton', 'MO', '63123', 38.5546049, -90.3134851, 5.0, b'\x06\x00\x00\x00\x00\x00\x00\x00')
('p-d4PKr9KjFUh_pzpWPqlA', 'NexCore', '9700 Mackenzie Rd, Ste 221', 'Affton', 'MO', '63123', 38.5428662128, -90.3224814202, 5.0, b'\x07\x00\x00\x00\x00\x00\x00\x00')


<h2>Test Constraints</h2>

In [28]:
try:
    invalid_data = ('123', None, '123 Main St', 'City', 'State', '12345', 40.0, -75.0, 6, 100)
    cursor.execute('''
        INSERT INTO businesses (business_id, name, address, city, state, postal_code, latitude, longitude, stars, review_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', invalid_data)
    conn.commit()
except sqlite3.IntegrityError as e:
    print("Error:", e)

Error: NOT NULL constraint failed: businesses.name


<h2>Perform Analysis</h2>

In [29]:
cursor.execute('''
    SELECT category, AVG(stars) AS avg_rating
    FROM businesses
    JOIN categories ON businesses.business_id = categories.business_id
    GROUP BY category
    ORDER BY avg_rating DESC
''')
rows = cursor.fetchall()
for row in rows:
    print(row)

('Water Suppliers', 5.0)
('Sport Equipment Hire', 5.0)
('Somali', 5.0)
('Silent Disco', 5.0)
('Patent Law', 5.0)
('Mohels', 5.0)
('Metal Detector Services', 5.0)
('Karaoke Rental', 5.0)
('Experiences', 5.0)
('Circus Schools', 5.0)
('Childproofing', 5.0)
('Cheese Tasting Classes', 5.0)
('Calligraphy', 5.0)
('Bubble Soccer', 5.0)
('Art Consultants', 5.0)
('Real Estate Photography', 4.90625)
('Undersea/Hyperbaric Medicine', 4.9)
('Gerontologists', 4.875)
('Art Tours', 4.861111111111111)
('Boudoir Photography', 4.837837837837838)
('Qi Gong', 4.833333333333333)
('Badminton', 4.833333333333333)
('Commissioned Artists', 4.8076923076923075)
('Glass Blowing', 4.8)
('Audio/Visual Equipment Rental', 4.791666666666667)
('Free Diving', 4.785714285714286)
('Lice Services', 4.78125)
('Waldorf Schools', 4.75)
('Snorkeling', 4.75)
('Placenta Encapsulations', 4.75)
('Outdoor Movies', 4.75)
('Drama Schools', 4.75)
('Bike tours', 4.75)
('Bicycle Paths', 4.75)
('Art Installation', 4.75)
('Brazilian Jiu-jit

<center><h1>Prepare Submission Files</h1></center>

<h2>Save JSON Data</h2>

In [30]:
business_df.to_json('yelp_data.json', orient='records', lines=True)

<h2>Export SQL Schema</h2>

In [31]:
import sqlite3

# Connect to the SQLite database (or create it if it doesn’t exist)
conn = sqlite3.connect("yelp.db")
cursor = conn.cursor()

# Create the tables
cursor.executescript("""
CREATE TABLE IF NOT EXISTS businesses (
    business_id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    address TEXT,
    city TEXT,
    state TEXT,
    postal_code TEXT,
    latitude REAL,
    longitude REAL,
    stars REAL CHECK (stars >= 0 AND stars <= 5),
    review_count INTEGER CHECK (review_count >= 0)
);

CREATE TABLE IF NOT EXISTS categories (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    business_id TEXT,
    category TEXT,
    FOREIGN KEY (business_id) REFERENCES businesses(business_id)
);
""")

# Commit and close connection
conn.commit()
conn.close()


<h2>Saved Yelp Data Records File</h2>

In [32]:
# Load business dataset
with open('yelp_data.json', 'r', encoding='utf-8') as f:
    business_data = [json.loads(line) for line in f]
business_df = pd.DataFrame(business_data)
business_df.head(10)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,"[Doctors, Traditional Chinese Medicine, Naturo..."
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,"[Shipping Centers, Local Services, Notaries, M..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,"[Department Stores, Shopping, Fashion, Home & ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,"[Restaurants, Food, Bubble Tea, Coffee & Tea, ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,"[Brewpubs, Breweries, Food]"
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,TN,37015,36.269593,-87.058943,2.0,6,"[Burgers, Fast Food, Sandwiches, Food, Ice Cre..."
6,n_0UpQx1hsNbnPUSlodU8w,Famous Footwear,"8522 Eager Road, Dierbergs Brentwood Point",Brentwood,MO,63144,38.627695,-90.340465,2.5,13,"[Sporting Goods, Fashion, Shoe Stores, Shoppin..."
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707,27.76659,-82.732983,3.5,5,"[Synagogues, Religious Organizations]"
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,8025 Mackenzie Rd,Affton,MO,63123,38.565165,-90.321087,3.0,19,"[Pubs, Restaurants, Italian, Bars, American (T..."
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,TN,37207,36.208102,-86.76817,1.5,10,"[Ice Cream & Frozen Yogurt, Fast Food, Burgers..."


In [33]:
business_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150346 entries, 0 to 150345
Data columns (total 11 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   business_id   150346 non-null  object 
 1   name          150346 non-null  object 
 2   address       150346 non-null  object 
 3   city          150346 non-null  object 
 4   state         150346 non-null  object 
 5   postal_code   150346 non-null  object 
 6   latitude      150346 non-null  float64
 7   longitude     150346 non-null  float64
 8   stars         150346 non-null  float64
 9   review_count  150346 non-null  int64  
 10  categories    150243 non-null  object 
dtypes: float64(3), int64(1), object(7)
memory usage: 12.6+ MB
