In [1]:
import pandas as pd

# Load the Yelp business data from the JSON file
business_df = pd.read_json('yelp_academic_dataset_business.json', lines=True)

# Display the first few rows to understand the structure
print(business_df.head())

              business_id                      name  \
0  Pns2l4eNsfO8kk83dixA6A  Abby Rappoport, LAC, CMQ   
1  mpf3x-BjTdTEA3yCZrAYPw             The UPS Store   
2  tUFrWirKiKi_TAnsVWINQQ                    Target   
3  MTSW4McQd7CbVtyjqoe9mw        St Honore Pastries   
4  mWMc6_wTdE0EUBKIGXDVfA  Perkiomen Valley Brewery   

                           address           city state postal_code  \
0           1616 Chapala St, Ste 2  Santa Barbara    CA       93101   
1  87 Grasso Plaza Shopping Center         Affton    MO       63123   
2             5255 E Broadway Blvd         Tucson    AZ       85711   
3                      935 Race St   Philadelphia    PA       19107   
4                    101 Walnut St     Green Lane    PA       18054   

    latitude   longitude  stars  review_count  is_open  \
0  34.426679 -119.711197    5.0             7        0   
1  38.551126  -90.335695    3.0            15        1   
2  32.223236 -110.880452    3.5            22        0   
3  39.9555

In [2]:
# Ensure 'categories' column is a list (if it's not already)
business_df['categories'] = business_df['categories'].apply(lambda x: x.split(', ') if isinstance(x, str) else x)

# Check the transformed 'categories' column
print(business_df[['name', 'categories']].head())

                       name                                         categories
0  Abby Rappoport, LAC, CMQ  [Doctors, Traditional Chinese Medicine, Naturo...
1             The UPS Store  [Shipping Centers, Local Services, Notaries, M...
2                    Target  [Department Stores, Shopping, Fashion, Home & ...
3        St Honore Pastries  [Restaurants, Food, Bubble Tea, Coffee & Tea, ...
4  Perkiomen Valley Brewery                        [Brewpubs, Breweries, Food]


In [3]:
category_to_industry = {
    'Restaurants': 'Food & Restaurants',
    'Food': 'Food & Restaurants',
    'Shopping': 'Retail',
    'Home Services': 'Home Services',
    'Beauty & Spas': 'Beauty & Spas',
    'Nightlife': 'Nightlife',
    'Health & Medical': 'Healthcare',
    'Local Services': 'Home Services',
    'Bars': 'Nightlife',
    'Automotive': 'Automobile',
    'Event Planning & Services': 'Event Planning & Services',
    'Sandwiches': 'Food & Restaurants',
    'American (Traditional)': 'Food & Restaurants',
    'Active Life': 'Health & Fitness',
    'Pizza': 'Food & Restaurants',
    'Coffee & Tea': 'Food & Restaurants',
    'Fast Food': 'Food & Restaurants',
    'Breakfast & Brunch': 'Food & Restaurants',
    'American (New)': 'Food & Restaurants',
    'Hotels & Travel': 'Travel & Hospitality',
    'Home & Garden': 'Retail',
    'Fashion': 'Retail',
    'Burgers': 'Food & Restaurants',
    'Arts & Entertainment': 'Entertainment & Culture',
    'Auto Repair': 'Automobile',
    'Hair Salons': 'Beauty & Spas',
    'Nail Salons': 'Beauty & Spas',
    'Mexican': 'Food & Restaurants',
    'Italian': 'Food & Restaurants',
    'Specialty Food': 'Food & Restaurants',
    'Doctors': 'Healthcare',
    'Pets': 'Pet Services',
    'Real Estate': 'Real Estate',
    'Seafood': 'Food & Restaurants',
    'Fitness & Instruction': 'Health & Fitness',
    'Professional Services': 'Professional Services',
    'Hair Removal': 'Beauty & Spas',
    'Desserts': 'Food & Restaurants',
    'Chinese': 'Food & Restaurants',
    'Bakeries': 'Food & Restaurants',
    'Grocery': 'Retail',
    'Salad': 'Food & Restaurants',
    'Hotels': 'Travel & Hospitality',
    'Chicken Wings': 'Food & Restaurants',
    'Cafes': 'Food & Restaurants',
    'Ice Cream & Frozen Yogurt': 'Food & Restaurants',
    'Caterers': 'Event Planning & Services',
    'Pet Services': 'Pet Services',
    'Dentists': 'Healthcare',
    'Skin Care': 'Beauty & Spas',
    'Venues & Event Spaces': 'Event Planning & Services',
    'Tires': 'Automobile',
    'Wine & Spirits': 'Food & Restaurants',
    'Beer': 'Food & Restaurants',
    'Delis': 'Food & Restaurants',
    'Oil Change Stations': 'Automobile',
    'Waxing': 'Beauty & Spas',
    'Contractors': 'Home Services',
    'Women\'s Clothing': 'Retail',
    'Massage': 'Beauty & Spas',
    'Sports Bars': 'Nightlife',
    'Day Spas': 'Beauty & Spas',
    'General Dentistry': 'Healthcare',
    'Education': 'Education',
    'Flowers & Gifts': 'Retail',
    'Auto Parts & Supplies': 'Automobile',
    'Apartments': 'Real Estate',
    'Convenience Stores': 'Retail',
    'Home Decor': 'Retail',
    'Gyms': 'Health & Fitness',
    'Japanese': 'Food & Restaurants',
    'Pubs': 'Nightlife',
    'Cocktail Bars': 'Nightlife',
    'Sushi Bars': 'Food & Restaurants',
    'Barbeque': 'Food & Restaurants',
    'Juice Bars & Smoothies': 'Food & Restaurants',
    'Barbers': 'Beauty & Spas',
    'Car Dealers': 'Automobile',
    'Sporting Goods': 'Retail',
    'Accessories': 'Retail',
    'Drugstores': 'Retail',
    'Cosmetic Dentists': 'Healthcare',
    'Local Flavor': 'Entertainment & Culture',
    'Furniture Stores': 'Retail',
    'Pet Groomers': 'Pet Services',
    'Asian Fusion': 'Food & Restaurants',
    'Cosmetics & Beauty Supply': 'Retail',
    'Jewelry': 'Retail',
    'Steakhouses': 'Food & Restaurants',
    'Diners': 'Food & Restaurants',
    'Financial Services': 'Professional Services',
    'Trainers': 'Health & Fitness',
    'Hair Stylists': 'Beauty & Spas',
    'Arts & Crafts': 'Entertainment & Culture',
    'Department Stores': 'Retail',
    'Electronics': 'Retail',
    'Veterinarians': 'Pet Services',
    'Massage Therapy': 'Beauty & Spas',
    'Pet Sitting': 'Pet Services',
    'Eyelash Service': 'Beauty & Spas',
    'Men\'s Clothing': 'Retail',
    'Food Trucks': 'Food & Restaurants',
    'Mediterranean': 'Food & Restaurants',
    'Body Shops': 'Automobile',
    'Public Services & Government': 'Community Services',
    'Heating & Air Conditioning/HVAC': 'Home Services',
    'Lounges': 'Nightlife',
    'Wine Bars': 'Nightlife',
    'IT Services & Computer Repair': 'Professional Services',
    'Vegetarian': 'Food & Restaurants',
    'Laundry Services': 'Home Services',
    'Medical Centers': 'Healthcare',
    'Optometrists': 'Healthcare',
    'Plumbing': 'Home Services',
    'Gas Stations': 'Automobile',
    'Music Venues': 'Entertainment & Culture',
    'Donuts': 'Food & Restaurants',
    'Dry Cleaning & Laundry': 'Home Services',
    'Party & Event Planning': 'Event Planning & Services',
    'Soup': 'Food & Restaurants',
    'Food Delivery Services': 'Food & Restaurants',
    'Pet Stores': 'Pet Services',
    'Tacos': 'Food & Restaurants',
    'Shoe Stores': 'Retail',
    'Eyewear & Opticians': 'Healthcare',
    'Southern': 'Food & Restaurants',
    'Parks': 'Recreation',
    'Mags': 'Retail',
    'Music & Video': 'Retail',
    'Books': 'Retail',
    'Makeup Artists': 'Beauty & Spas',
    'Thai': 'Food & Restaurants',
    'Car Wash': 'Automobile',
    'Florists': 'Retail',
    'Building Supplies': 'Home Services',
    'Mobile Phones': 'Retail',
    'Movers': 'Home Services',
    'Yoga': 'Health & Fitness',
    'Specialty Schools': 'Education',
    'Cajun/Creole': 'Food & Restaurants',
    'Tex-Mex': 'Food & Restaurants',
    'Auto Detailing': 'Home Services',
    'Landscaping': 'Home Services',
    'Nurseries & Gardening': 'Retail',
    'Appliances': 'Retail',
    'Beer Bar': 'Nightlife',
    'Discount Store': 'Retail',
    'Breweries': 'Nightlife',
    'Transportation': 'Travel & Transportation',
    'Vegan': 'Food & Restaurants',
    'Vietnamese': 'Food & Restaurants',
    'Home Cleaning': 'Home Services',
    'Tours': 'Travel & Transportation',
    'Sewing & Alterations': 'Arts & Crafts',
    'Indian': 'Food & Restaurants',
    'Men\'s Hair Salons': 'Beauty & Spas',
    'Latin American': 'Food & Restaurants',
    'Chicken Shop': 'Food & Restaurants',
    'Appliances & Repair': 'Home Services',
    'Real Estate Services': 'Real Estate',
    'Gift Shops': 'Retail',
    'Self Storage': 'Home Services',
    'Shipping Centers': 'Professional Services',
    'Hardware Stores': 'Retail',
    'Printing Services': 'Professional Services',
    'Art Galleries': 'Arts & Crafts',
    'Medical Spas': 'Beauty & Spas',
    'Greek': 'Food & Restaurants',
    'Gluten-Free': 'Food & Restaurants',
    'Hot Dogs': 'Food & Restaurants',
    'Kitchen & Bath': 'Home Services',
    'Property Management': 'Real Estate',
    'Vintage & Consignment': 'Retail',
    'Used': 'Retail',
    'Oral Surgeons': 'Healthcare',
    'Banks & Credit Unions': 'Professional Services',
    'Comfort Food': 'Food & Restaurants',
    'Bagels': 'Food & Restaurants',
    'Auto Glass Services': 'Automotive',
    'Chiropractors': 'Healthcare',
    'Wedding Planning': 'Events & Planning',
    'Endodontists': 'Healthcare',
    'Towing': 'Automotive',
    'Tobacco Shops': 'Retail',
    'Car Rental': 'Travel & Transportation',
    'Tanning': 'Beauty & Spas',
    'Orthodontists': 'Healthcare',
    'Ethnic Food': 'Food & Restaurants',
    'Transmission Repair': 'Automotive',
    'Antiques': 'Arts & Crafts',
    'Dive Bars': 'Nightlife',
    'Mattresses': 'Retail',
    'Hair Extensions': 'Beauty & Spas',
    'Buffets': 'Food & Restaurants',
    'Flooring': 'Home Services',
    'Cheesesteaks': 'Food & Restaurants',
    'Thrift Stores': 'Retail',
    'Tattoo': 'Beauty & Spas',
    'Health Markets': 'Health & Wellness Products',
    'Caribbean': 'Food & Restaurants',
    'Middle Eastern': 'Food & Restaurants',
    'Performing Arts': 'Arts & Crafts',
    'Sports Wear': 'Retail',
    'Real Estate Agents': 'Real Estate',
    'Pet Training': 'Pets & Animal Services',
    'Tree Services': 'Home Services',
    'Physical Therapy': 'Healthcare',
    'Water Heater Installation/Repair': 'Home Services',
    'Bridal': 'Events & Planning',
    'Hobby Shops': 'Arts & Crafts',
    'Meat Shops': 'Food & Restaurants',
    'Urgent Care': 'Healthcare',
    'Soul Food': 'Food & Restaurants',
    'Fruits & Veggies': 'Food & Restaurants',
    'Pediatric Dentists': 'Healthcare',
    'Carpet Cleaning': 'Home Services',
    'Mobile Phone Repair': 'Tech & Electronics',
    'Toy Stores': 'Retail',
    'Notaries': 'Professional Services',
    'Gastropubs': 'Nightlife',
    'Interior Design': 'Home Services',
    'Photographers': 'Events & Planning',
    'Pharmacy': 'Healthcare',
    'Electronics Repair': 'Tech & Electronics',
    'Family Practice': 'Healthcare',
    'French': 'Food & Restaurants',
    'Community Service/Non-Profit': 'Community Services',
    'Pest Control': 'Home Services',
    'Farmers Market': 'Food & Restaurants',
    'Korean': 'Food & Restaurants',
    'Golf': 'Recreation',
    'Electricians': 'Home Services',
    'Bubble Tea': 'Food & Restaurants',
    'Vape Shops': 'Retail',
    'Mobile Phone Accessories': 'Tech & Electronics',
    'Roofing': 'Home Services',
    'Diagnostic Services': 'Healthcare',
    'Photography Stores & Services': 'Retail',
    'Periodontists': 'Healthcare',
    'Bikes': 'Recreation',
    'Blow Dry/Out Services': 'Beauty & Spas',
    'Eyebrow Services': 'Beauty & Spas',
    'Dance Clubs': 'Nightlife',
    'Windows Installation': 'Home Services',
    'Painters': 'Home Services',
    'Children\'s Clothing': 'Retail',
    'Festivals': 'Events & Planning',
    'Weight Loss Centers': 'Fitness & Wellness',
    'Tapas/Small Plates': 'Food & Restaurants',
    'Nutritionists': 'Health & Wellness Products',
    'Used Car Dealers': 'Automotive',
    'Pet Boarding': 'Pets & Animal Services',
    'Acupuncture': 'Healthcare',
    'Food Stands': 'Food & Restaurants',
    'Cupcakes': 'Food & Restaurants',
    'Ophthalmologists': 'Healthcare',
    'Bookstores': 'Retail',
    'Child Care & Day Care': 'Education & Learning',
    'Museums': 'Arts & Crafts',
    'Reflexology': 'Healthcare',
    'Insurance': 'Professional Services',
    'Junk Removal & Hauling': 'Home Services',
    'Telecommunications': 'Tech & Electronics',
    'Hospitals': 'Healthcare',
    'Laser Hair Removal': 'Beauty & Spas',
    'Noodles': 'Food & Restaurants',
    'Dry Cleaning': 'Home Services',
    'Arcades': 'Entertainment',
    'Permanent Makeup': 'Beauty & Spas',
    'Office Cleaning': 'Home Services',
    'Outdoor Gear': 'Retail',
    'Piercing': 'Beauty & Spas',
    'Kids Activities': 'Recreation',
    'Cosmetic Surgeons': 'Healthcare',
    'Mortgage Brokers': 'Professional Services',
    'Masonry/Concrete': 'Home Services',
    'Lawyers': 'Professional Services',
    'Halal': 'Food & Restaurants',
    'Cinema': 'Entertainment',
    'Street Vendors': 'Food & Restaurants',
    'Session Photography': 'Events & Planning',
    'Candy Stores': 'Retail',
    'Handyman': 'Home Services',
    'Internet Service Providers': 'Tech & Electronics',
    'Damage Restoration': 'Home Services',
    'Windshield Installation & Repair': 'Automotive',
    'Jewelry Repair': 'Retail',
    'Preschools': 'Education & Learning',
    'Wineries': 'Nightlife',
    'Mailbox Centers': 'Professional Services',
    'Computers': 'Tech & Electronics',
    'Chocolatiers & Shops': 'Retail',
    'Seafood Markets': 'Food & Restaurants',
    'Boot Camps': 'Fitness & Wellness',
    'Lighting Fixtures & Equipment': 'Retail',
    'Dermatologists': 'Healthcare',
    'Spray Tanning': 'Beauty & Spas',
    'Watches': 'Retail',
    'Airport Shuttles': 'Travel & Transportation',
    'Fishing': 'Recreation & Outdoor Activities',
    'Barre Classes': 'Fitness & Wellness',
    'Home Organization': 'Home & Local Services',
    'Irish Pub': 'Restaurants & Food',
    'Mass Media': 'Professional Services',
    'Lawn Services': 'Home & Local Services',
    'Countertop Installation': 'Home & Local Services',
    'Threading Services': 'Beauty, Spas & Personal Care',
    'Pretzels': 'Restaurants & Food',
    'Gun/Rifle Ranges': 'Recreation & Outdoor Activities',
    'Drywall Installation & Repair': 'Home & Local Services',
    'Rugs': 'Retail & Shopping',
    'Cheese Shops': 'Retail & Shopping',
    'Home & Rental Insurance': 'Professional Services',
    'Animal Shelters': 'Pets & Animal Services',
    'Wine Tasting Room': 'Beverages & Specialty Foods',
    'Cardio Classes': 'Fitness & Wellness',
    'Refinishing Services': 'Home & Local Services',
    'Comic Books': 'Arts, Crafts & Culture',
    'Walk-in Clinics': 'Healthcare & Medical',
    'Life Insurance': 'Healthcare & Medical',
    'Propane': 'Automotive',
    'Personal Injury Law': 'Professional Services',
    'Do-It-Yourself Food': 'Restaurants & Food',
    'Internet Cafes': 'Tech & Electronics',
    'Vacation Rentals': 'Travel, Hotels & Transportation',
    'Vinyl Records': 'Retail & Shopping',
    'Departments of Motor Vehicles': 'Community & Public Services',
    'Dim Sum': 'Restaurants & Food',
    'Leather Goods': 'Retail & Shopping',
    'Social Clubs': 'Entertainment & Nightlife',
    'Cannabis Clinics': 'Healthcare & Medical',
    'Tax Services': 'Professional Services',
    'Home Theatre Installation': 'Tech & Electronics',
    'Costumes': 'Arts, Crafts & Culture',
    'Historical Tours': 'Events & Planning',
    'Supernatural Readings': 'Entertainment & Nightlife',
    'Team Building Activities': 'Fitness & Wellness',
    'Divorce & Family Law': 'Professional Services',
    'Public Transportation': 'Community & Public Services',
    'Educational Services': 'Education & Learning',
    'Gay Bars': 'Entertainment & Nightlife',
    'Plus Size Fashion': 'Retail & Shopping',
    'Carpet Installation': 'Home & Local Services',
    'Water Purification Services': 'Home & Local Services',
    'Gold Buyers': 'Retail & Shopping',
    'Modern European': 'Restaurants & Food',
    'Country Clubs': 'Recreation & Outdoor Activities',
    'Boxing': 'Fitness & Wellness',
    'Stadiums & Arenas': 'Entertainment & Nightlife',
    'Life Coach': 'Professional Services',
    'Boat Charters': 'Travel, Hotels & Transportation',
    'Food Court': 'Restaurants & Food',
    'Tanning Beds': 'Beauty, Spas & Personal Care',
    'Cannabis Dispensaries': 'Beverages & Specialty Foods',
    'Screen Printing/T-Shirt Printing': 'Tech & Electronics',
    'Web Design': 'Tech & Electronics',
    'Marketing': 'Professional Services',
    'Party Bus Rentals': 'Events & Planning',
    'DJs': 'Entertainment & Nightlife',
    'Rafting/Kayaking': 'Recreation & Outdoor Activities',
    'Accountants': 'Professional Services',
    'Fireplace Service': 'Home & Local Services',
    'German': 'Restaurants & Food',
    'Podiatrists': 'Healthcare & Medical',
    'Knitting Supplies': 'Retail & Shopping',
    'Surgeons': 'Healthcare & Medical',
    'Home Automation': 'Tech & Electronics',
    'Teeth Whitening': 'Beauty, Spas & Personal Care',
    'Beaches': 'Recreation & Outdoor Activities',
    'Airports': 'Travel, Hotels & Transportation',
    'Mortgage Lenders': 'Real Estate & Housing',
    'University Housing': 'Real Estate & Housing',
    'Cycling Classes': 'Fitness & Wellness',
    'Data Recovery': 'Tech & Electronics',
    'Paint Stores': 'Retail & Shopping',
    'Adult': 'Entertainment & Nightlife',
    'Airlines': 'Travel, Hotels & Transportation',
    'Decks & Railing': 'Home & Local Services',
    'Septic Services': 'Home & Local Services',
    'Patisserie/Cake Shop': 'Restaurants & Food',
    'Amateur Sports Teams': 'Recreation & Outdoor Activities',
    'Commercial Real Estate': 'Real Estate & Housing',
    'Solar Installation': 'Home & Local Services',
    'Personal Chefs': 'Restaurants & Food',
    'Pain Management': 'Healthcare & Medical',
    'African': 'Restaurants & Food',
    'Casinos': 'Entertainment & Nightlife',
    'Szechuan': 'Restaurants & Food',
    'Chimney Sweeps': 'Home & Local Services',
    'Souvenir Shops': 'Retail & Shopping',
    'Home Health Care': 'Healthcare & Medical',
    'Herbs & Spices': 'Retail & Shopping',
    'Skating Rinks': 'Recreation & Outdoor Activities',
    'Swimwear': 'Retail & Shopping',
    'New Mexican Cuisine': 'Restaurants & Food',
    'Wine Tours': 'Beverages & Specialty Foods',
    'Candle Stores': 'Retail & Shopping',
    'RV Dealers': 'Automotive',
    'Paint & Sip': 'Arts, Crafts & Culture',
    'RV Repair': 'Home & Local Services',
    'Pawn Shops': 'Retail & Shopping',
    'Filipino': 'Restaurants & Food',
    'Grout Services': 'Home & Local Services',
    'Distilleries': 'Beverages & Specialty Foods',
    'Carpenters': 'Home & Local Services',
    'Bespoke Clothing': 'Retail & Shopping',
    'Plastic Surgeons': 'Beauty, Spas & Personal Care',
    'Falafel': 'Restaurants & Food',
    'Dance Schools': 'Entertainment & Nightlife',
    'Utilities': 'Home & Local Services',
    'Donation Center': 'Community & Public Services',
    'Video Game Stores': 'Retail & Shopping',
    'Walking Tours': 'Travel, Hotels & Transportation',
    'Occupational Therapy': 'Healthcare & Medical',
    'Wildlife Control': 'Pets & Animal Services',
    'Meditation Centers': 'Fitness & Wellness',
    'Puerto Rican': 'Restaurants & Food',
    'Paddleboarding': 'Recreation & Outdoor Activities',
    'Waterproofing': 'Home & Local Services',
    'Fitness/Exercise Equipment': 'Fitness & Wellness',
    'Comedy Clubs': 'Entertainment & Nightlife',
    'Retirement Homes': 'Real Estate & Housing',
    'Shutters': 'Home & Local Services',
    'Car Buyers': 'Automotive',
    'Mini Golf': 'Recreation & Outdoor Activities',
    'Art Schools': 'Arts, Crafts & Culture',
    'Adult Education': 'Education & Learning',
    'Body Contouring': 'Beauty, Spas & Personal Care',
    'Cosmetology Schools': 'Beauty, Spas & Personal Care',
    'Cantonese': 'Restaurants & Food',
    'Emergency Rooms': 'Healthcare & Medical',
    'Battery Stores': 'Home & Local Services',
    'Insulation Installation': 'Home & Local Services',
    'Campgrounds': 'Recreation & Outdoor Activities',
    'Photo Booth Rentals': 'Events & Planning',
    'Cooking Classes': 'Education & Learning',
    'Tattoo Removal': 'Beauty, Spas & Personal Care',
    'Criminal Defense Law': 'Professional Services',
    'Advertising': 'Professional Services',
    'Kebab': 'Restaurants & Food',
    'Uniforms': 'Retail & Shopping',
    'Investing': 'Professional Services',
    'Psychiatrists': 'Healthcare & Medical',
    'Live/Raw Food': 'Restaurants & Food',
    'Customized Merchandise': 'Retail & Shopping',
    'Farms': 'Community & Public Services',
    'Spiritual Shop': 'Health & Wellness Products',
    'Tennis': 'Fitness & Wellness',
    'Motorcycle Gear': 'Retail & Shopping',
    'Video/Film Production': 'Tech & Electronics',
    'Vehicle Wraps': 'Automotive',
    'Hunting & Fishing Supplies': 'Retail & Shopping',
    'Wigs': 'Retail & Shopping',
    'RV Parks': 'Travel, Hotels & Transportation',
    'Donairs': 'Restaurants & Food',
    'Turkish': 'Restaurants & Food',
    'Whiskey Bars': 'Beverages & Specialty Foods',
    'Guitar Stores': 'Retail & Shopping',
    'Legal Services': 'Professional Services',
    'Lebanese': 'Restaurants & Food',
    'Swimming Lessons/Schools': 'Fitness & Wellness',
    'Outdoor Furniture Stores': 'Retail & Shopping',
    'Peruvian': 'Restaurants & Food',
    'Appraisal Services': 'Professional Services',
    'Employment Agencies': 'Professional Services',
    'Screen Printing': 'Tech & Electronics',
    'Indoor Playcentre': 'Recreation & Outdoor Activities',
    'Radiologists': 'Healthcare & Medical',
    'Auto Upholstery': 'Automotive',
    'Kickboxing': 'Fitness & Wellness',
    'Metal Fabricators': 'Home & Local Services',
    'Ear Nose & Throat': 'Healthcare & Medical',
    'General Litigation': 'Professional Services',
    'Boat Repair': 'Automotive',
    'Registration Services': 'Home & Local Services',
    'Brazilian': 'Restaurants & Food',
    'Middle Schools & High Schools': 'Education & Learning',
    'Taiwanese': 'Restaurants & Food',
    'Tabletop Games': 'Retail & Shopping',
    'Print Media': 'Tech & Electronics',
    'Cooking Schools': 'Education & Learning',
    'Pop-Up Restaurants': 'Restaurants & Food',
    'Smokehouse': 'Restaurants & Food',
    'British': 'Restaurants & Food',
    'Hot Pot': 'Restaurants & Food',
    'Embroidery & Crochet': 'Arts, Crafts & Culture',
    'Gymnastics': 'Fitness & Wellness',
    'Trampoline Parks': 'Recreation & Outdoor Activities',
    'Emergency Pet Hospital': 'Pets & Animal Services',
    'Videographers': 'Tech & Electronics',
    'Kosher': 'Restaurants & Food',
    'Estate Planning Law': 'Professional Services',
    'Tiki Bars': 'Entertainment & Nightlife',
    'Generator Installation/Repair': 'Home & Local Services',
    'Golf Equipment': 'Recreation & Outdoor Activities',
    'Orthotics': 'Healthcare & Medical',
    'Bus Tours': 'Travel, Hotels & Transportation',
    'Flea Markets': 'Retail & Shopping',
    'DUI Law': 'Professional Services',
    'Aquariums': 'Recreation & Outdoor Activities',
    'Pan Asian': 'Restaurants & Food',
    'Colombian': 'Restaurants & Food',
    'Ethiopian': 'Restaurants & Food',
    'Horseback Riding': 'Recreation & Outdoor Activities',
    'Saunas': 'Fitness & Wellness',
    'Allergists': 'Healthcare & Medical',
    'Mountain Biking': 'Recreation & Outdoor Activities',
    'Commercial Truck Repair': 'Automotive',
    'Patio Coverings': 'Home & Local Services',
    'TV Mounting': 'Home & Local Services',
    'Furniture Assembly': 'Home & Local Services',
    'Hydro-jetting': 'Home & Local Services',
    'Empanadas': 'Restaurants & Food',
    'Farming Equipment': 'Retail & Shopping',
    'Golf Lessons': 'Fitness & Wellness',
    'Food Tours': 'Recreation & Outdoor Activities',
    'Vocational & Technical School': 'Education & Learning',
    'Neurologist': 'Healthcare & Medical',
    'Christmas Trees': 'Retail & Shopping',
    'Guest Houses': 'Travel, Hotels & Transportation',
    'Boat Tours': 'Travel, Hotels & Transportation',
    'Security Services': 'Community & Public Services',
    'Salvadoran': 'Restaurants & Food',
    'Business Consulting': 'Professional Services',
    'Perfume': 'Retail & Shopping',
    'Driving Schools': 'Education & Learning',
    'Wholesalers': 'Retail & Shopping',
    'Botanical Gardens': 'Arts, Crafts & Culture',
    'Climbing': 'Recreation & Outdoor Activities',
    'Cultural Center': 'Arts, Crafts & Culture',
    'Hair Loss Centers': 'Beauty, Spas & Personal Care',
    'Shared Office Spaces': 'Real Estate & Housing',
    'Stucco Services': 'Home & Local Services',
    'Hats': 'Retail & Shopping',
    'Psychics': 'Health & Wellness Products',
    'Alternative Medicine': 'Health & Wellness Products',
    'Firearm Training': 'Community & Public Services',
    'Professional Sports Teams': 'Entertainment & Nightlife',
    'Auction Houses': 'Retail & Shopping',
    'Environmental Testing': 'Community & Public Services',
    'RV Rental': 'Travel, Hotels & Transportation',
    'Popcorn Shops': 'Retail & Shopping',
    'Tutoring Centers': 'Education & Learning',
    'Assisted Living Facilities': 'Real Estate & Housing',
    'Maternity Wear': 'Retail & Shopping',
    'Business Law': 'Professional Services',
    'Special Education': 'Education & Learning',
    'Axe Throwing': 'Recreation & Outdoor Activities',
    'Town Car Service': 'Travel, Hotels & Transportation',
    'Cigar Bars': 'Entertainment & Nightlife',
    'Macarons': 'Restaurants & Food',
    'Furniture Repair': 'Home & Local Services',
    'Beverage Store': 'Beverages & Specialty Foods',
    'Sleep Specialists': 'Healthcare & Medical',
    'Sunglasses': 'Retail & Shopping',
    'Zoos': 'Pets & Animal Services',
    'Auto Security': 'Automotive',
    'Home Network Installation': 'Tech & Electronics',
    'Diving': 'Recreation & Outdoor Activities',
    'Beer Tours': 'Beverages & Specialty Foods',
    'Boat Dealers': 'Travel, Hotels & Transportation',
    'Cardiologists': 'Healthcare & Medical',
    'Float Spa': 'Beauty, Spas & Personal Care',
    'Herbal Shops': 'Retail & Shopping',
    'Environmental Abatement': 'Home & Local Services',
    'Challenge Courses': 'Recreation & Outdoor Activities',
    'Bartenders': 'Entertainment & Nightlife',
    'Laser Tag': 'Entertainment & Nightlife',
    'Brazilian Jiu-jitsu': 'Fitness & Wellness',
    'Employment Law': 'Professional Services',
    'Kids Hair Salons': 'Beauty, Spas & Personal Care',
    'Moroccan': 'Restaurants & Food',
    'Demolition Services': 'Home & Local Services',
    'Hostels': 'Travel, Hotels & Transportation',
    'Estheticians': 'Beauty, Spas & Personal Care',
    'Yelp Events': 'Events & Planning',
    'Haunted Houses': 'Entertainment & Nightlife',
    'Laotian': 'Restaurants & Food',
    'Venezuelan': 'Restaurants & Food',
    'Service Stations': 'Home & Local Services',
    'IV Hydration': 'Health & Wellness Products',
    'Piano Bars': 'Entertainment & Nightlife',
    'Acne Treatment': 'Beauty, Spas & Personal Care',
    'Real Estate Law': 'Professional Services',
    'Cafeteria': 'Restaurants & Food',
    'Brewing Supplies': 'Beverages & Specialty Foods',
    'Tui Na': 'Beauty, Spas & Personal Care',
    'Piano Services': 'Home & Local Services',
    'Brasseries': 'Restaurants & Food',
    'Art Museums': 'Arts, Crafts & Culture',
    'Fertility': 'Healthcare & Medical',
    'Skilled Nursing': 'Healthcare & Medical',
    'Radio Stations': 'Entertainment & Nightlife',
    'Private Tutors': 'Education & Learning',
    'Bankruptcy Law': 'Professional Services',
    'Blood & Plasma Donation Centers': 'Healthcare & Medical',
    'Musicians': 'Entertainment & Nightlife',
    'Leisure Centers': 'Recreation & Outdoor Activities',
    'Aquarium Services': 'Pets & Animal Services',
    'Officiants': 'Events & Planning',
    'Skate Shops': 'Retail & Shopping',
    'Car Inspectors': 'Automotive',
    'Tasting Classes': 'Beverages & Specialty Foods',
    'Payroll Services': 'Professional Services',
    'Home Staging': 'Real Estate & Housing',
    'Afghan': 'Restaurants & Food',
    'Midwives': 'Healthcare & Medical',
    'Used Bookstore': 'Retail & Shopping',
    'Dominican': 'Restaurants & Food',
    'Polish': 'Restaurants & Food',
    'Dental Hygienists': 'Healthcare & Medical',
    'Pool & Billiards': 'Recreation & Outdoor Activities',
    'Trailer Repair': 'Home & Local Services',
    'Aestheticians': 'Beauty, Spas & Personal Care',
    'Kombucha': 'Beverages & Specialty Foods',
    'Newspapers & Magazines': 'Community & Public Services',
    'Buses': 'Automotive',
    'Foundation Repair': 'Home & Local Services',
    'Russian': 'Restaurants & Food',
    'Prosthodontists': 'Healthcare & Medical',
    'Firewood': 'Home & Local Services',
    'Grilling Equipment': 'Retail & Shopping',
    'Luggage': 'Retail & Shopping',
    'Holiday Decorations': 'Retail & Shopping',
    'Lighting Stores': 'Retail & Shopping',
    'Circuit Training Gyms': 'Fitness & Wellness',
    'Persian/Iranian': 'Restaurants & Food',
    'Vehicle Shipping': 'Automotive',
    'Hypnosis/Hypnotherapy': 'Health & Wellness Products',
    'Ski & Snowboard Shops': 'Recreation & Outdoor Activities',
    'Local Fish Stores': 'Retail & Shopping',
    'Gastroenterologist': 'Healthcare & Medical',
    'Hospice': 'Healthcare & Medical',
    'Day Camps': 'Recreation & Outdoor Activities',
    'Basque': 'Restaurants & Food',
    'Sugaring': 'Beauty, Spas & Personal Care',
    'CSA': 'Retail & Shopping',
    'Roof Inspectors': 'Home & Local Services',
    'Farm Equipment Repair': 'Home & Local Services',
    'Marinas': 'Travel, Hotels & Transportation',
    'Henna Artists': 'Beauty, Spas & Personal Care',
    'Check Cashing/Pay-day Loans': 'Professional Services',
    'Osteopathic Physicians': 'Healthcare & Medical',
    'Fire Protection Services': 'Home & Local Services',
    'Livestock Feed & Supply': 'Home & Local Services',
    'Mobile Dent Repair': 'Home & Local Services',
    'Trusts': 'Professional Services',
    'Excavation Services': 'Home & Local Services',
    'Wills': 'Professional Services',
    'Go Karts': 'Recreation & Outdoor Activities',
    'Teppanyaki': 'Restaurants & Food',
    'Pet Transportation': 'Pets & Animal Services',
    'Pet Hospice': 'Pets & Animal Services',
    'Pet Cremation Services': 'Pets & Animal Services',
    'Olive Oil': 'Beverages & Specialty Foods',
    'Paint-Your-Own Pottery': 'Arts, Crafts & Culture',
    'Clothing Rental': 'Retail & Shopping',
    'Estate Liquidation': 'Professional Services',
    'Travel Agents': 'Travel, Hotels & Transportation',
    'Boudoir Photography': 'Arts, Crafts & Culture',
    'Cryotherapy': 'Beauty, Spas & Personal Care',
    'Trailer Dealers': 'Automotive',
    'Archery': 'Recreation & Outdoor Activities',
    'Mongolian': 'Restaurants & Food',
    'Wedding Chapels': 'Events & Planning',
    'Arabic': 'Restaurants & Food',
    'Race Tracks': 'Recreation & Outdoor Activities',
    'Audiologist': 'Healthcare & Medical',
    'Recording & Rehearsal Studios': 'Entertainment & Nightlife',
    'Train Stations': 'Travel, Hotels & Transportation',
    'Dinner Theater': 'Entertainment & Nightlife',
    'Rock Climbing': 'Recreation & Outdoor Activities',
    'Engraving': 'Arts, Crafts & Culture',
    'Career Counseling': 'Professional Services',
    'Police Departments': 'Community & Public Services',
    'Awnings': 'Home & Local Services',
    'Shoe Shine': 'Retail & Shopping',
    'Workers Compensation Law': 'Professional Services',
    'Argentine': 'Restaurants & Food',
    'Scuba Diving': 'Recreation & Outdoor Activities',
    'Dumpster Rental': 'Home & Local Services',
    'Jet Skis': 'Recreation & Outdoor Activities',
    'Strip Clubs': 'Entertainment & Nightlife',
    'Paintball': 'Recreation & Outdoor Activities',
    'Psychologists': 'Healthcare & Medical',
    'Portuguese': 'Restaurants & Food',
    'Health Coach': 'Health & Wellness Products',
    'Bistros': 'Restaurants & Food',
    'Prenatal/Perinatal Care': 'Healthcare & Medical',
    'Test Preparation': 'Education & Learning',
    'Ethnic Grocery': 'Retail & Shopping',
    'Soccer': 'Recreation & Outdoor Activities',
    'Community Centers': 'Community & Public Services',
    'Bike tours': 'Recreation & Outdoor Activities',
    'Spine Surgeons': 'Healthcare & Medical',
    'Real Estate Photography': 'Real Estate & Housing',
    'Balloon Services': 'Events & Planning',
    'Urologists': 'Healthcare & Medical',
    'Wine Tasting Classes': 'Restaurants & Food',
    'Pop-up Shops': 'Retail & Shopping',
    'Television Stations': 'Entertainment & Nightlife',
    'Doulas': 'Beauty, Spas & Personal Care',
    'Gemstones & Minerals': 'Retail & Shopping',
    'Knife Sharpening': 'Home & Local Services',
    'Commercial Truck Dealers': 'Automotive',
    'Speakeasies': 'Entertainment & Nightlife',
    'Surfing': 'Fitness & Wellness',
    'Delicatessen': 'Restaurants & Food',
    'Self-defense Classes': 'Fitness & Wellness',
    'Malaysian': 'Restaurants & Food',
    'Lakes': 'Recreation & Outdoor Activities',
    'Personal Assistants': 'Home & Local Services',
    'Cremation Services': 'Healthcare & Medical',
    'Ticket Sales': 'Events & Planning',
    'Mobility Equipment Sales & Services': 'Home & Local Services',
    'Water Delivery': 'Home & Local Services',
    'Pumpkin Patches': 'Recreation & Outdoor Activities',
    'Pole Dancing Classes': 'Fitness & Wellness',
    'Skate Parks': 'Recreation & Outdoor Activities',
    'Trailer Rental': 'Home & Local Services',
    'Horse Boarding': 'Home & Local Services',
    'Eatertainment': 'Restaurants & Food',
    'Psychic Mediums': 'Entertainment & Nightlife',
    'Kitchen Supplies': 'Retail & Shopping',
    'Airport Lounges': 'Travel, Hotels & Transportation',
    'Bookkeepers': 'Professional Services',
    'Themed Cafes': 'Restaurants & Food',
    'Lactation Services': 'Healthcare & Medical',
    'Fondue': 'Restaurants & Food',
    'Condominiums': 'Real Estate & Housing',
    'Champagne Bars': 'Restaurants & Food',
    'Gunsmith': 'Automotive',
    'High Fidelity Audio Equipment': 'Tech & Electronics',
    'Hearing Aid Providers': 'Healthcare & Medical',
    'CPR Classes': 'Healthcare & Medical',
    'Medical Transportation': 'Healthcare & Medical',
    'Surf Shop': 'Retail & Shopping',
    'Cabaret': 'Entertainment & Nightlife',
    'Magicians': 'Entertainment & Nightlife',
    'Pick Your Own Farms': 'Recreation & Outdoor Activities',
    'Poutineries': 'Restaurants & Food',
    'Bar Crawl': 'Events & Planning',
    'Machine & Tool Rental': 'Home & Local Services',
    'Restaurant Supplies': 'Retail & Shopping',
    'Cideries': 'Beverages & Specialty Foods',
    'Endocrinologists': 'Healthcare & Medical',
    'Virtual Reality Centers': 'Tech & Electronics',
    'Batting Cages': 'Recreation & Outdoor Activities',
    'Health Retreats': 'Health & Wellness Products',
    'Attraction Farms': 'Recreation & Outdoor Activities',
    'Medical Cannabis Referrals': 'Healthcare & Medical',
    'Clock Repair': 'Home & Local Services',
    'Shredding Services': 'Home & Local Services',
    'Architects': 'Professional Services',
    'Artificial Turf': 'Home & Local Services',
    'Face Painting': 'Arts, Crafts & Culture',
    'Honduran': 'Restaurants & Food',
    'Karate': 'Fitness & Wellness',
    'Water Parks': 'Recreation & Outdoor Activities',
    'Reproductive Health Services': 'Healthcare & Medical',
    'Belgian': 'Restaurants & Food',
    'Medical Law': 'Professional Services',
    'Religious Items': 'Retail & Shopping',
    'Indonesian': 'Restaurants & Food',
    'Halotherapy': 'Health & Wellness Products',
    'Personal Care Services': 'Beauty, Spas & Personal Care',
    'Public Markets': 'Community & Public Services',
    'Trophy Shops': 'Retail & Shopping',
    'Childbirth Education': 'Education & Learning',
    'Bird Shops': 'Pets & Animal Services',
    'Outdoor Power Equipment Services': 'Home & Local Services',
    'Addiction Medicine': 'Healthcare & Medical',
    'Races & Competitions': 'Recreation & Outdoor Activities',
    'Shaved Snow': 'Restaurants & Food',
    'Nurse Practitioner': 'Healthcare & Medical',
    'Disc Golf': 'Recreation & Outdoor Activities',
    'Oncologist': 'Healthcare & Medical',
    'Structural Engineers': 'Professional Services',
    'Pet Photography': 'Pets & Animal Services',
    'Basketball Courts': 'Recreation & Outdoor Activities',
    'Himalayan/Nepalese': 'Restaurants & Food',
    'House Sitters': 'Home & Local Services',
    'Dietitians': 'Education & Learning',
    'Memory Care': 'Healthcare & Medical',
    'Children\'s Museums': 'Education & Learning',
    'Muay Thai': 'Fitness & Wellness',
    'Pulmonologist': 'Healthcare & Medical',
    'Colonics': 'Healthcare & Medical',
    'Haitian': 'Restaurants & Food',
    'Public Art': 'Arts, Crafts & Culture',
    'Nanny Services': 'Community & Public Services',
    'Airport Terminals': 'Travel, Hotels & Transportation',
    'Massage Schools': 'Beauty, Spas & Personal Care',
    'Tai Chi': 'Fitness & Wellness',
    'Well Drilling': 'Home & Local Services',
    'Health Insurance Offices': 'Healthcare & Medical',
    'Speech Therapists': 'Healthcare & Medical',
    'Title Loans': 'Professional Services',
    'Immigration Law': 'Professional Services',
    'Car Share Services': 'Travel, Hotels & Transportation',
    'Aerial Fitness': 'Fitness & Wellness',
    'Music Production Services': 'Entertainment & Nightlife',
    'Motorsport Vehicle Repairs': 'Automotive',
    'Fingerprinting': 'Professional Services',
    'Courthouses': 'Community & Public Services',
    'Powder Coating': 'Automotive',
    'Cambodian': 'Restaurants & Food',
    'Party Bike Rentals': 'Events & Planning',
    'Valet Services': 'Automotive',
    'Art Tours': 'Entertainment & Nightlife',
    'Ukrainian': 'Restaurants & Food',
    'Flight Instruction': 'Travel, Hotels & Transportation',
    'Burmese': 'Restaurants & Food',
    'Safe Stores': 'Retail & Shopping',
    'Honey': 'Beverages & Specialty Foods',
    'Motorcycle Rental': 'Automotive',
    'Skydiving': 'Recreation & Outdoor Activities',
    'Wallpapering': 'Home & Local Services',
    'Talent Agencies': 'Professional Services',
    'Vacation Rental Agents': 'Travel, Hotels & Transportation',
    'Concierge Medicine': 'Healthcare & Medical',
    'Trinidadian': 'Restaurants & Food',
    'Shanghainese': 'Restaurants & Food',
    'Sicilian': 'Restaurants & Food',
    'Lice Services': 'Healthcare & Medical',
    'Holistic Animal Care': 'Pets & Animal Services',
    'Passport & Visa Services': 'Community & Public Services',
    'Ultrasound Imaging Centers': 'Healthcare & Medical',
    'Dive Shops': 'Recreation & Outdoor Activities',
    'Egyptian': 'Restaurants & Food',
    'Hydrotherapy': 'Health & Wellness Products',
    'Architectural Tours': 'Arts, Crafts & Culture',
    'Izakaya': 'Restaurants & Food',
    'Crane Services': 'Home & Local Services',
    'Emergency Medicine': 'Healthcare & Medical',
    'Armenian': 'Restaurants & Food',
    'Holiday Decorating Services': 'Home & Local Services',
    'Pet Breeders': 'Pets & Animal Services',
    'Parenting Classes': 'Education & Learning',
    'Religious Schools': 'Education & Learning',
    'Bounce House Rentals': 'Events & Planning',
    'Installment Loans': 'Professional Services',
}

In [4]:
# Function to map categories to industries
def map_to_industry(categories):
    if categories is None:  # Check for None type
        return 'Other'  # Return 'Other' if categories is None
    
    for category in categories:
        if category in category_to_industry:
            return category_to_industry[category]
    return 'Other'  # Default if no mapping is found

# Apply the mapping function to create a new 'industry' column
business_df['industry'] = business_df['categories'].apply(map_to_industry)

# Display the updated DataFrame with 'industry' column
print(business_df[['name', 'categories', 'industry']].head())

                       name  \
0  Abby Rappoport, LAC, CMQ   
1             The UPS Store   
2                    Target   
3        St Honore Pastries   
4  Perkiomen Valley Brewery   

                                          categories               industry  
0  [Doctors, Traditional Chinese Medicine, Naturo...             Healthcare  
1  [Shipping Centers, Local Services, Notaries, M...  Professional Services  
2  [Department Stores, Shopping, Fashion, Home & ...                 Retail  
3  [Restaurants, Food, Bubble Tea, Coffee & Tea, ...     Food & Restaurants  
4                        [Brewpubs, Breweries, Food]              Nightlife  


In [5]:
# Save the updated DataFrame to a CSV file
business_df.to_json('yelp_business_with_industries.json', index=False)

In [6]:
from IPython.display import FileLink

# Save the cleaned DataFrame to a JSON file in the current working directory
business_df.to_json('yelp_business_with_industries_V2.json', orient='records', lines=True)

# Create a download link for the JSON file
FileLink('yelp_business_with_industries_V2.json')

In [6]:
industry_count = business_df['industry'].value_counts()
print(industry_count.index)

Index(['Food & Restaurants', 'Retail', 'Home Services', 'Beauty & Spas',
       'Automobile', 'Healthcare', 'Nightlife', 'Health & Fitness',
       'Event Planning & Services', 'Pet Services', 'Professional Services',
       'Travel & Hospitality', 'Entertainment & Culture', 'Real Estate',
       'Restaurants & Food', 'Arts & Crafts', 'Automotive',
       'Travel & Transportation', 'Community Services', 'Events & Planning',
       'Home & Local Services', 'Retail & Shopping', 'Recreation',
       'Recreation & Outdoor Activities', 'Tech & Electronics', 'Education',
       'Healthcare & Medical', 'Fitness & Wellness',
       'Entertainment & Nightlife', 'Education & Learning',
       'Pets & Animal Services', 'Travel, Hotels & Transportation', 'Other',
       'Health & Wellness Products', 'Community & Public Services',
       'Beauty, Spas & Personal Care', 'Entertainment',
       'Beverages & Specialty Foods', 'Arts, Crafts & Culture',
       'Real Estate & Housing'],
      dtype='obje

In [None]:
Industry_Dataset = df

In [7]:
business_df.loc[business_df['industry'] == 'Retail', 'industry'] = 'Retail & Shopping'

business_df.loc[business_df['industry'] == 'Retail & Shopping', 'industry'] = 'Retail & Shopping'

business_df.loc[business_df['industry'] == 'Home Services', 'industry'] = 'Home & Local Services'

business_df.loc[business_df['industry'] == 'Beauty & Spas', 'industry'] = 'Beauty, Spas & Personal Care'

business_df.loc[business_df['industry'] == 'Healthcare', 'industry'] = 'Healthcare & Medical'

business_df.loc[business_df['industry'] == 'Entertainment & Nightlife', 'industry'] = 'Nightlife'

business_df.loc[business_df['industry'] == 'Health & Fitness', 'industry'] = 'Fitness & Wellness'

business_df.loc[business_df['industry'] == 'Health & Wellness Products', 'industry'] = 'Fitness & Wellness'

business_df.loc[business_df['industry'] == 'Entertainment', 'industry'] = 'Entertainment & Culture'

business_df.loc[business_df['industry'] == 'Real Estate', 'industry'] = 'Real Estate & Housing'

business_df.loc[business_df['industry'] == 'Travel & Transportation', 'industry'] = 'Travel, Hotels & Transportation'

business_df.loc[business_df['industry'] == 'Community Services', 'industry'] = 'Community & Public Services'

business_df.loc[business_df['industry'] == 'Recreation', 'industry'] = 'Recreation & Outdoor Activities'

business_df.loc[business_df['industry'] == 'Education', 'industry'] = 'Education & Learning'

business_df.loc[business_df['industry'] == 'Arts & Crafts', 'industry'] = 'Arts, Crafts & Culture'

business_df.loc[business_df['industry'] == 'Food & Restaurants', 'industry'] = 'Restaurants & Food'

business_df.loc[business_df['industry'] == 'Pet Services', 'industry'] = 'Pets & Animal Services'

business_df.loc[business_df['industry'] == 'Event Planning & Services', 'industry'] = 'Events & Planning'

business_df.loc[business_df['industry'] == 'Automotive', 'industry'] = 'Automobile'


In [8]:
industry_count = business_df['industry'].value_counts()
print(industry_count.index)

Index(['Restaurants & Food', 'Retail & Shopping', 'Home & Local Services',
       'Beauty, Spas & Personal Care', 'Automobile', 'Healthcare & Medical',
       'Nightlife', 'Fitness & Wellness', 'Events & Planning',
       'Pets & Animal Services', 'Professional Services',
       'Travel & Hospitality', 'Entertainment & Culture',
       'Real Estate & Housing', 'Recreation & Outdoor Activities',
       'Travel, Hotels & Transportation', 'Arts, Crafts & Culture',
       'Education & Learning', 'Community & Public Services',
       'Tech & Electronics', 'Other', 'Beverages & Specialty Foods'],
      dtype='object', name='industry')


In [30]:
from IPython.display import FileLink

# Save the cleaned DataFrame to a JSON file in the current working directory
business_df.to_json('yelp_business_with_industries_V3.json', orient='records', lines=True)

# Create a download link for the JSON file
FileLink('yelp_business_with_industries_V3.json')

In [None]:
business_df = pd.read_json('yelp_business_with_industries_V3.json', lines=True)
review_df = pd.read_json('yelp_academic_dataset_review.json', lines=True)


merged_df = pd.merge(review_df, business_df, on='business_id', how='inner')


print(merged_df.head())

In [None]:
import os

print(os.path.exists('yelp_business_with_industries_and_reviews.json'))

In [None]:
from IPython.display import FileLink

# Save the cleaned DataFrame to a JSON file in the current working directory
merged_df.to_json('yelp_business_with_industries_and_reviews.json', orient='records', lines=True)
print("Hello")
# Create a download link for the JSON file
FileLink('yelp_business_with_industries_and_reviews.json')

In [9]:
import pandas as pd

# Assuming df is your business dataframe
Yelp_Industry_Dataset = business_df[['industry']].drop_duplicates().reset_index(drop=True)
Yelp_Industry_Dataset['industry_id'] = Yelp_Industry_Dataset.index + 1  # create surrogate key

dim_industry = Yelp_Industry_Dataset[['industry_id', 'industry']]
Yelp_Industry_Dataset = Yelp_Industry_Dataset[['industry_id', 'industry']]

In [10]:
# Create a column to help with sorting
Yelp_Industry_Dataset['is_other'] = Yelp_Industry_Dataset['industry'] == 'Other'

# Sort so 'Other' comes last (False first, True last)
Yelp_Industry_Dataset = Yelp_Industry_Dataset.sort_values(by='is_other').reset_index(drop=True)

# Drop the helper column
Yelp_Industry_Dataset.drop(columns='is_other', inplace=True)

# Reassign industry_id
Yelp_Industry_Dataset['industry_id'] = Yelp_Industry_Dataset.index + 1

# Reorder columns
Yelp_Industry_Dataset = Yelp_Industry_Dataset[['industry_id', 'industry']]


In [11]:
Yelp_Industry_Dataset.head(22)

Unnamed: 0,industry_id,industry
0,1,Healthcare & Medical
1,2,"Travel, Hotels & Transportation"
2,3,Education & Learning
3,4,Community & Public Services
4,5,Home & Local Services
5,6,Travel & Hospitality
6,7,Real Estate & Housing
7,8,Fitness & Wellness
8,9,Entertainment & Culture
9,10,"Beauty, Spas & Personal Care"


In [12]:
df_exploded = business_df.explode('categories')

In [13]:
df_exploded['mapped_industry'] = df_exploded['categories'].map(category_to_industry)
df_exploded['mapped_industry'] = df_exploded['mapped_industry'].fillna('Other')

In [14]:
# Group categories by mapped industry
industry_categories = df_exploded.groupby('mapped_industry')['categories'] \
    .unique().reset_index()

# Join categories into a comma-separated string, filtering out None values
industry_categories['categories'] = industry_categories['categories'].apply(
    lambda x: ', '.join(sorted(set([cat for cat in x if cat is not None])))
)

# Rename to match your industry dimension
industry_categories.rename(columns={'mapped_industry': 'industry'}, inplace=True)

In [15]:
Yelp_Industry_Dataset = Yelp_Industry_Dataset.merge(industry_categories, on='industry', how='left')

In [16]:
Yelp_Industry_Dataset.head(22)

Unnamed: 0,industry_id,industry,categories
0,1,Healthcare & Medical,"Addiction Medicine, Allergists, Audiologist, B..."
1,2,"Travel, Hotels & Transportation","Airlines, Airport Lounges, Airport Terminals, ..."
2,3,Education & Learning,"Adult Education, Child Care & Day Care, Childb..."
3,4,Community & Public Services,"Community Centers, Courthouses, Departments of..."
4,5,Home & Local Services,"Artificial Turf, Awnings, Battery Stores, Carp..."
5,6,Travel & Hospitality,"Hotels, Hotels & Travel"
6,7,Real Estate & Housing,"Assisted Living Facilities, Commercial Real Es..."
7,8,Fitness & Wellness,"Aerial Fitness, Barre Classes, Boot Camps, Box..."
8,9,Entertainment & Culture,"Arts & Crafts, Arts & Entertainment, Local Fla..."
9,10,"Beauty, Spas & Personal Care","Acne Treatment, Aestheticians, Body Contouring..."


In [17]:
from IPython.display import FileLink

# Save the cleaned DataFrame to a JSON file in the current working directory
Yelp_Industry_Dataset.to_csv('Yelp_Industry_Dataset.csv')

# Create a download link for the JSON file
FileLink('Yelp_Industry_Dataset.csv')