In [None]:
import typing
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrapeDataFromSpreadsheet() -> typing.List[typing.List[str]]:
    html = requests.get(
        'https://docs.google.com/spreadsheets/d/1S14i0IK-bwM_17vhphjfUJEJq-4TMtxMLrlGQ_8JcC8/gviz/tq?tqx=out:html&tq&gid=1'
    ).text
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find_all('table')[0]
    rows = [
        [td.text.strip() for td in row.find_all("td")]
        for row in table.find_all('tr')
    ]
    return rows

def rows_to_dataframe(rows: typing.List[typing.List[str]]) -> pd.DataFrame:
    if not rows:
        return pd.DataFrame()  
    
    header = rows[0]
    data = rows[1:]
    
    df = pd.DataFrame(data, columns=header)
    return df

if __name__ == "__main__":
    rows = scrapeDataFromSpreadsheet()
    df = rows_to_dataframe(rows)
    print(df.head())


                                                                     
0                             Disorder         Disease          Breed
1              Inherited eye disorders       Cataracts  Affenpinscher
2  Inherited musculoskeletal disorders   Hip dysplasia  Affenpinscher
3        Inherited endocrine disorders  Hypothyroidism   Afghan hound
4              Inherited eye disorders       Cataracts   Afghan hound


In [2]:
headers = df.iloc[0]
new_df  = pd.DataFrame(df.values[1:], columns=headers)
new_df['Breed'] = new_df['Breed'].str.lower()
new_df

Unnamed: 0,Disorder,Disease,Breed
0,Inherited eye disorders,Cataracts,affenpinscher
1,Inherited musculoskeletal disorders,Hip dysplasia,affenpinscher
2,Inherited endocrine disorders,Hypothyroidism,afghan hound
3,Inherited eye disorders,Cataracts,afghan hound
4,Inherited eye disorders,Retinal dysplasia,afghan hound
...,...,...,...
1644,Inherited nervous system disorders,Shaker dog syndrome,yorkshire terrier
1645,Inherited respiratory disorders,tracheal collapse,yorkshire terrier
1646,Inherited skin disorders,colour dilution alopecia,yorkshire terrier
1647,Inherited skin disorders,congenital hypotrichosis,yorkshire terrier


In [3]:
breeds = pd.read_csv('akc-data-latest.csv')
breeds['Breed'] = breeds['Breed'].str.lower()
breeds

Unnamed: 0,Breed,description,temperament,popularity,min_height,max_height,min_weight,max_weight,min_expectancy,max_expectancy,...,grooming_frequency_value,grooming_frequency_category,shedding_value,shedding_category,energy_level_value,energy_level_category,trainability_value,trainability_category,demeanor_value,demeanor_category
0,affenpinscher,The Affen’s apish look has been described many...,"Confident, Famously Funny, Fearless",148,22.86,29.21,3.175147,4.535924,12.0,15.0,...,0.6,2-3 Times a Week Brushing,0.6,Seasonal,0.6,Regular Exercise,0.8,Easy Training,1.0,Outgoing
1,afghan hound,"The Afghan Hound is an ancient breed, his whol...","Dignified, Profoundly Loyal, Aristocratic",113,63.50,68.58,22.679619,27.215542,12.0,15.0,...,0.8,Daily Brushing,0.2,Infrequent,0.8,Energetic,0.2,May be Stubborn,0.2,Aloof/Wary
2,airedale terrier,The Airedale Terrier is the largest of all ter...,"Friendly, Clever, Courageous",60,58.42,58.42,22.679619,31.751466,11.0,14.0,...,0.6,2-3 Times a Week Brushing,0.4,Occasional,0.6,Regular Exercise,1.0,Eager to Please,0.8,Friendly
3,akita,"Akitas are burly, heavy-boned spitz-type dogs ...","Courageous, Dignified, Profoundly Loyal",47,60.96,71.12,31.751466,58.967008,10.0,13.0,...,0.8,Daily Brushing,0.6,Seasonal,0.8,Energetic,1.0,Eager to Please,0.6,Alert/Responsive
4,alaskan malamute,The Alaskan Malamute stands 23 to 25 inches at...,"Affectionate, Loyal, Playful",58,58.42,63.50,34.019428,38.555351,10.0,14.0,...,0.6,2-3 Times a Week Brushing,0.6,Seasonal,0.8,Energetic,0.4,Independent,0.8,Friendly
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,wirehaired vizsla,WVs are close relatives of Vizslas but a disti...,"Gentle, Loyal, Trainable",167,54.61,63.50,20.411657,29.483504,12.0,14.0,...,0.2,Occasional Bath/Brush,0.6,Seasonal,0.8,Energetic,0.6,Agreeable,0.6,Alert/Responsive
273,working kelpie,The overall appearance of the Working Kelpie i...,"Alert, Eager, Intelligent",,48.26,63.50,12.700586,27.215542,12.0,15.0,...,0.2,Occasional Bath/Brush,0.6,Seasonal,0.8,Energetic,0.4,Independent,0.6,Alert/Responsive
274,xoloitzcuintli,The Xoloitzcuintli (show-low-eats-queen-tlee) ...,"Loyal, Alert, Calm",140,25.40,58.42,4.535924,24.947580,13.0,18.0,...,0.2,Occasional Bath/Brush,0.2,Infrequent,0.8,Energetic,0.6,Agreeable,0.6,Alert/Responsive
275,yakutian laika,For centuries the Yakutian Laika was an irrepl...,"Affectionate, Intelligent, Active",,53.34,58.42,18.143695,24.947580,10.0,12.0,...,0.4,Weekly Brushing,0.6,Seasonal,0.8,Energetic,0.2,May be Stubborn,0.4,Reserved with Strangers


In [4]:
merged_df = pd.merge(new_df, breeds, on='Breed', how='inner')
merged_df

Unnamed: 0,Disorder,Disease,Breed,description,temperament,popularity,min_height,max_height,min_weight,max_weight,...,grooming_frequency_value,grooming_frequency_category,shedding_value,shedding_category,energy_level_value,energy_level_category,trainability_value,trainability_category,demeanor_value,demeanor_category
0,Inherited eye disorders,Cataracts,affenpinscher,The Affen’s apish look has been described many...,"Confident, Famously Funny, Fearless",148,22.86,29.21,3.175147,4.535924,...,0.6,2-3 Times a Week Brushing,0.6,Seasonal,0.6,Regular Exercise,0.8,Easy Training,1.0,Outgoing
1,Inherited musculoskeletal disorders,Hip dysplasia,affenpinscher,The Affen’s apish look has been described many...,"Confident, Famously Funny, Fearless",148,22.86,29.21,3.175147,4.535924,...,0.6,2-3 Times a Week Brushing,0.6,Seasonal,0.6,Regular Exercise,0.8,Easy Training,1.0,Outgoing
2,Inherited endocrine disorders,Hypothyroidism,afghan hound,"The Afghan Hound is an ancient breed, his whol...","Dignified, Profoundly Loyal, Aristocratic",113,63.50,68.58,22.679619,27.215542,...,0.8,Daily Brushing,0.2,Infrequent,0.8,Energetic,0.2,May be Stubborn,0.2,Aloof/Wary
3,Inherited eye disorders,Cataracts,afghan hound,"The Afghan Hound is an ancient breed, his whol...","Dignified, Profoundly Loyal, Aristocratic",113,63.50,68.58,22.679619,27.215542,...,0.8,Daily Brushing,0.2,Infrequent,0.8,Energetic,0.2,May be Stubborn,0.2,Aloof/Wary
4,Inherited eye disorders,Retinal dysplasia,afghan hound,"The Afghan Hound is an ancient breed, his whol...","Dignified, Profoundly Loyal, Aristocratic",113,63.50,68.58,22.679619,27.215542,...,0.8,Daily Brushing,0.2,Infrequent,0.8,Energetic,0.2,May be Stubborn,0.2,Aloof/Wary
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242,Inherited nervous system disorders,Shaker dog syndrome,yorkshire terrier,"The Yorkshire Terrier is a compact, toy-size t...","Affectionate, Sprightly, Tomboyish",10,17.78,20.32,3.175147,3.175147,...,1.0,Specialty/Professional,0.2,Infrequent,0.6,Regular Exercise,0.2,May be Stubborn,0.8,Friendly
1243,Inherited respiratory disorders,tracheal collapse,yorkshire terrier,"The Yorkshire Terrier is a compact, toy-size t...","Affectionate, Sprightly, Tomboyish",10,17.78,20.32,3.175147,3.175147,...,1.0,Specialty/Professional,0.2,Infrequent,0.6,Regular Exercise,0.2,May be Stubborn,0.8,Friendly
1244,Inherited skin disorders,colour dilution alopecia,yorkshire terrier,"The Yorkshire Terrier is a compact, toy-size t...","Affectionate, Sprightly, Tomboyish",10,17.78,20.32,3.175147,3.175147,...,1.0,Specialty/Professional,0.2,Infrequent,0.6,Regular Exercise,0.2,May be Stubborn,0.8,Friendly
1245,Inherited skin disorders,congenital hypotrichosis,yorkshire terrier,"The Yorkshire Terrier is a compact, toy-size t...","Affectionate, Sprightly, Tomboyish",10,17.78,20.32,3.175147,3.175147,...,1.0,Specialty/Professional,0.2,Infrequent,0.6,Regular Exercise,0.2,May be Stubborn,0.8,Friendly


In [110]:
merged_df.to_csv("Disease.csv")

Scraping [from hillspet.com ](https://www.hillspet.com/dog-food)

In [None]:
import time
import random
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

CHROMEDRIVER_PATH = "C:/Users/bakht/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe" 

url = 'https://www.hillspet.com/dog-food'

chrome_options = Options()
chrome_options.add_argument('--headless')  
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--window-size=1920,1080')

driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=chrome_options)

data = []

def scrape_page(url):
    driver.get(url)
    time.sleep(5)  

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    product_blocks = soup.find_all('div', class_='grid-item-product')

    for block in product_blocks:
        # Title
        title_elem = block.find('h3', class_='product-title')
        title_text = title_elem.get_text(strip=True) if title_elem else 'N/A'

        # Description
        desc_elem = block.find('div', class_='product-description')
        description_text = desc_elem.get_text(strip=True) if desc_elem else 'N/A'

        # Rating
        rating_block = block.find('div', class_='bv_text')
        rating_text = rating_block.get_text(strip=True) if rating_block else 'N/A'

        # Link
        footer = block.find('div', class_='product-footer')
        link_tag = footer.find('a') if footer else None
        link = link_tag['href'] if link_tag else 'No link found'
        if not link.startswith('http'):
            link = 'https://www.hillspet.com' + link

        data.append({
            'Title': title_text,
            'Description': description_text,
            'Rating': rating_text,
            'Link': link
        })

scrape_page(url)

time.sleep(random.uniform(2, 5))

driver.quit()

df_links = pd.DataFrame(data)
df_links
df_links.to_csv("links_selenium.csv", index=False)


In [6]:
df_links

Unnamed: 0,Title,Description,Rating,Link
0,Hill's Science Diet Perfect Digestion Small Bi...,Science Diet's breakthrough nutrition supports...,0.0,https://www.hillspet.com/dog-food/sd-canine-ad...
1,"Hill's Science Diet Adult Small Bites No Corn,...","Supports healthy immune system, digestion, lea...",4.0,https://www.hillspet.com/dog-food/sd-canine-ad...
2,Hill's Science Diet Adult 7+ Senior Vitality S...,Improves Everyday Ability to Get Up & Go,4.5,https://www.hillspet.com/dog-food/sd-canine-ad...
3,Hill's Prescription Diet z/d Low Fat Hydrolyze...,Nutrition specially formulated with hydrolyzed...,0.0,https://www.hillspet.com/dog-food/pd-canine-zd...
4,Hill's Prescription Diet c/d Multicare Chicken...,Support for your dog’s urinary health,4.4,https://www.hillspet.com/dog-food/pd-cd-multic...
...,...,...,...,...
192,Hill's Science Diet Adult Small & Mini Savory ...,A delicious complement to the nutrition of Sci...,4.9,https://www.hillspet.com/dog-food/sd-adult-sma...
193,Hill's Grain Free Crunchy Naturals with Chick...,,5.0,https://www.hillspet.com/dog-food/grain-free-c...
194,Hill's Science Diet Adult Perfect Digestion Ch...,Science Diet's breakthrough nutrition supports...,4.6,https://www.hillspet.com/dog-food/sd-canine-ad...
195,Hill's Science Diet Puppy Small & Mini Savory ...,A delicious complement to the nutrition of Sci...,4.0,https://www.hillspet.com/dog-food/sd-canine-pu...


First part

In [None]:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

all_data = []

for i in df_links['Link']:
    try:
        response = requests.get(i)
        response.raise_for_status()  
        soup = BeautifulSoup(response.content, 'html.parser')

        product_title_elem = soup.find(class_='product-detail-title')
        product_title = product_title_elem.get_text(strip=True) if product_title_elem else 'N/A'

        product_description_elem = soup.find(class_='product-detail-description')
        product_description = product_description_elem.get_text(strip=True) if product_description_elem else 'N/A'

        # Extract list items
        list_items = soup.find_all(class_='list-item')
        list_data = {}
        for item in list_items:
            title_elem = item.find(class_='list-title')
            definition_elem = item.find(class_='list-definition')
            if title_elem and definition_elem:
                list_data[title_elem.get_text(strip=True)] = definition_elem.get_text(strip=True)

        # Extract text segments
        text_segments = soup.find_all(class_='text-segments no-icon false')
        segment_data = {}
        for segment in text_segments:
            bold_text_elem = segment.find(class_='segment bold')
            none_text_elem = segment.find(class_='segment none')
            bold_text = bold_text_elem.get_text(strip=True) if bold_text_elem else ''
            none_text = none_text_elem.get_text(strip=True) if none_text_elem else ''
            segment_data[bold_text] = none_text

        # Extract accordion titles and segment none
        accordion_titles = soup.find_all(class_='cmp-accordion__title')
        accordion_data = {}
        table_data = {}
        for accordion in accordion_titles:
            accordion_title = accordion.get_text(strip=True)
            segment_none_elem = accordion.find_next(class_='segment none')
            segment_none = segment_none_elem.get_text(strip=True) if segment_none_elem else ''
            accordion_data[accordion_title] = segment_none

            # Extract table data under "Average Nutrient & Caloric Content"
            if 'Average Nutrient & Caloric Content' in accordion_title:
                table = accordion.find_next('table')
                if table:
                    rows = table.find_all('tr')
                    for row in rows:
                        columns = row.find_all('td')
                        if len(columns) == 2:
                            column_name = columns[0].get_text(strip=True)
                            column_value = columns[1].get_text(strip=True)
                            table_data[column_name] = column_value

        data = {
            'URL': i,
            'Product Title': product_title,
            'Product Description': product_description,
            **list_data,
            **segment_data,
            **accordion_data,
            **table_data
        }

        all_data.append(data)

    except requests.exceptions.RequestException as e:
        print(f'Request failed for URL {i}: {e}')
    except AttributeError as e:
        print(f'AttributeError for URL {i}: {e}')
    except Exception as e:
        print(f'An error occurred for URL {i}: {e}')

    time.sleep(random.uniform(2, 5))  

df = pd.DataFrame(all_data)

def scrape_page(url):
    try:
        response = requests.get(url)
        response.raise_for_status() 
        soup = BeautifulSoup(response.content, 'html.parser')
    except requests.exceptions.RequestException as e:
        print(f'Request failed for URL {url}: {e}')
    except Exception as e:
        print(f'An error occurred for URL {url}: {e}')



In [8]:
df

Unnamed: 0,URL,Product Title,Product Description,Food Form,Flavor,Sizes,Unnamed: 7,Caloric Content,Ingredients,Feeding Tips,...,Glutamine + Glutamate,It looks like my pet’s food is currently out of stock. When will it be available again?,How can I know when my pet’s food will be back in stock?,How can I tell if my pet’s food formulation changed?,How much should I feed my pet?,Iron,Copper,Zinc,Vitamin K,Alpha-Linolenic Acid (ALA)
0,https://www.hillspet.com/dog-food/sd-canine-ad...,"Perfect Digestion Small Bites Chicken, Brown R...",Science Diet's breakthrough nutrition supports...,Dry Food,"Chicken, Brown Rice & Whole Oats Recipe",3.5 lb,,3620 kcal/kg 359 kcal/cup†,"Chicken, Cracked Pearled Barley, Brown Rice, B...",HELPFUL TIPSAdjust feeding amounts as necessar...,...,,,,,,,,,,
1,https://www.hillspet.com/dog-food/sd-canine-ad...,"Adult Small Bites No Corn, Wheat, Soy Chicken ...","Supports healthy immune system, digestion, lea...",dry,Chicken & Brown Rice Recipe,15 lb,,3713 kcal/kg 389 kcal/cup†,"Chicken, Brown Rice, Brewers Rice, Cracked Pea...",HELPFUL TIPSAdjust feeding amounts as necessar...,...,,,,,,,,,,
2,https://www.hillspet.com/dog-food/sd-canine-ad...,Adult 7+ Senior Vitality Small & Mini Chicken ...,Improves Everyday Ability to Get Up & Go,Dry Food,Chicken Meal & Rice Recipe,"3.5 lb, 12.5 lb",,3651 kcal/kg 342 kcal/cup†,"Chicken, Brewers Rice, Yellow Peas, Cracked Pe...",HELPFUL TIPSAdjust feeding amounts as necessar...,...,,,,,,,,,,
3,https://www.hillspet.com/dog-food/pd-canine-zd...,z/d Low Fat Hydrolyzed Soy Recipe Wet Dog Food,Nutrition specially formulated with hydrolyzed...,Wet Food,Hydrolyzed Soy Recipe,13 oz,,,"Water, corn starch, hydrolyzed soy protein, so...",Ask your veterinarian for specific feeding ins...,...,,,,,,,,,,
4,https://www.hillspet.com/dog-food/pd-cd-multic...,c/d Multicare Chicken & Vegetable Stew Dog Food,Support for your dog’s urinary health,stew,Chicken & Vegetable Stew,"12.5 oz, 5.5 oz",,266 kcal / 12.5 oz (354 g) can\n117 kcal / 5.5...,"Water, Chicken, Pork Liver, Carrots, Rice, Gre...",Ask your veterinarian for specific feeding ins...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,https://www.hillspet.com/dog-food/sd-adult-sma...,Adult Small & Mini Savory Stew with Beef & Veg...,A delicious complement to the nutrition of Sci...,Tray,Savory Stew with Beef & Vegetables,3.5 oz,,86 kcal / 3.5 oz (99g) tray,"Water, Beef, Pork Liver, Brown Rice, Carrots, ...",HELPFUL TIPSCover and refrigerate unused porti...,...,,,,,,,,,,
193,https://www.hillspet.com/dog-food/grain-free-c...,Grain Free Crunchy Naturals with Chicken & App...,,Treats,with Chicken & Apples,8oz,,,"Chicken, Yellow Peas, Potatoes, Potato Starch,...","We know that face is hard to resist, but feed ...",...,,,,,,,,,,
194,https://www.hillspet.com/dog-food/sd-canine-ad...,,,,,,"Just like people, eating amounts vary from pet...",,,,...,,We understand how frustrating this pet food sh...,"With so many factors outside of our control, w...",Hill’s is committed to delivering high-quality...,"Just like people, eating amounts vary from pet...",,,,,
195,https://www.hillspet.com/dog-food/sd-canine-pu...,Puppy Small & Mini Savory Stew with Chicken & ...,A delicious complement to the nutrition of Sci...,Wet Food,Savory Stew with Chicken & Vegetables,3.5oz,,90 kcal / 3.5 oz (99 g) tray,"Chicken Broth, Chicken, Pork Liver, Brown Rice...",HELPFUL TIPSCover and refrigerate unused porti...,...,,,,,,,,,,


In [19]:
df.to_csv("first.csv")

scrape some sections that didn’t work out the first time

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

all_data = []

for i in df_links['Link']:
    try:
        response = requests.get(i)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # === Title & Description ===
        title_elem = soup.find(class_='product-detail-title')
        description_elem = soup.find(class_='product-detail-description')
        product_title = title_elem.get_text(strip=True) if title_elem else 'N/A'
        product_description = description_elem.get_text(strip=True) if description_elem else 'N/A'

        # === Food Form / Flavor / Size ===
        food_form = flavor = size = 'N/A'
        segments = soup.find_all(class_='text-segments no-icon false')
        for segment in segments:
            label = segment.find(class_='segment bold')
            value = segment.find(class_='segment none')
            if label and value:
                label_text = label.get_text(strip=True)
                value_text = value.get_text(strip=True)
                if 'Food Form' in label_text:
                    food_form = value_text
                elif 'Flavor' in label_text:
                    flavor = value_text
                elif 'Size' in label_text:
                    size = value_text

        # === Accordion Fields ===
        accordion_items = soup.find_all(class_='cmp-accordion__item')
        accordion_data = {}
        nutrient_data = {}

        for item in accordion_items:
            title_elem = item.find(class_='cmp-accordion__title')
            if not title_elem:
                continue
            title_text = title_elem.get_text(strip=True)

            # if 'Key Features' in title_text:
            #     ul = item.find('ul')
            #     if ul:
            #         features = [li.get_text(strip=True) for li in ul.find_all('li')]
            #         accordion_data['Key Features'] = ' | '.join(features)
            #     continue
            if 'Key Features' in title_text:
                paragraph = item.find('p')
                ul = item.find('ul')
                features_text = ''

                if paragraph:
                    features_text += paragraph.get_text(strip=True) + ' '

                if ul:
                    bullet_items = [li.get_text(strip=True) for li in ul.find_all('li')]
                    features_text += ' '.join(bullet_items)

                accordion_data['Key Features'] = features_text.strip()
                continue

            segment = item.find(class_='segment none')
            text = segment.get_text(strip=True) if segment else ''

            if 'Ingredients' in title_text:
                accordion_data['Ingredients'] = text
            elif 'Feeding Tips' in title_text and 'Adult Maintenance' not in text:
                accordion_data['Feeding Tips'] = text
            elif 'Caloric Content' in title_text:
                accordion_data['Caloric Content'] = text

            # Nutrient Table
            # if 'Nutrient' in title_text:
            #     table = item.find('table')
            #     if table:
            #         for row in table.find_all('tr'):
            #             cols = row.find_all('td')
            #             if len(cols) == 2:
            #                 nutrient = cols[0].get_text(strip=True)
            #                 value = cols[1].get_text(strip=True)
            #                 nutrient_data[f'Nutrient - {nutrient}'] = value
            if 'Nutrient' in title_text or 'Caloric' in title_text:
                table = item.find('table')
                if table:
                    rows = table.find_all('tr')
                    for row in rows:
                        cells = row.find_all('td')
                        if len(cells) == 2:
                            key = cells[0].get_text(strip=True)
                            value = cells[1].get_text(strip=True)
                            if key.lower() == "nutrient" or "dry matter" in value.lower():
                                continue
                            nutrient_data[f'{key}'] = value

        # === Recommended (Outside Accordion) ===
        recommended_for  = 'N/A'
        segments_bold = soup.find_all(class_='segment bold')
        for seg in segments_bold:
            text = seg.get_text(strip=True)
            if 'Recommended for' in text:
                sibling = seg.find_next_sibling('div', class_='segment none')
                if sibling:
                    recommended_for = sibling.get_text(strip=True)

        # === Final Merge ===
        data = {
            'URL': i,
            'Product Title': product_title,
            'Product Description': product_description,
            'Food Form': food_form,
            'Flavor': flavor,
            'Size': size,
            'Recommended For': recommended_for,
            **accordion_data,
            **nutrient_data
        }

        all_data.append(data)

    except Exception as e:
        print(f"[ERROR] {i}: {e}")

    time.sleep(random.uniform(2, 4))

# Save to DataFrame
df1 = pd.DataFrame(all_data)
df1


Unnamed: 0,URL,Product Title,Product Description,Food Form,Flavor,Size,Recommended For,Ingredients,Feeding Tips,Key Features,...,Linoleic Acid,{{consumer_nutrientTable_name_label}},Chloride,BCAA Total,Glutamine + Glutamate,Iron,Copper,Zinc,Vitamin K,Alpha-Linolenic Acid (ALA)
0,https://www.hillspet.com/dog-food/sd-canine-ad...,"Perfect Digestion Small Bites Chicken, Brown R...",Science Diet's breakthrough nutrition supports...,,,,Adult dogs 1 - 6 years to help keep their dige...,"Chicken, Cracked Pearled Barley, Brown Rice, B...",HELPFUL TIPSAdjust feeding amounts as necessar...,Prebiotics are nutrients that feed the billion...,...,,,,,,,,,,
1,https://www.hillspet.com/dog-food/sd-canine-ad...,"Adult Small Bites No Corn, Wheat, Soy Chicken ...","Supports healthy immune system, digestion, lea...",,,,Adult dogs 1 - 6 years of age.,"Chicken, Brown Rice, Brewers Rice, Cracked Pea...",HELPFUL TIPSAdjust feeding amounts as necessar...,"Hill's Science Diet Adult Small Bites No Corn,...",...,,,,,,,,,,
2,https://www.hillspet.com/dog-food/sd-canine-ad...,Adult 7+ Senior Vitality Small & Mini Chicken ...,Improves Everyday Ability to Get Up & Go,,,,Adult dogs 7+ years of age,"Chicken, Brewers Rice, Yellow Peas, Cracked Pe...",HELPFUL TIPSAdjust feeding amounts as necessar...,Hill's Science Diet Youthful Vitality Small & ...,...,,,,,,,,,,
3,https://www.hillspet.com/dog-food/pd-canine-zd...,z/d Low Fat Hydrolyzed Soy Recipe Wet Dog Food,Nutrition specially formulated with hydrolyzed...,,,,Adult Dogs,"Water, corn starch, hydrolyzed soy protein, so...",Ask your veterinarian for specific feeding ins...,Dogs with recurrent gastrointestinal upset may...,...,,,,,,,,,,
4,https://www.hillspet.com/dog-food/pd-cd-multic...,c/d Multicare Chicken & Vegetable Stew Dog Food,Support for your dog’s urinary health,,,,Adult Dogs,"Water, Chicken, Pork Liver, Carrots, Rice, Gre...",Ask your veterinarian for specific feeding ins...,Bladder stones are collections of mineral crys...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,https://www.hillspet.com/dog-food/sd-adult-sma...,Adult Small & Mini Savory Stew with Beef & Veg...,A delicious complement to the nutrition of Sci...,,,,Adult dogs 1 - 6 years of age.,"Water, Beef, Pork Liver, Brown Rice, Carrots, ...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Adult Small Paws Savory St...,...,,,,,,,,,,
193,https://www.hillspet.com/dog-food/grain-free-c...,Grain Free Crunchy Naturals with Chicken & App...,,,,,"Normally active puppy (9+ weeks old), adult & ...","Chicken, Yellow Peas, Potatoes, Potato Starch,...","We know that face is hard to resist, but feed ...",Hill's Grain Free Crunchy Naturals with Chicke...,...,,,,,,,,,,
194,https://www.hillspet.com/dog-food/sd-canine-ad...,,,,,,,,,,...,,,,,,,,,,
195,https://www.hillspet.com/dog-food/sd-canine-pu...,Puppy Small & Mini Savory Stew with Chicken & ...,A delicious complement to the nutrition of Sci...,,,,Puppies up to 1 year old and pregnant or nursi...,"Chicken Broth, Chicken, Pork Liver, Brown Rice...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Puppy Small & Mini Savory ...,...,,,,,,,,,,


In [12]:
df1.to_csv("hillspet_full_scrape.csv", index=False)

In [16]:
columns_to_remove = ["Food Form", "Flavor", "Size", "{{consumer_nutrientTable_name_label}}"]
df_cleaned = df1.drop(columns=[col for col in columns_to_remove if col in df1.columns])

In [25]:
df_cleaned

Unnamed: 0,URL,Product Title,Product Description,Recommended For,Ingredients,Feeding Tips,Key Features,Caloric Content,Protein,Fat,...,Taurine,Linoleic Acid,Chloride,BCAA Total,Glutamine + Glutamate,Iron,Copper,Zinc,Vitamin K,Alpha-Linolenic Acid (ALA)
0,https://www.hillspet.com/dog-food/sd-canine-ad...,"Perfect Digestion Small Bites Chicken, Brown R...",Science Diet's breakthrough nutrition supports...,Adult dogs 1 - 6 years to help keep their dige...,"Chicken, Cracked Pearled Barley, Brown Rice, B...",HELPFUL TIPSAdjust feeding amounts as necessar...,Prebiotics are nutrients that feed the billion...,3620 kcal/kg 359 kcal/cup†,25 %,14.5 %,...,,,,,,,,,,
1,https://www.hillspet.com/dog-food/sd-canine-ad...,"Adult Small Bites No Corn, Wheat, Soy Chicken ...","Supports healthy immune system, digestion, lea...",Adult dogs 1 - 6 years of age.,"Chicken, Brown Rice, Brewers Rice, Cracked Pea...",HELPFUL TIPSAdjust feeding amounts as necessar...,"Hill's Science Diet Adult Small Bites No Corn,...",3713 kcal/kg 389 kcal/cup†,23.6 %,16.4 %,...,,,,,,,,,,
2,https://www.hillspet.com/dog-food/sd-canine-ad...,Adult 7+ Senior Vitality Small & Mini Chicken ...,Improves Everyday Ability to Get Up & Go,Adult dogs 7+ years of age,"Chicken, Brewers Rice, Yellow Peas, Cracked Pe...",HELPFUL TIPSAdjust feeding amounts as necessar...,Hill's Science Diet Youthful Vitality Small & ...,3651 kcal/kg 342 kcal/cup†,22.4 %,14.6 %,...,,,,,,,,,,
3,https://www.hillspet.com/dog-food/pd-canine-zd...,z/d Low Fat Hydrolyzed Soy Recipe Wet Dog Food,Nutrition specially formulated with hydrolyzed...,Adult Dogs,"Water, corn starch, hydrolyzed soy protein, so...",Ask your veterinarian for specific feeding ins...,Dogs with recurrent gastrointestinal upset may...,,29.9 %,9.4 %,...,,,,,,,,,,
4,https://www.hillspet.com/dog-food/pd-cd-multic...,c/d Multicare Chicken & Vegetable Stew Dog Food,Support for your dog’s urinary health,Adult Dogs,"Water, Chicken, Pork Liver, Carrots, Rice, Gre...",Ask your veterinarian for specific feeding ins...,Bladder stones are collections of mineral crys...,266 kcal / 12.5 oz (354 g) can\n117 kcal / 5.5...,22.8 %,18.3 %,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,https://www.hillspet.com/dog-food/sd-adult-sma...,Adult Small & Mini Savory Stew with Beef & Veg...,A delicious complement to the nutrition of Sci...,Adult dogs 1 - 6 years of age.,"Water, Beef, Pork Liver, Brown Rice, Carrots, ...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Adult Small Paws Savory St...,86 kcal / 3.5 oz (99g) tray,27.1 %,21.3 %,...,,,,,,,,,,
193,https://www.hillspet.com/dog-food/grain-free-c...,Grain Free Crunchy Naturals with Chicken & App...,,"Normally active puppy (9+ weeks old), adult & ...","Chicken, Yellow Peas, Potatoes, Potato Starch,...","We know that face is hard to resist, but feed ...",Hill's Grain Free Crunchy Naturals with Chicke...,,16.6 %,9.6 %,...,,,,,,,,,,
194,https://www.hillspet.com/dog-food/sd-canine-ad...,,,,,,,,,,...,,,,,,,,,,
195,https://www.hillspet.com/dog-food/sd-canine-pu...,Puppy Small & Mini Savory Stew with Chicken & ...,A delicious complement to the nutrition of Sci...,Puppies up to 1 year old and pregnant or nursi...,"Chicken Broth, Chicken, Pork Liver, Brown Rice...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Puppy Small & Mini Savory ...,90 kcal / 3.5 oz (99 g) tray,30.7 %,18.1 %,...,,,,,,,,,,


In [18]:
df_cleaned.to_csv("second.csv", index=False)

Take the necessary columns from the first file and add them to the second file

In [26]:
df.columns = df.columns.str.strip()

In [27]:
print(df.columns.tolist())

['URL', 'Product Title', 'Product Description', 'Food Form', 'Flavor', 'Sizes', '', 'Caloric Content', 'Ingredients', 'Feeding Tips', 'Key Features', 'Average Nutrient & Caloric Content', 'Nutrient', 'Protein', 'Fat', 'Carbohydrate / NFE', 'Crude Fiber', 'Calcium', 'Phosphorus', 'Potassium', 'Sodium', 'Magnesium', 'Vitamin C', 'Vitamin E', 'Total Omega-3 FA', 'Total Omega-6 FA', 'Ash', 'Vitamin A', 'Vitamin D', 'Beta-Carotene', 'EPA', 'Carnitine', 'Adult Maintenance', 'DHA', 'Glucosamine', 'Chondroitin Sulfate', 'Total Dietary Fiber', 'Lysine', 'Taurine', 'Linoleic Acid', '{{consumer_nutrientTable_name_label}}', 'Chloride', 'BCAA Total', 'Glutamine + Glutamate', 'It looks like my pet’s food is currently out of stock. When will it be available again?', 'How can I know when my pet’s food will be back in stock?', 'How can I tell if my pet’s food formulation changed?', 'How much should I feed my pet?', 'Iron', 'Copper', 'Zinc', 'Vitamin K', 'Alpha-Linolenic Acid (ALA)']


In [29]:
df_first = df[['URL', 'Food Form', 'Flavor', 'Sizes']]

In [37]:
df_first

Unnamed: 0,URL,Food Form,Flavor,Sizes
0,https://www.hillspet.com/dog-food/sd-canine-ad...,Dry Food,"Chicken, Brown Rice & Whole Oats Recipe",3.5 lb
1,https://www.hillspet.com/dog-food/sd-canine-ad...,dry,Chicken & Brown Rice Recipe,15 lb
2,https://www.hillspet.com/dog-food/sd-canine-ad...,Dry Food,Chicken Meal & Rice Recipe,"3.5 lb, 12.5 lb"
3,https://www.hillspet.com/dog-food/pd-canine-zd...,Wet Food,Hydrolyzed Soy Recipe,13 oz
4,https://www.hillspet.com/dog-food/pd-cd-multic...,stew,Chicken & Vegetable Stew,"12.5 oz, 5.5 oz"
...,...,...,...,...
192,https://www.hillspet.com/dog-food/sd-adult-sma...,Tray,Savory Stew with Beef & Vegetables,3.5 oz
193,https://www.hillspet.com/dog-food/grain-free-c...,Treats,with Chicken & Apples,8oz
194,https://www.hillspet.com/dog-food/sd-canine-ad...,,,
195,https://www.hillspet.com/dog-food/sd-canine-pu...,Wet Food,Savory Stew with Chicken & Vegetables,3.5oz


In [40]:
df_merged = pd.merge(df_cleaned, df_first, on='URL', how='left')

cols = df_merged.columns.tolist()
insert_after = 'Product Description'
insert_at = cols.index(insert_after) + 1

for col in ['Food Form', 'Flavor', 'Sizes']:
    if col in cols:
        cols.remove(col)
        
for col in reversed(['Food Form','Flavor', 'Sizes']):
    cols.insert(insert_at, col)

df_reorder = df_merged[cols]

df_reorder.to_csv("final1.csv", index=False)


In [41]:
df_reorder

Unnamed: 0,URL,Product Title,Product Description,Food Form,Flavor,Sizes,Recommended For,Ingredients,Feeding Tips,Key Features,...,Taurine,Linoleic Acid,Chloride,BCAA Total,Glutamine + Glutamate,Iron,Copper,Zinc,Vitamin K,Alpha-Linolenic Acid (ALA)
0,https://www.hillspet.com/dog-food/sd-canine-ad...,"Perfect Digestion Small Bites Chicken, Brown R...",Science Diet's breakthrough nutrition supports...,Dry Food,"Chicken, Brown Rice & Whole Oats Recipe",3.5 lb,Adult dogs 1 - 6 years to help keep their dige...,"Chicken, Cracked Pearled Barley, Brown Rice, B...",HELPFUL TIPSAdjust feeding amounts as necessar...,Prebiotics are nutrients that feed the billion...,...,,,,,,,,,,
1,https://www.hillspet.com/dog-food/sd-canine-ad...,"Adult Small Bites No Corn, Wheat, Soy Chicken ...","Supports healthy immune system, digestion, lea...",dry,Chicken & Brown Rice Recipe,15 lb,Adult dogs 1 - 6 years of age.,"Chicken, Brown Rice, Brewers Rice, Cracked Pea...",HELPFUL TIPSAdjust feeding amounts as necessar...,"Hill's Science Diet Adult Small Bites No Corn,...",...,,,,,,,,,,
2,https://www.hillspet.com/dog-food/sd-canine-ad...,Adult 7+ Senior Vitality Small & Mini Chicken ...,Improves Everyday Ability to Get Up & Go,Dry Food,Chicken Meal & Rice Recipe,"3.5 lb, 12.5 lb",Adult dogs 7+ years of age,"Chicken, Brewers Rice, Yellow Peas, Cracked Pe...",HELPFUL TIPSAdjust feeding amounts as necessar...,Hill's Science Diet Youthful Vitality Small & ...,...,,,,,,,,,,
3,https://www.hillspet.com/dog-food/pd-canine-zd...,z/d Low Fat Hydrolyzed Soy Recipe Wet Dog Food,Nutrition specially formulated with hydrolyzed...,Wet Food,Hydrolyzed Soy Recipe,13 oz,Adult Dogs,"Water, corn starch, hydrolyzed soy protein, so...",Ask your veterinarian for specific feeding ins...,Dogs with recurrent gastrointestinal upset may...,...,,,,,,,,,,
4,https://www.hillspet.com/dog-food/pd-cd-multic...,c/d Multicare Chicken & Vegetable Stew Dog Food,Support for your dog’s urinary health,stew,Chicken & Vegetable Stew,"12.5 oz, 5.5 oz",Adult Dogs,"Water, Chicken, Pork Liver, Carrots, Rice, Gre...",Ask your veterinarian for specific feeding ins...,Bladder stones are collections of mineral crys...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,https://www.hillspet.com/dog-food/sd-adult-sma...,Adult Small & Mini Savory Stew with Beef & Veg...,A delicious complement to the nutrition of Sci...,Tray,Savory Stew with Beef & Vegetables,3.5 oz,Adult dogs 1 - 6 years of age.,"Water, Beef, Pork Liver, Brown Rice, Carrots, ...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Adult Small Paws Savory St...,...,,,,,,,,,,
193,https://www.hillspet.com/dog-food/grain-free-c...,Grain Free Crunchy Naturals with Chicken & App...,,Treats,with Chicken & Apples,8oz,"Normally active puppy (9+ weeks old), adult & ...","Chicken, Yellow Peas, Potatoes, Potato Starch,...","We know that face is hard to resist, but feed ...",Hill's Grain Free Crunchy Naturals with Chicke...,...,,,,,,,,,,
194,https://www.hillspet.com/dog-food/sd-canine-ad...,,,,,,,,,,...,,,,,,,,,,
195,https://www.hillspet.com/dog-food/sd-canine-pu...,Puppy Small & Mini Savory Stew with Chicken & ...,A delicious complement to the nutrition of Sci...,Wet Food,Savory Stew with Chicken & Vegetables,3.5oz,Puppies up to 1 year old and pregnant or nursi...,"Chicken Broth, Chicken, Pork Liver, Brown Rice...",HELPFUL TIPSCover and refrigerate unused porti...,Hill's Science Diet Puppy Small & Mini Savory ...,...,,,,,,,,,,


In [100]:
if "Feeding Tips" in df_reorder.columns:
    df_reorder["Feeding Tips"] = df_reorder["Feeding Tips"].str.replace(
        r"(?i)^helpful tips[:\s]*", "", regex=True
    )
df_reorder.to_csv("final1.csv", index=False)

In [None]:
df_reorder.columns = df_reorder.columns.str.strip().str.lower()
df_reorder.to_csv("final1.csv", index=False)

Scraping [from hillspet.co.uk](https://www.hillspet.co.uk/products/dog-food)

In [42]:
import requests
from bs4 import BeautifulSoup
import time
import random
import pandas as pd

# url = 'https://www.hillspet.com/dog-food'
url = 'https://www.hillspet.co.uk/products/dog-food'

# List to store the scraped data
data = []

# Function to scrape a single page
def scrape_page(url):
    # Send a GET request to the website
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all elements with the specified classes
        product_titles = soup.find_all(class_='articleList-title')
        product_footers = soup.find_all(class_='articleList-description')

        # Extract and store the data from each element
        for title, footer in zip(product_titles, product_footers):
            title_text = title.get_text(strip=True)
            relative_link = footer.find('a')['href'] if footer.find('a') else ''
            link = f"https://www.hillspet.co.uk{relative_link}" if relative_link else 'No link found'

            # Append the data to the list
            data.append({
                'Title': title_text,
                'Link': link
            })
    else:
        print('Failed to retrieve the webpage. Status code:', response.status_code)

# Scrape the initial page
scrape_page(url)

time.sleep(random.uniform(2, 5))  

# Create a DataFrame from the collected data
df_links = pd.DataFrame(data)
df_links.to_csv("UK_links.csv", index=False)


df_links

Unnamed: 0,Title,Link
0,Hill's Prescription Dietk/d Kidney Care Wet Do...,https://www.hillspet.co.uk/dog-food/pd-canine-...
1,Hill's Prescription Dietl/d Wet Dog Food Original,https://www.hillspet.co.uk/dog-food/pd-canine-...
2,Hill's Science PlanAdult Large Breed Dry Dog F...,https://www.hillspet.co.uk/dog-food/sp-canine-...
3,Hill's Science PlanMedium Mature Adult 7+ Dog ...,https://www.hillspet.co.uk/dog-food/sp-canine-...
4,Hill's Prescription Diett/d Mini Dog Food,https://www.hillspet.co.uk/dog-food/pd-canine-...
...,...,...
130,Hill's Science PlanPerfect Digestion Adult Lar...,https://www.hillspet.co.uk/dog-food/sp-canine-...
131,Hill's Science PlanPuppy Wet Dog Food with Chi...,https://www.hillspet.co.uk/dog-food/sp-canine-...
132,Hill's Prescription DietDerm Complete Puppy Dr...,https://www.hillspet.co.uk/dog-food/pd-canine-...
133,Hill's Vet EssentialsMulti-BenefitAdult Wet Do...,https://www.hillspet.co.uk/dog-food/ve-canine-...


In [43]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

df_links = pd.read_csv("UK_links.csv")  # 135 product URLs

all_data = []

for url in df_links['Link']:
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # ----------- Title & Description -----------
        title_elem = soup.select_one("div.productAwareRichText h1")
        product_title = title_elem.get_text(strip=True) if title_elem else "N/A"

        desc_elem = soup.select_one("div.productAwareRichText p")
        product_description = desc_elem.get_text(strip=True) if desc_elem else "N/A"


        # ----------- Description Attributes (Need/Preference, Flavour, etc.) -----------
        attributes = {}
        desc_blocks = soup.find_all("div", class_="pdp-2019-description-row")

        for block in desc_blocks:
            paragraph = block.find("div", class_="paragraphSystem content")
            if paragraph:
                ps = paragraph.find_all("p")
                if len(ps) >= 2:
                    key = ps[0].get_text(strip=True).rstrip(":")
                    value = ps[1].get_text(strip=True)
                    if key and value:
                        attributes[key] = value

        # ----------- Accordion-like Sections: Key Benefits, Ingredients -----------
        # accordion_data = {}
        # for h3 in soup.find_all("h3", class_="accordion-title component"):
        #     section_title = h3.get_text(strip=True)
        #     content_div = h3.find_parent("li").find_next("id", class_="accordion-content-wrapper")
        #     if content_div:
        #         accordion_data[section_title] = content_div.get_text(strip=True)



        # ----------- Combine All Info -----------
        data = {
            "URL": url,
            "Product Title": product_title,
            "Product Description": product_description,
            **attributes,        # Flavour, Sizes, etc.
        }

        all_data.append(data)
        print(f" Scraped: {product_title}")

    except Exception as e:
        print(f" Failed on {url}: {e}")
    
    time.sleep(random.uniform(1.5, 3.0))

# ----------- Save to CSV -----------
df_UK1 = pd.DataFrame(all_data)
df_UK1.to_csv("UK_first.csv", index=False)
print(" Scraping complete. Data saved to UK_first.csv")


 Scraped: k/d Kidney Care Wet Dog Food Original Can
 Scraped: l/d Wet Dog Food Original
 Scraped: Adult Large Breed Dry Dog Food with Lamb & Rice
 Scraped: Medium Mature Adult 7+ Dog Food with Chicken Dry Bag
 Scraped: t/d Mini Dog Food
 Scraped: Medium Puppy Food
 Scraped: d/d Food Sensitivities Dry Dog Food with Duck & Rice Bag
 Scraped: Adult Large Breed Dry Dog Food with Chicken
 Scraped: Mature Adult Medium Dog Food
 Scraped: Light Adult Wet Dog Food with Chicken
 Scraped: b/d Brain Ageing Care Dry Dog Food with Chicken Bag
 Scraped: u/d Dry Dog Food Original
 Scraped: t/d Dog Food
 Scraped: Oral Care Adult Dry Dog Food with Chicken
 Scraped: l/d Dry Dog Food Original
 Scraped: k/d Dog Food
 Scraped: Light Adult Large Breed Dry Dog Food with Chicken
 Scraped: Puppy Medium Breed Dry Dog Food with Lamb & Rice
 Scraped: Adult Wet Dog Food with Turkey
 Scraped: Adult Medium Breed Dry Dog Food with Lamb & Rice
 Scraped: Puppy Large Breed Dry Dog Food with Chicken
 Scraped: Healthy Weig

In [61]:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Load product URLs
df_links = pd.read_csv("UK_links.csv")
all_data = []

for url in df_links['Link']:
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # --- Product Title & Description ---
        title_elem = soup.select_one("div.productAwareRichText h1")
        desc_elem = soup.select_one("div.productAwareRichText p")
        product_title = title_elem.get_text(strip=True) if title_elem else "N/A"
        product_description = desc_elem.get_text(strip=True) if desc_elem else "N/A"

        # --- Description Rows (Need/Preference, Flavour, Sizes, etc.) ---
        attributes = {}
        for row in soup.select("div.pdp-2023-description-row"):
            divs = row.find_all("div", recursive=False)
            if len(divs) >= 2:
                key = divs[0].get_text(strip=True).rstrip(":")
                value = divs[1].get_text(strip=True)
                attributes[key] = value

        # --- Alternate Product Recommendation ---
        alt_elem = soup.find("div", class_="articleList-title")
        alternate_product = alt_elem.get_text(strip=True) if alt_elem else "N/A"

        # --- Accordion Data ---
        accordion_data = {
            "Key Benefits": "N/A",
            "Recommended for": "N/A",
            "Ingredients": "N/A",
            "Helpful Tips": "N/A",
            "Nutrition and Caloric Content": {}
        }

        for h3 in soup.select("h3.accordion-title.component"):
            title_text = h3.get_text(strip=True)
            content_div = h3.find_parent("li").find_next("div", class_="accordion-content-wrapper")
            content = content_div.get_text(" ", strip=True) if content_div else "N/A"

            if "nutrition" in title_text.lower():
                for table in content_div.find_all("table"):
                    for row in table.find_all("tr"):
                        cells = row.find_all("td")
                        if len(cells) == 2:
                            nutrient = cells[0].get_text(strip=True)
                            value = cells[1].get_text(strip=True)
                            accordion_data["Nutrition and Caloric Content"][nutrient] = value
            else:
                for key in accordion_data:
                    if key.lower() in title_text.lower():
                        accordion_data[key] = content

        # --- Combine and Save ---
        product_data = {
            "URL": url,
            "Product Title": product_title,
            "Product Description": product_description,
            "Need/Preference": attributes.get("Need/Preference", "N/A"),
            "Alternate Product Recommendation": alternate_product,
            "Flavour": attributes.get("Flavour", "N/A"),
            "Sizes": attributes.get("Sizes", "N/A"),
            "Breed Size": attributes.get("Breed size", "N/A"),
            "Lifestage": attributes.get("Lifestage", "N/A"),
            "Food Form": attributes.get("Food Form", "N/A"),
            "Key Benefits": accordion_data["Key Benefits"],
            "Recommended for": accordion_data["Recommended for"],
            "Ingredients": accordion_data["Ingredients"],
            "Helpful Tips": accordion_data["Helpful Tips"],
            **accordion_data["Nutrition and Caloric Content"]
        }

        all_data.append(product_data)
        print(f" Scraped: {product_title}")
        time.sleep(random.uniform(1.2, 2.5))

    except Exception as e:
        print(f" Failed on {url}: {e}")
        continue

# Save output
df = pd.DataFrame(all_data)
df.to_csv("UK_second.csv", index=False)
print(" Scraping complete. Saved to UK_second.csv")


 Scraped: k/d Kidney Care Wet Dog Food Original Can
 Scraped: l/d Wet Dog Food Original
 Scraped: Adult Large Breed Dry Dog Food with Lamb & Rice
 Scraped: Medium Mature Adult 7+ Dog Food with Chicken Dry Bag
 Scraped: t/d Mini Dog Food
 Scraped: Medium Puppy Food
 Scraped: d/d Food Sensitivities Dry Dog Food with Duck & Rice Bag
 Scraped: Adult Large Breed Dry Dog Food with Chicken
 Scraped: Mature Adult Medium Dog Food
 Scraped: Light Adult Wet Dog Food with Chicken
 Scraped: b/d Brain Ageing Care Dry Dog Food with Chicken Bag
 Scraped: u/d Dry Dog Food Original
 Scraped: t/d Dog Food
 Scraped: Oral Care Adult Dry Dog Food with Chicken
 Scraped: l/d Dry Dog Food Original
 Scraped: k/d Dog Food
 Scraped: Light Adult Large Breed Dry Dog Food with Chicken
 Scraped: Puppy Medium Breed Dry Dog Food with Lamb & Rice
 Scraped: Adult Wet Dog Food with Turkey
 Scraped: Adult Medium Breed Dry Dog Food with Lamb & Rice
 Scraped: Puppy Large Breed Dry Dog Food with Chicken
 Scraped: Healthy Weig

In [62]:
import re
for row in all_data:
    tips = row.get("Helpful Tips", "")
    match = re.split(r'(?i)kibble size', tips)
    if match:
        row["Helpful Tips"] = match[0].strip()

# Save output
df = pd.DataFrame(all_data)

In [66]:
for row in all_data:
    benefits = row.get("Key Benefits", "")
    match = re.split(r'(?i)how it helps|recommended for', benefits)
    if match:
        row["Key Benefits"] = match[0].strip()
        
df = pd.DataFrame(all_data)

In [67]:
for row in all_data:
    # Helpful Tips
    tips = row.get("Helpful Tips", "")
    match = re.split(r'(?i)kibble size', tips)
    if match:
        row["Helpful Tips"] = match[0].strip()

    # Key Benefits
    benefits = row.get("Key Benefits", "")
    benefits = re.sub(r'(?i)^key benefits\s*', '', benefits)
    match = re.split(r'(?i)how it helps|recommended for', benefits)
    if match:
        row["Key Benefits"] = match[0].strip()

    # Ingredients
    ingredients = row.get("Ingredients", "")
    ingredients = re.sub(
        r'(?i)^natural ingredients for a long, happy life.*?life\.', '', ingredients).strip()
    row["Ingredients"] = ingredients

df = pd.DataFrame(all_data)

In [68]:
for row in all_data:
    # Ingredients cleanup
    ingredients = row.get("Ingredients", "")

    
    # Обрезать всё после "About Our Ingredients"
    match = re.split(r'(?i)about our ingredients', ingredients)
    if match:
        ingredients = match[0].strip()
    
    row["Ingredients"] = ingredients

df = pd.DataFrame(all_data)

In [69]:
for row in all_data:
    tips = row.get("Helpful Tips", "")

    # Удаляем "FEEDING INSTRUCTIONS:" или "HELPFUL TIPS" в начале строки
    tips = re.sub(r'(?i)^(feeding instructions:|helpful tips)\s*', '', tips)

    row["Helpful Tips"] = tips.strip()
df = pd.DataFrame(all_data)

In [70]:
if "Nutrient" in df.columns:
    df = df.drop(columns=["Nutrient"])

In [71]:
df.to_csv("UK_second.csv", index=False)

Third


In [55]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Load your existing product links CSV
df_links = pd.read_csv("UK_links.csv")

# Define function to scrape a single product page
def scrape_product_page(url):
    result = {
        "URL": url,
        "Product title": "",
        "Product description": "",
        "Need/preference": "",
        "Alternate product recommendation": "",
        "Flavour": "",
        "Sizes": "",
        "Breed sizes": "",
        "Lifestage": "",
        "Food form": "",
        "Key benefits": "",
        "Recommended for": "",
        "Not recommended for": "",
        "Ingredients": "",
        "Helpful tips": "",
        "Nutrition and caloric content": ""
    }

    try:
        res = requests.get(url)
        if res.status_code != 200:
            return result  # Return blank if failed

        soup = BeautifulSoup(res.content, "html.parser")

        # Product Title
        h1_tag = soup.find("h1")
        if h1_tag:
            result["Product title"] = h1_tag.get_text(strip=True)

        # Product Description
        desc = soup.find("div", class_="productAwareRichText")
        if desc:
            p = desc.find("p")
            if p:
                result["Product description"] = p.get_text(strip=True)

        # Description details by labels
        detail_sections = soup.find_all("div", class_="productAwareRichText")
        for section in detail_sections:
            texts = section.get_text(strip=True)
            if "Flavour" in texts:
                result["Flavour"] = texts.replace("Flavour", "").strip()
            elif "Breed Size" in texts:
                result["Breed sizes"] = texts.replace("Breed Size", "").strip()
            elif "Food Form" in texts:
                result["Food form"] = texts.replace("Food Form", "").strip()
            elif "Lifestage" in texts:
                result["Lifestage"] = texts.replace("Lifestage", "").strip()
            elif "Sizes" in texts:
                result["Sizes"] = texts.replace("Sizes", "").strip()

        # Key Benefits
        key_benefits_section = soup.find("div", id=lambda x: x and "accordion-content" in x)
        if key_benefits_section:
            ul = key_benefits_section.find("ul")
            if ul:
                result["Key benefits"] = "; ".join([li.get_text(strip=True) for li in ul.find_all("li")])

        # Recommended and Not Recommended
        for div in soup.find_all("div", class_="richText-content"):
            if div.find("h3") and "Recommended for" in div.get_text():
                result["Recommended for"] = div.find_next("p").get_text(strip=True)
            if div.find("h3") and "Not recommended for" in div.get_text():
                result["Not recommended for"] = div.find_next("p").get_text(strip=True)

        # Helpful tips
        for h3 in soup.find_all("h3"):
            if "Transitioning to Hill" in h3.get_text():
                tip = h3.find_next("p")
                if tip:
                    result["Helpful tips"] = tip.get_text(strip=True)

        # Ingredients and Nutrition placeholder (you can expand with more specific search logic later)
        ingredients_anchor = soup.find("a", {"id": "ingredients"})
        if ingredients_anchor:
            ingredients_text = ingredients_anchor.find_next("p")
            if ingredients_text:
                result["Ingredients"] = ingredients_text.get_text(strip=True)

        # Nutrition and caloric content - similar logic, locate anchor or heading and extract following table or text

    except Exception as e:
        print(f"Error scraping {url}: {e}")

    return result

# Scrape all product pages
product_data = []
for i, row in df_links.iterrows():
    url = row["Link"]
    print(f"Scraping {i+1}/{len(df_links)}: {url}")
    product_data.append(scrape_product_page(url))
    time.sleep(random.uniform(2, 4))  

# Save the data to a new CSV
df_UK3 = pd.DataFrame(product_data)
df_UK3.to_csv("UK_third.csv", index=False)


Scraping 1/135: https://www.hillspet.co.uk/dog-food/pd-canine-prescription-diet-kd-with-chicken-canned
Scraping 2/135: https://www.hillspet.co.uk/dog-food/pd-canine-prescription-diet-ld-canned
Scraping 3/135: https://www.hillspet.co.uk/dog-food/sp-canine-science-plan-adult-advanced-fitness-large-breed-lamb-and-rice-dry
Scraping 4/135: https://www.hillspet.co.uk/dog-food/sp-canine-science-plan-mature-adult-7-plus-active-longevity-medium-with-chicken-dry
Scraping 5/135: https://www.hillspet.co.uk/dog-food/pd-canine-prescription-diet-td-mini-dry
Scraping 6/135: https://www.hillspet.co.uk/dog-food/sp-canine-science-plan-puppy-healthy-development-medium-chicken-dry
Scraping 7/135: https://www.hillspet.co.uk/dog-food/pd-canine-prescription-diet-dd-duck-and-rice-dry
Scraping 8/135: https://www.hillspet.co.uk/dog-food/sp-canine-science-plan-adult-advanced-fitness-large-breed-with-chicken-dry
Scraping 9/135: https://www.hillspet.co.uk/dog-food/sp-canine-science-plan-mature-adult-7-plus-active-l

In [56]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Загрузка ссылок
df_links = pd.read_csv("UK_links.csv")
all_data = []

for url in df_links['Link']:
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Title
        title_elem = soup.select_one("div.productAwareRichText h1")
        product_title = title_elem.get_text(strip=True) if title_elem else "N/A"

        # Nutrition and caloric content
        nutrition_kcal = "N/A"
        for h3 in soup.select("h3.accordion-title.component"):
            if "nutrition" in h3.get_text(strip=True).lower():
                content_div = h3.find_parent("li").find_next("div", class_="accordion-content-wrapper")
                if content_div:
                    kcal_div = content_div.find("div", class_="richText-content")
                    if kcal_div:
                        nutrition_kcal = kcal_div.get_text(strip=True)
                break  

        all_data.append({
            "URL": url,
            "Product Title": product_title,
            "Nutrition and caloric content": nutrition_kcal
        })

        print(f" {product_title}")
        time.sleep(random.uniform(1.2, 2.5))

    except Exception as e:
        print(f" Failed on {url}: {e}")
        continue

df_UK4 = pd.DataFrame(all_data)
df_UK4.to_csv("UK_fourth.csv", index=False)
print(" Saved to UK_fourth.csv")


 k/d Kidney Care Wet Dog Food Original Can
 l/d Wet Dog Food Original
 Adult Large Breed Dry Dog Food with Lamb & Rice
 Medium Mature Adult 7+ Dog Food with Chicken Dry Bag
 t/d Mini Dog Food
 Medium Puppy Food
 d/d Food Sensitivities Dry Dog Food with Duck & Rice Bag
 Adult Large Breed Dry Dog Food with Chicken
 Mature Adult Medium Dog Food
 Light Adult Wet Dog Food with Chicken
 b/d Brain Ageing Care Dry Dog Food with Chicken Bag
 u/d Dry Dog Food Original
 t/d Dog Food
 Oral Care Adult Dry Dog Food with Chicken
 l/d Dry Dog Food Original
 k/d Dog Food
 Light Adult Large Breed Dry Dog Food with Chicken
 Puppy Medium Breed Dry Dog Food with Lamb & Rice
 Adult Wet Dog Food with Turkey
 Adult Medium Breed Dry Dog Food with Lamb & Rice
 Puppy Large Breed Dry Dog Food with Chicken
 Healthy Weight Dog Treats
 Hypoallergenic Dog Treats
 c/d Multicare Dry Dog Food with Chicken Bag
 Perfect Weight Adult Small & Mini Breed Dog Mousse with Turkey
 Mature Adult Multipack Wet Dog Food with Chicke

In [73]:
import csv

# Ожидаемое количество колонок
expected_columns = 93

# Фиксируем строки
cleaned_rows = []

with open("UK_second.csv", encoding="utf-8") as infile:
    reader = csv.reader(infile)
    header = next(reader)
    cleaned_rows.append(header)

    for row in reader:
        if len(row) < expected_columns:
            # Если колонок меньше, дополняем N/A
            row += ["N/A"] * (expected_columns - len(row))
        elif len(row) > expected_columns:
            # Если колонок больше, обрезаем
            row = row[:expected_columns]
        cleaned_rows.append(row)

# Сохраняем в новый CSV
with open("UK_second_fixed.csv", "w", encoding="utf-8", newline="") as outfile:
    writer = csv.writer(outfile)
    writer.writerows(cleaned_rows)

print("Исправленный файл сохранён как UK_second_fixed.csv")


Исправленный файл сохранён как UK_second_fixed.csv


In [74]:
import pandas as pd
import csv

# Пути к исходному и исправленному файлу
input_file = "UK_second.csv"
fixed_file = "UK_second_fixed.csv"

# Определим ожидаемое количество колонок по заголовку
with open(input_file, encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)
    expected_columns = len(header)

# Исправим строки: дополним или обрежем до нужного количества колонок
cleaned_rows = [header]
with open(input_file, encoding="utf-8") as f:
    reader = csv.reader(f)
    next(reader)  # пропускаем заголовок
    for row in reader:
        if len(row) < expected_columns:
            row += ["N/A"] * (expected_columns - len(row))
        elif len(row) > expected_columns:
            row = row[:expected_columns]
        cleaned_rows.append(row)

# Сохраним исправленный CSV
with open(fixed_file, "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(cleaned_rows)

# Загрузим исправленный файл для подтверждения
df_fixed = pd.read_csv(fixed_file)
df_fixed.shape

(135, 64)

In [59]:
df_UK4.columns = df_UK4.columns.str.strip()

In [None]:
df_mer = df.merge(df_UK4[["URL", "Nutrition and caloric content"]], on="URL", how="left")

columns = list(df_mer.columns)
if "Helpful Tips" in columns and "Nutrition and caloric content" in columns:
    columns.remove("Nutrition and caloric content")
    insert_index = columns.index("Helpful Tips") + 1
    columns.insert(insert_index, "Nutrition and caloric content")
    df_mer = df_mer[columns]

final_output = "UK_24.csv"
df_mer.to_csv(final_output, index=False)
final_output

'UK_24.csv'

In [95]:
df_mer.columns = df_mer.columns.str.strip().str.lower()
df_UK3.columns = df_UK3.columns.str.strip().str.lower()

# Объединяем по url (берём нужные колонки из df_UK3)
df_merged1 = df_mer.drop(columns=['sizes', 'recommended for'], errors='ignore') \
    .merge(df_UK3[['url', 'sizes', 'recommended for']], on='url', how='left')

df_merged1.to_csv("UK_234.csv", index=False)

In [97]:
df_UK1.columns = df_UK1.columns.str.strip().str.lower()

df_final = df_merged1.drop(columns=['need/preference', 'alternate product recommendation', 'flavour', 'breed size', 'lifestage', 'food form'], errors='ignore') \
    .merge(df_UK1[['url', 'need/preference', 'alternate product recommendation', 'flavour', 'breed size', 'lifestage', 'food form']], on='url', how='left')

df_final.to_csv("UK_final.csv", index=False)

In [98]:
df_final

Unnamed: 0,url,product title,product description,key benefits,ingredients,helpful tips,nutrition and caloric content,moisture,protein,fat,...,starch,bcaa total,sizes,recommended for,need/preference,alternate product recommendation,flavour,breed size,lifestage,food form
0,https://www.hillspet.co.uk/dog-food/pd-canine-...,k/d Kidney Care Wet Dog Food Original Can,k/d Kidney Care Wet Dog Food Original Can,"When dogs have damage to kidney tissues, the k...",ActivBiome+ Kidney Defense prebiotic blend (0....,Feed only this food. It is recommended that ad...,"338.2 kcal (370 g) can, 182.8 kcal (200 g) can",73 %,4.2 %,6.8 %,...,,,"180g, 350g",,Kidney Care,Liver Care,Original,,,
1,https://www.hillspet.co.uk/dog-food/pd-canine-...,l/d Wet Dog Food Original,l/d Wet Dog Food Original,The liver is your dog's largest internal organ...,"COMPOSITION: Cereals, oils and fats, vegetable...",Ask your veterinarian for specific feeding ins...,338.2 kcal (370 g) can,74 %,4.9 %,6.1 %,...,,,370g,,Liver Care,,Original,"Small Breed, Medium Breed, Large Breed",Adult,Wet Food
2,https://www.hillspet.co.uk/dog-food/sp-canine-...,Adult Large Breed Dry Dog Food with Lamb & Rice,Adult Large Breed Dry Dog Food with Lamb & Rice,Hill's Science Plan Large Breed Adult dog food...,"Maize, lamb meal, soybean meal, maize gluten m...",Adjust feeding amounts as necessary to maintai...,3701 kcal/kg (370 kcal/100g),8 %,21.3 %,14.2 %,...,,,14kg,Large breed adult dogs 1 - 5 years of age.,,,with Lamb & Rice,Large Breed,Adult,Dry Food
3,https://www.hillspet.co.uk/dog-food/sp-canine-...,Medium Mature Adult 7+ Dog Food with Chicken D...,Medium Mature Adult 7+ Dog Food with Chicken D...,HILL'S SCIENCE PLAN Mature Adult Medium dog fo...,"COMPOSITION: Maize, wheat, chicken and turkey ...",Adjust feeding amounts as necessary to maintai...,3682 kcal/kg (368 kcal/100g),9 %,17.1 %,14 %,...,,,"2.5kg, 14kg, 18kg",Mature Adult dogs 7+ years of age.,,,with Chicken,Medium Breed,Senior,Dry Food
4,https://www.hillspet.co.uk/dog-food/pd-canine-...,t/d Mini Dog Food,t/d Mini Dog Food,Hill's Prescription Diet t/d Small Bites Dry D...,"COMPOSITION: Cereals, meat and animal derivati...",Feed only this food. It is recommended that a ...,3533 kcal/kg (353 kcal/100g),8 %,16.1 %,15.2 %,...,,,3kg,,Dental Care,Digestive Care,with Chicken,"Small Breed, Medium Breed, Large Breed","Adult, Senior",Dry Food
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,https://www.hillspet.co.uk/dog-food/sp-canine-...,Perfect Digestion Adult Large Breed Dry Dog Fo...,Perfect Digestion Adult Large Breed Dry Dog Fo...,Hill's Science Plan PERFECT DIGESTION LARGE BR...,"COMPOSITION: Wheat, maize, chicken and turkey ...",Feed the daily amount shown; adjust amount to ...,3524 kcal/kg (352 kcal/100g),8 %,18.4 %,11.7 %,...,,,14kg,Adult dogs 1 - 5 years to help keep their dige...,,,with Chicken and Brown Rice,Large Breed,Adult,Dry Food
131,https://www.hillspet.co.uk/dog-food/sp-canine-...,Puppy Wet Dog Food with Chicken,Puppy Wet Dog Food with Chicken,Your puppy will love the taste of HILL'S SCIEN...,COMPOSITION: Meat and animal derivatives (chic...,For puppies/pregnant bitches feed 2 to 4 meals...,367.5 kcal (370 g) can,76 %,6.8 %,5.3 %,...,,,370g,Puppies up to 1 year old and pregnant or nursi...,,,Chicken,"Small Breed, Medium Breed, Large Breed",Puppy/pregnant/nursing,Wet Food
132,https://www.hillspet.co.uk/dog-food/pd-canine-...,Derm Complete Puppy Dry Dog Food Original,Derm Complete Puppy Dry Dog Food Original,Hill's Prescription Diet Derm Complete Puppy D...,"COMPOSITION: Cereals, eggs and egg derivatives...",Feed only this food. It is recommended that a ...,3720 kcal/kg (372 kcal/100g),8 %,23.4 %,16.2 %,...,,,12kg,,"Skin Care, Food Sensitivities",Digestive Care,Original,,Puppy,Dry Food
133,https://www.hillspet.co.uk/dog-food/ve-canine-...,Multi-Benefit Adult Wet Dog Food,Multi-Benefit Adult Wet Dog Food,Available exclusively through your veterinaria...,"COMPOSITION: Chicken (17%), pork, carrots, ric...",Adjust feeding as necessary to maintain optimu...,292 kcal (363 g) can,,6.3 %,3.3 %,...,,,363g,Adult medium breed dogs 1-6 years of age.,,,with Tender Chicken & Vegetables,Medium Breed,Adult,Wet Food


In [105]:
df_final.columns

Index(['url', 'product title', 'product description', 'key benefits',
       'ingredients', 'helpful tips', 'nutrition and caloric content',
       'moisture', 'protein', 'fat', 'carbohydrate (nfe)', 'crude fibre',
       'crude ash', 'soluble fibre', 'calcium', 'phospohorus', 'potassium',
       'sodium', 'magnesium', 'taurine', 'vitamin a', 'vitamin c', 'vitamin d',
       'vitamin e', 'thiamine (vitamin b1)', 'riboflavin (vitamin b2)',
       'niacin (vitamin b3)', 'pyridoxine (vitamin b6)',
       'pantothenic acid (vitamin b5)', 'folic acid (vitamin b9)',
       'vitamin b12 (cobalamin)', 'biotin (vitamin b7)', 'dha', 'epa',
       'epa + dha', 'omega-3-fatty acids', 'omega-6-fatty acids',
       'beta-carotene', 'l-carnitine', 'glucosamine', 'chondroitin sulfate',
       'total dietary fibre', 'iron', 'copper', 'zinc', 'l-arginine',
       'glutamine + glutamate', 'linoleic acid', 'essential fatty acids',
       'chloride', 'sulphur', 'hydroxyproline', 'l-lysine',
       'dl-meth

In [None]:
df_reorder.columns

Index(['url', 'product title', 'product description', 'food form', 'flavor',
       'sizes', 'recommended for', 'ingredients', 'feeding tips',
       'key features', 'caloric content', 'protein', 'fat',
       'carbohydrate / nfe', 'crude fiber', 'calcium', 'phosphorus',
       'potassium', 'sodium', 'magnesium', 'vitamin c', 'vitamin e',
       'total omega-3 fa', 'total omega-6 fa', 'ash', 'vitamin a', 'vitamin d',
       'beta-carotene', 'epa', 'carnitine', 'dha', 'glucosamine',
       'chondroitin sulfate', 'total dietary fiber', 'lysine', 'taurine',
       'linoleic acid', 'chloride', 'bcaa total', 'glutamine + glutamate',
       'iron', 'copper', 'zinc', 'vitamin k', 'alpha-linolenic acid (ala)'],
      dtype='object')

In [107]:
rename_map = {
    'flavor': 'flavour',
    'feeding tips': 'helpful tips',
    'key features': 'key benefits',
    'caloric content': 'nutrition and caloric content',
    'crude fiber': 'crude fibre',
    'carbohydrate / nfe': 'carbohydrate (nfe)',
    'ash': 'crude ash',
    'phosphorus': 'phospohorus',  
    'carnitine': 'l-carnitine',
    'lysine': 'l-lysine',
    'total omega-3 fa': 'omega-3-fatty acids',
    'total omega-6 fa': 'omega-6-fatty acids'
}

In [108]:
df_reorder.rename(columns=rename_map, inplace=True)

combined_df = pd.concat([df_final, df_reorder], ignore_index=True, sort=False)

In [109]:
combined_df.to_csv("combined_dataset_clean.csv", index=False, encoding='utf-8')