## In this notebook the main aim is end up with a dataframe containing all of the main information that we are concerned with regarding a specific make(s) and model(s) of a vehicle

## In the next notebook, the taken information will be taken and analyzed, in order to create find the best deals and to be able to accuratly price a car based on its characteristics

In [2]:
# Imports
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select

import random
import time
from datetime import datetime
from bs4 import BeautifulSoup 
import pandas as pd
import numpy as np
import re
from random import randrange
from tqdm import tqdm
import urllib.parse

In [3]:
def get_model_n_make_links():
    chrome_options = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    # Open Chrome in Incognito mode -> Otherwise mobile.de is likely to block you from accessing the website
    chrome_options.add_argument('--incognito')

    driver_path = ChromeDriverManager().install()
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    starting_link_to_scrape = "https://www.mobile.de/?lang=en"
    driver.get(starting_link_to_scrape)
    time.sleep(1)
    base_source = driver.page_source
    base_soup = BeautifulSoup(base_source, 'html.parser')

    make_list = base_soup.findAll('div', {'class': 'jtVMt'})[0]
    make_list.findAll("option")

    ids = []
    makes = []

    for option in make_list.findAll("option"):
        value = option.get('value') # get value of xml
        text = option.text # get text of xml

        if text != "Any":
            ids.append(value)
            makes.append(text)

    base_df = pd.DataFrame({
        'id1': ids,
        'car_make': makes
    })
    base_df = base_df.drop_duplicates()
    base_df = base_df[~base_df['car_make'].isin(['Any', 'Other', ''])]

    car_base_model_data = pd.DataFrame()

    for car_make_id in tqdm(base_df["id1"], "Progress: "):
        # Locate the dropdown element
        dropdown_element = driver.find_element("xpath", "//select[@data-testid='qs-select-make']")

        # Create a Select object for the dropdown
        select_dropdown = Select(dropdown_element)

        select_dropdown.select_by_value(str(car_make_id)) 

        time.sleep(3) # wait for the page to load
        
        base_source = driver.page_source
        base_soup = BeautifulSoup(base_source, 'html.parser')
        
        model_list = base_soup.findAll('div', {'class': 'jtVMt'})[1]
        
        ids_model = []
        model = []

        for option in model_list.findAll("option"):
            value = option.get('value') # get value of xml
            text = option.text # get text of xml

            if text != "Any":
                ids_model.append(value)
                model.append(text)
                
        df_model = pd.DataFrame({
        'id2': ids_model,
        'car_model': model,
        'car_make': car_make_id 
        }) 
        
        car_base_model_data = pd.concat([car_base_model_data, df_model], ignore_index=True)
        
    driver.close()

    merged_df = car_base_model_data.merge(base_df, left_on='car_make', right_on='id1')
    merged_df = merged_df.drop(["car_make_x"], axis = 1)
    merged_df.rename(columns = {'id2':'model_id', 'id1':'make_id', 'car_make_y':'car_make'}, inplace = True)
    # Create the link responsible for opening the landing page for a specific make and model 
    merged_df["link"] = ("https://suchen.mobile.de/fahrzeuge/search.html?dam=0&isSearchRequest=true&ms=" + merged_df["make_id"] + "%3B" + merged_df["model_id"] + "%3B%3B&ref=quickSearch&sb=rel&vc=Car")

    return merged_df

In [4]:
df_make_model_links = get_model_n_make_links()
display(df_make_model_links)

Progress: 100%|███████████████████████████████| 131/131 [06:45<00:00,  3.10s/it]


Unnamed: 0,model_id,car_model,make_id,car_make,link
0,2,100,1900,Audi,https://suchen.mobile.de/fahrzeuge/search.html...
1,3,200,1900,Audi,https://suchen.mobile.de/fahrzeuge/search.html...
2,5,80,1900,Audi,https://suchen.mobile.de/fahrzeuge/search.html...
3,6,90,1900,Audi,https://suchen.mobile.de/fahrzeuge/search.html...
4,25,A1,1900,Audi,https://suchen.mobile.de/fahrzeuge/search.html...
...,...,...,...,...,...
2380,4,MF 30,25650,Wiesmann,https://suchen.mobile.de/fahrzeuge/search.html...
2381,6,MF 35,25650,Wiesmann,https://suchen.mobile.de/fahrzeuge/search.html...
2382,7,MF 4,25650,Wiesmann,https://suchen.mobile.de/fahrzeuge/search.html...
2383,8,MF 5,25650,Wiesmann,https://suchen.mobile.de/fahrzeuge/search.html...


### Let's build this particular scraper with Porche 911 in mind, more specifically the 997 generation 

In [7]:
porsche_997_link = df_make_model_links["link"].iloc[2012]
# this link should be taking us directly to the porsche 997 first page of ads
print(porsche_997_link)

https://suchen.mobile.de/fahrzeuge/search.html?dam=0&isSearchRequest=true&ms=20100%3B17%3B%3B&ref=quickSearch&sb=rel&vc=Car


### However, let's make further narrow down the model version, which ads we will scrape later on

In [8]:
def generate_url(base_url="", 
                 start_year=None, end_year=None, 
                 max_mileage=None, 
                 min_power=None, max_power=None, 
                 fuel_types=None):
    
    params = []

    # For year range
    if start_year or end_year:
        year_range = f"{start_year if start_year else ''}%3A{end_year if end_year else ''}"
        params.append(f"fr={year_range}")

    # For max mileage
    if max_mileage:
        params.append(f"ml=%3A{max_mileage}")

    # For power range
    power_range = []
    if min_power:
        power_range.append(str(min_power))
    power_range.append('%3A')
    if max_power:
        power_range.append(str(max_power))
    if power_range:
        params.append(f"pw={''.join(power_range)}")

    # For fuel types
    if fuel_types:
        for fuel_type in fuel_types:
            params.append(f"ft={fuel_type}")

    # The default parameters, ensuring they are not duplicated
    default_params = ["ref=quickSearch", "sb=rel", "vc=Car"]
    for param in default_params:
        if param not in base_url:
            params.append(param)
    
    url = base_url + "&" + "&".join(params)
    return url

### Let's take all the ads for the S/4S (which had more than 295kW of power) models of the 997.2 generation, which took production between 2009 and 2011  

In [10]:
p997_carreraS = generate_url(base_url= porsche_997_link, 
                 start_year=2009, end_year=2011, 
                 max_mileage=None, 
                 min_power=None, max_power=295, 
                 fuel_types=None)
print(p997_carreraS)

https://suchen.mobile.de/fahrzeuge/search.html?dam=0&isSearchRequest=true&ms=20100%3B17%3B%3B&ref=quickSearch&sb=rel&vc=Car&fr=2009%3A2011&pw=%3A295


### Given that we are in possession of the landing page for the specific model, lets collect all the links for the individual ads. so that we could access them later on and scrape the information from them

### For this we would need to create a loop, which goes over each individual page with ads for the make & model in mind and collect the relevant links

In [14]:
def page_ad_scraper(pages_to_go_over, make_model_input_link="", ad_links=[], id_ad_all=[]):
    store_pages_error = []
    for ad_page in tqdm(pages_to_go_over):
        chrome_options = webdriver.ChromeOptions()
        prefs = {"profile.managed_default_content_settings.images": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        # Open Chrome in incognito mode
        chrome_options.add_argument('--incognito')

        driver_path = ChromeDriverManager().install()
        service = Service(driver_path)
        driver = webdriver.Chrome(service=service, options=chrome_options)

        single_page_link = make_model_input_link + "&pageNumber=" + str(ad_page)
        driver.get(single_page_link)
        time.sleep(5)
        base_source = driver.page_source
        single_page_soup = BeautifulSoup(base_source, 'html.parser')

        x = len(ad_links) # before loop count
        links = single_page_soup.find_all(lambda tag: tag.name == 'a' and tag.get('href', '').startswith('/fahrzeuge/details.html'))
        for element in links:
            ad_link = "https://suchen.mobile.de" + (element["href"])
            #print(ad_link)
            ad_links.append(ad_link)

        y = len(ad_links) # after loop count 
        print("page:", ad_page, "added ads", y - x)

    driver.close()

    for link in ad_links:
        match = re.search(r'id=(\d+)', link)
        if match:
            id_ad_all.append(match.group(1))

    return ad_links, id_ad_all, store_pages_error

In [15]:
 def scrape_links_one_make_one_model(df, make_model_input_link = "", sleep = 2):
    chrome_options = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    # Open Chrome in incognito mode
    chrome_options.add_argument('--incognito')

    driver_path = ChromeDriverManager().install()
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)
    
    # open the landing page 
    driver.get(make_model_input_link)
    time.sleep(sleep)
    base_source = driver.page_source
    base_soup = BeautifulSoup(base_source, 'html.parser')

    # get the number of pages 
    elements = base_soup.find_all('span', {'class': 'eseJd'})
    pages = []
    
    try:
        for elem in elements:
            elem = str(elem)
            match = re.search(r'>(\d+)<', elem)
            if match:
                extracted_number = match.group(1)
            else:
                extracted_number = None
            
            pages.append(extracted_number)
        print("max_number_pages:", pages[-2])
        # get the pages number as an integer    
    except:
        pages = [1]
        print("max_number_pages:", 1)

    driver.close()
    pages_number = int(pages[-2])

    ad_links = []
    id_ad_all = []

    pages_all = list(range(1, pages_number + 1))
    random.shuffle(pages_all)

    ad_links, id_ad_all, errors = page_ad_scraper(pages_all, make_model_input_link, ad_links, id_ad_all)
    while len(errors) != 0:
        ad_links, id_ad_all, errors = page_ad_scraper(errors, make_model_input_link, ad_links, id_ad_all)
        

    ad_dict = {"links_ads":ad_links,
               "id_ad":id_ad_all}
    
    ads_df = pd.DataFrame(ad_dict)
    ads_df = ads_df.drop_duplicates(subset='id_ad')
    ads_df.reset_index(drop = True, inplace=True)
    
    # get a dislay of amount of ads 
    print("Saved ads:", ads_df.shape[0])
    
    # get time date 
    now = datetime.now() 
    datetime_string = str(now.strftime("%d/%m/%Y %H:%M:%S"))
    
    ads_df['download_date_time'] = datetime_string
    
    # get the model and make of the car from the link of the advertisment using the keys from merged_df 
    for i in ads_df["links_ads"]:
        parsed_url = urllib.parse.urlparse(i)

        query_params = urllib.parse.parse_qs(parsed_url.query)

        ms_value = query_params.get('ms', [])

        if ms_value:
            ms_values = ms_value[0].split(';')

            if len(ms_values) >= 2:
                ads_df["make_id"] = ms_values[0]
                ads_df["model_id"] = ms_values[1]

    ads_df["model_id"] = ads_df["model_id"].astype(object)
    ads_df["make_id"] = ads_df["make_id"].astype(int)
    df["make_id"] = df["make_id"].astype(int)
    
    # merge together -> source of info merged_df 
    merged_data = pd.merge(ads_df, df, on=['make_id', 'model_id'], how='left')
    
    # reoder everything
    merged_data = merged_data[['car_make', 'make_id', 'car_model', 'model_id', 'link', 'links_ads', 'download_date_time', 'id_ad']]
    
    # drop landing link 
    merged_data = merged_data.drop(["link"], axis = 1)

    return merged_data

In [16]:
carreraS_997_ads = scrape_links_one_make_one_model(df_make_model_links, p997_carreraS)

max_number_pages: 8


 12%|█████▋                                       | 1/8 [00:08<01:01,  8.74s/it]

page: 7 added ads 20


 25%|███████████▎                                 | 2/8 [00:17<00:52,  8.68s/it]

page: 2 added ads 20


 38%|████████████████▉                            | 3/8 [00:26<00:44,  8.86s/it]

page: 6 added ads 20


 50%|██████████████████████▌                      | 4/8 [00:35<00:35,  8.86s/it]

page: 1 added ads 24


 62%|████████████████████████████▏                | 5/8 [00:43<00:26,  8.75s/it]

page: 3 added ads 20


 75%|█████████████████████████████████▊           | 6/8 [00:52<00:17,  8.75s/it]

page: 8 added ads 11


 88%|███████████████████████████████████████▍     | 7/8 [01:01<00:08,  8.73s/it]

page: 5 added ads 20


100%|█████████████████████████████████████████████| 8/8 [01:10<00:00,  8.77s/it]

page: 4 added ads 20
Saved ads: 151





In [17]:
display(carreraS_997_ads.head())

Unnamed: 0,car_make,make_id,car_model,model_id,links_ads,download_date_time,id_ad
0,Porsche,20100,997,17,https://suchen.mobile.de/fahrzeuge/details.htm...,26/11/2023 20:20:45,380043353
1,Porsche,20100,997,17,https://suchen.mobile.de/fahrzeuge/details.htm...,26/11/2023 20:20:45,376392454
2,Porsche,20100,997,17,https://suchen.mobile.de/fahrzeuge/details.htm...,26/11/2023 20:20:45,367309574
3,Porsche,20100,997,17,https://suchen.mobile.de/fahrzeuge/details.htm...,26/11/2023 20:20:45,378047641
4,Porsche,20100,997,17,https://suchen.mobile.de/fahrzeuge/details.htm...,26/11/2023 20:20:45,342406831


### Now let's go over each of the individual ads and scrape all the relevant information that we are concerned and interested in

In [19]:
ids_key = ['mileage', 'firstRegistration', 'power', 'transmission', 'numberOfPreviousOwners', 'fuel']
ids_to_extract = [
    "damageCondition",
    "category",
    "mileage",
    "cubicCapacity",
    "power",
    "fuel",
    "envkv.consumption",
    "envkv.emission",
    "envkv.petrolType",
    "doorCount",
    "transmission",
    "emissionsSticker",
    "firstRegistration",
    "constructionYear",
    "numberOfPreviousOwners",
    "climatisation",
    "manufacturerColorName",
    "color",
    "interior"]

In [20]:
def individual_ad_scraper_errors(ads, key_info, misc_info, sleep=7, max_retries=2):
    all_ads_info = pd.DataFrame()
    errored_ads = []
    

    def scrape_ad(ad, sleep_timer):
        try:
            # 1 open a window to get the "soup"
            chrome_options = webdriver.ChromeOptions()
            prefs = {"profile.managed_default_content_settings.images": 2}
            chrome_options.add_experimental_option("prefs", prefs)

            # Open Chrome in incognito mode
            chrome_options.add_argument('--incognito')

            driver_path = ChromeDriverManager().install()
            service = Service(driver_path)
            driver = webdriver.Chrome(service=service, options=chrome_options)

            # open the landing page 
            driver.get(ad)
            time.sleep(sleep_timer)                                       
            base_source = driver.page_source
            ad_soup = BeautifulSoup(base_source, 'html.parser')

            # 2 extract key info 
            ad_key_properties = {}
            for i_k in key_info:                                      
                label_text = ad_soup.find('div', {'id': f'{i_k}-l'}).find('strong').text if ad_soup.find('div', {'id': f'{i_k}-l'}) else None
                value_text = ad_soup.find('div', {'id': f'{i_k}-v'}).get_text(strip=True) if ad_soup.find('div', {'id': f'{i_k}-v'}) else None

                if label_text and value_text:
                    ad_key_properties[label_text] = value_text

            # 3 extract non-key info 
            ad_not_so_important_properties = {}                
            for i_n in misc_info:                            
                label_element = ad_soup.find('div', {'id': f'{i_n}-l'})

                if label_element:
                    label_text = label_element.find('strong').text

                    value_element = ad_soup.find('div', {'id': f'{i_n}-v'})
                    if value_element:
                        value_text = value_element.get_text(strip=True)

                    ad_not_so_important_properties[label_text] = value_text

            # 4 extract price 
            price_element = ad_soup.find('span', {'data-testid': 'prime-price'})
            price = price_element.text.replace('\xa0', ' ').strip()

            # 5 extract seller info 
            seller_element = ad_soup.find('a', {'class': 'seller-title'})
            seller_name = seller_element.text.strip()

            # 6 extract ad description
            #description_element = ad_soup.find('div', {'class': 'description'})
            #vehicle_description = description_element.text.strip()

            driver.close()

            # creating a dataframe containing everything relevant for the advertisment
            combined_ad_dict = {**ad_key_properties, **ad_not_so_important_properties, 'Price': price, 'Dealer': seller_name, 'Ad_link': ad} 
            combined_ad_df = pd.DataFrame([combined_ad_dict])

            return combined_ad_df
        except Exception as e:
            print(f"Error scraping ad {ad}. Error: {e}")
            driver.close()
            return None

    for ad in tqdm(ads):
        ad_df = scrape_ad(ad, sleep)
        if ad_df is not None:
            all_ads_info = pd.concat([all_ads_info, ad_df], ignore_index=True)
        else:
            errored_ads.append(ad)

    retries = 0
    while errored_ads and retries < max_retries:
        retries += 1
        sleep += 4 
        new_errored_ads = []
        for ad in tqdm(errored_ads):
            ad_df = scrape_ad(ad, sleep)
            if ad_df is not None:
                all_ads_info = pd.concat([all_ads_info, ad_df], ignore_index=True)
            else:
                new_errored_ads.append(ad)
        errored_ads = new_errored_ads

    return all_ads_info, errored_ads


In [21]:
carreraS_977_full, errored_ads = individual_ad_scraper_errors(carreraS_997_ads["links_ads"], ids_key, ids_to_extract)

  1%|▎                                          | 1/151 [00:10<25:54, 10.36s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=380043353&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=7&pw=%3A295&ref=srp&refId=88a9c63e-950f-1558-9fc1-cca59e925faf&sb=rel&searchId=88a9c63e-950f-1558-9fc1-cca59e925faf&vc=Car. Error: 'NoneType' object has no attribute 'text'


 13%|█████▎                                    | 19/151 [03:13<22:23, 10.18s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=368853954&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=7&pw=%3A295&ref=srp&refId=88a9c63e-950f-1558-9fc1-cca59e925faf&sb=rel&searchId=88a9c63e-950f-1558-9fc1-cca59e925faf&vc=Car. Error: 'NoneType' object has no attribute 'text'


 23%|█████████▍                                | 34/151 [05:46<19:53, 10.20s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=376495076&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=2&pw=%3A295&ref=srp&refId=6198d15e-739c-eec0-402b-9ddcb60400e6&sb=rel&searchId=6198d15e-739c-eec0-402b-9ddcb60400e6&vc=Car. Error: 'NoneType' object has no attribute 'text'


 34%|██████████████▏                           | 51/151 [08:40<17:06, 10.26s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=358029118&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=6&pw=%3A295&ref=srp&refId=572bdb20-3aef-d80f-9abc-1a836ba7cd70&sb=rel&searchId=572bdb20-3aef-d80f-9abc-1a836ba7cd70&vc=Car. Error: 'NoneType' object has no attribute 'text'


 46%|███████████████████▍                      | 70/151 [11:56<13:38, 10.11s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=376721892&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=1&pw=%3A295&ref=srp&refId=967bcd39-8c23-1ad8-77f0-fe48611cccda&sb=rel&searchId=967bcd39-8c23-1ad8-77f0-fe48611cccda&vc=Car. Error: 'NoneType' object has no attribute 'text'


 48%|████████████████████                      | 72/151 [12:16<13:13, 10.05s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=377950740&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=1&pw=%3A295&ref=srp&refId=967bcd39-8c23-1ad8-77f0-fe48611cccda&sb=rel&searchId=967bcd39-8c23-1ad8-77f0-fe48611cccda&vc=Car. Error: 'NoneType' object has no attribute 'text'


 54%|██████████████████████▊                   | 82/151 [13:58<11:38, 10.13s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=379081477&action=eyeCatcher&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=1&pw=%3A295&ref=srp&refId=967bcd39-8c23-1ad8-77f0-fe48611cccda&sb=rel&searchId=967bcd39-8c23-1ad8-77f0-fe48611cccda&vc=Car. Error: 'NoneType' object has no attribute 'text'


 60%|█████████████████████████                 | 90/151 [15:20<10:24, 10.24s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=377489632&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=3&pw=%3A295&ref=srp&refId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&sb=rel&searchId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&vc=Car. Error: 'NoneType' object has no attribute 'text'


 60%|█████████████████████████▎                | 91/151 [15:30<10:13, 10.22s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=362097191&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=3&pw=%3A295&ref=srp&refId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&sb=rel&searchId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&vc=Car. Error: 'NoneType' object has no attribute 'text'


 62%|██████████████████████████▏               | 94/151 [16:01<09:42, 10.21s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=373454991&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=3&pw=%3A295&ref=srp&refId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&sb=rel&searchId=cd7682aa-07c1-d4ba-5b1c-28258fd96c82&vc=Car. Error: 'NoneType' object has no attribute 'text'


 86%|███████████████████████████████████▎     | 130/151 [22:12<03:34, 10.22s/it]

Error scraping ad https://suchen.mobile.de/fahrzeuge/details.html?id=374288795&dam=0&fr=2009%3A2011&isSearchRequest=true&ms=20100%3B17%3B%3B&pageNumber=5&pw=%3A295&ref=srp&refId=f2bd8bb2-a611-8af9-df04-059412b354a6&sb=rel&searchId=f2bd8bb2-a611-8af9-df04-059412b354a6&vc=Car. Error: 'NoneType' object has no attribute 'text'


100%|█████████████████████████████████████████| 151/151 [25:48<00:00, 10.25s/it]
100%|███████████████████████████████████████████| 11/11 [02:36<00:00, 14.20s/it]


In [27]:
if carreraS_977_full.shape[0] == carreraS_997_ads.shape[0]:
    print("All ads have been scraped successfuly!")
else:
    print("Not all ads have been scraped succesfuly, check the second dataframe")

All ads have been scraped successfuly!


In [29]:
carreraS_977_full.head()

Unnamed: 0,Kilometerstand,Erstzulassung,Leistung,Getriebe,Anzahl der Fahrzeughalter,Kraftstoffart,Fahrzeugzustand,Kategorie,Hubraum,Verbrauch,...,Klimatisierung,Farbe,Innenausstattung,Price,Dealer,Ad_link,Umweltplakette,Farbe (Hersteller),Baujahr,Zugr.-lgd. Treibstoffart
0,148.925 km,01/2009,283 kW (385 PS),Automatik,3.0,Benzin,Unfallfrei,Sportwagen / Coupé,3.800 cm³,"ca. 11,0 l/100km (kombiniert)ca. 16,5 l/100km ...",...,2-Zonen-Klimaautomatik,Schwarz Metallic,"Vollleder, Schwarz",59.900 €,Privatanbieter,https://suchen.mobile.de/fahrzeuge/details.htm...,,,,
1,188.997 km,08/2009,283 kW (385 PS),Automatik,,Benzin,,Sportwagen / Coupé,3.800 cm³,"ca. 10,7 l/100km (kombiniert)ca. 15,8 l/100km ...",...,2-Zonen-Klimaautomatik,Schwarz Metallic,"Vollleder, Schwarz",49.990 €,Autozentrum Rheinland,https://suchen.mobile.de/fahrzeuge/details.htm...,4 (Grün),Schwarz,,
2,22.500 km,12/2009,283 kW (385 PS),Automatik,2.0,Benzin,Unfallfrei,Cabrio / Roadster,3.800 cm³,"ca. 11,0 l/100km (kombiniert)ca. 16,5 l/100km ...",...,Klimaautomatik,Weiß,"Vollleder, Schwarz",95.000 €,Hans Schlund Automobile,https://suchen.mobile.de/fahrzeuge/details.htm...,4 (Grün),Carraraweiß,,
3,82.295 km,07/2011,283 kW (385 PS),Schaltgetriebe,,Benzin,Unfallfrei,Cabrio / Roadster,3.800 cm³,"ca. 11,0 l/100km (kombiniert)ca. 16,5 l/100km ...",...,Klimaautomatik,Rot Metallic,"Vollleder, Schwarz",85.999 €,GDCauto - De Clercq Geert NV,https://suchen.mobile.de/fahrzeuge/details.htm...,,RUBY RED,,
4,85.000 km,10/2010,283 kW (385 PS),Automatik,,Benzin,Unfallfrei,Cabrio / Roadster,3.800 cm³,"ca. 10,7 l/100km (kombiniert)ca. 16,1 l/100km ...",...,Klimaautomatik,Grau Metallic,"Vollleder, Braun",82.250 €,Nordic Drive ApS,https://suchen.mobile.de/fahrzeuge/details.htm...,,Meteorgraumetallic,,


In [31]:
# Save the scraped ads data, so that it can be utalized in the second notebook
carreraS_977_full.to_csv("porsche_997_facelift_S.csv")

In [33]:
# Save the data with the links, so that we can make an interactive plot in the second notebook
carreraS_997_ads.to_csv("porsche_997_facelift_S_links.csv")