In [1]:
import requests
import hashlib
from datetime import datetime
import time
import random
import os
import csv
from bs4 import BeautifulSoup
import re 
import pandas as pd

# Parsing functions

In [2]:
def parse_listing_html(text, file_name):
    """Parse one saved listing page into a flat dict of listing fields.

    Args:
        text: Raw HTML of the listing page.
        file_name: Name of the file the HTML was read from; used only in the
            log messages printed by this function.

    Returns:
        A dict with the keys ``listing_title``, ``listing_nh``,
        ``listing_city``, ``listing_date``, ``listing_price``,
        ``listing_bedrooms``, ``listing_bathrooms``, ``listing_sqft``,
        ``listing_address``, ``listing_info``, ``listing_body``,
        ``listing_id``, ``listing_url`` and ``listing_first_image``.
        ``None`` is returned instead when the page is a deleted listing, has
        no ``og:url`` meta tag, or has no price element.

    Raises:
        Exception: Any error raised while extracting a field inside the
            ``try`` block is logged with the listing URL and file name and
            then re-raised unchanged.
    """
    page_html = BeautifulSoup(text, 'html.parser')

    if deleted_listing(page_html):
        print('listing {} has been deleted'.format(file_name))
        return None

    # Pages without a canonical URL are skipped silently.
    url_meta = page_html.find('meta', attrs={'property': 'og:url'})
    if url_meta is None:
        return None
    listing_url = url_meta.attrs['content']

    try:
        # Second "postinginfo" paragraph minus its first 9 characters
        # (presumably the "post id: " prefix -- verify against a raw page).
        listing_id = page_html.find_all('p', class_='postinginfo')[1].text[9:]

        listing_title = page_html.find('span', attrs={'id': 'titletextonly'}).text

        # Drops the first two and the last character of the <small> text
        # (presumably the surrounding " (" and ")" -- verify).
        listing_nh = page_html.find('small').text[2:-1]

        listing_city = page_html.find('meta', attrs={'name': 'geo.placename'}).attrs['content']

        listing_date = page_html.find('time', class_='date timeago')['datetime']

        # Listings without a price are skipped.
        price_element = page_html.find('span', class_='price')
        if price_element is None:
            return None
        listing_price = int(price_element.text.strip('$,').replace(",", ""))

        # The first bubble is expected to read like "<n>BR / <n>Ba".
        housing_text = page_html.find('span', class_='shared-line-bubble').text
        listing_bedrooms = housing_text.split('BR')[0]
        listing_bathrooms = housing_text.split('/')[1][:-2]

        # Square footage, when present, is read from the second bubble. Only
        # the digits captured by the regex are kept; the previous code sliced
        # Match.string (the whole searched text), which could include
        # anything that preceded the number.
        bubbles = page_html.find_all('span', class_='shared-line-bubble')
        listing_sqft = None
        if len(bubbles) >= 2:
            sqft_match = re.search(r"([0-9]+)ft2$", bubbles[1].text)
            if sqft_match:
                listing_sqft = sqft_match.group(1)

        # The address is optional: stays None when the element is missing.
        listing_address_element = page_html.find('div', class_='mapaddress')
        listing_address = listing_address_element and listing_address_element.text

        listing_info = page_html.find_all('p', class_='attrgroup')[1].text

        # Skips the first 30 characters of the posting body (presumably the
        # "QR Code Link to This Post" boilerplate -- verify).
        listing_body = page_html.find('section', attrs={'id': 'postingbody'}).text[30:]

        listing_first_image = page_html.find('div', class_='slide first visible').img['src']
    except Exception:
        print('Error parsing url: {} file: {}'.format(listing_url, file_name))
        raise

    return {
        'listing_title': listing_title,
        'listing_nh': listing_nh,
        'listing_city': listing_city,
        'listing_date': listing_date,
        'listing_price': listing_price,
        'listing_bedrooms': listing_bedrooms,
        'listing_bathrooms': listing_bathrooms,
        'listing_sqft': listing_sqft,
        'listing_address': listing_address,
        'listing_info': listing_info,
        'listing_body': listing_body,
        'listing_id': listing_id,
        'listing_url': listing_url,
        'listing_first_image': listing_first_image,
    }


def deleted_listing(page_html):
    """Tell whether the page is the "deleted by its author" notice.

    Looks up the first ``<h2>`` on the page. When there is none, the falsy
    lookup result (``None``) is returned as-is; otherwise the result is
    ``True`` or ``False`` depending on whether the heading text equals the
    deletion notice exactly, including its surrounding spaces.
    """
    deleted_notice = ' This posting has been deleted by its author. '
    heading = page_html.find('h2')
    if not heading:
        # Hand back the falsy lookup result itself rather than a bool.
        return heading
    return heading.text == deleted_notice

# Create a DataFrame from the list of parsed listing dictionaries

In [3]:
# Walk every scrape folder under the raw-data directory, parse each saved
# listing page, and collect the results into a single DataFrame.
list_of_dicts = []
path = "/Users/pandabear/springboard/CapstoneTwoProject/data/raw"
dir_list = os.listdir(path)

for folder in dir_list:
    # Skip hidden entries (names starting with a dot).
    if folder.startswith('.'):
        continue

    path_html = os.path.join(path, folder)
    # A stray regular file next to the scrape folders would make
    # os.listdir() raise, so only descend into real directories.
    if not os.path.isdir(path_html):
        continue

    file_list = os.listdir(path_html)
    for file in file_list:
        # Only file names longer than 15 characters are parsed (presumably
        # the hash-named listing pages, as opposed to shorter helper
        # files -- verify against the scraper's naming scheme).
        if len(file) <= 15:
            continue

        # Explicit encoding so decoding does not depend on the platform
        # default.
        with open(os.path.join(path_html, file), 'r', encoding='utf-8') as f:
            text = f.read()

        parsed_data = parse_listing_html(text, file)
        if parsed_data:
            list_of_dicts.append(parsed_data)

# Build the frame once, after all folders have been processed. Doing this
# inside the loop rebuilt it for every folder and left the name undefined
# when no folder qualified.
new_listing_df = pd.DataFrame(list_of_dicts)


listing a660d34c3d055751c7c75cf53c5ec04cb7ec75343f06972eebdef11309cf1117 has been deleted
listing 48fc42305b6e0e673ade36447492f663be18398e23158fb50b3e94ea2427091c has been deleted
listing 18198c7320b10e1720c5a2812c2e15849aad58ad73e5ede4e358ce54e11d018b has been deleted
listing b75bf9e6ee658dfd32c6ece25e269bf2daa73584b02bdfa911dd5ae78cb5df10 has been deleted
listing 6f88f71f66764367d67d00a72bd77f3b978628add5a39b219dbb616e19b2ef99 has been deleted
listing 930890b337519252e56ce2b72cf32c17eefa1f57d39866af018ae34da5764511 has been deleted
listing f4e41927ac05269751859baab5ec607f80ffd2618032718a3cef1486dab5a449 has been deleted
listing 6110badfd66e672449330d3dfa1128b886dc4da6316578328ee482acc91bcba5 has been deleted
listing 7125827eff6437b0b6094843c8d806df2b3855faa7590d477ba970cf81c78e4f has been deleted
listing b1b4dc7aca16e69e2af587206e8ab68bcf012f3443ee2b702df6da8374c3a88c has been deleted
listing 8a1aadc1e5964bc98d50ed18ae30dc1dc10b68bb545ec4749d4ab2fd76f36650 has been deleted
listing b8

listing 1e3d5a1eddfaeb8f5cf7d8e1509361d07ef0132c489b5eff9fa49828ca41e9e8 has been deleted
listing 791deba6efcb4b425719574cbc7b3e132f7cf5e53f04d687d0ffb525df89816f has been deleted
listing fd01ed5872de4344a97b5c516865cdff1c3c58097d37cce7ed2a5474acaad309 has been deleted
listing 86129e517a2613b0dc196249c49c2ba5673607b97fd46f46b9f4f42adb5809a3 has been deleted
listing 103c3cca54753dfc5f9696f35ead6a94dfe71c1b32315ef95448592d81e735ea has been deleted
listing c2b2f910b98fc0ef109fa86c1a9d43b11c38dcd9a5408f362281ea73a8a0e5d2 has been deleted
listing 7281ca4355fe5d4b5015617cff14efaffcec8731f4a6dea426cccd92b6263eec has been deleted
listing d24d019b754ed31ecfa1eadbd0a3251f148439371a3c556273ae487a46f33dcd has been deleted
listing 78c8d6d5d03a33111ab52cc497873a0daefcddb93524f07ca0ef56ae9392f780 has been deleted
listing b951e656cd35634c5c7358a5ae454830fe09941f4538628f250c46a69ec70e3e has been deleted
listing dd1b32b71c6b62c6c64e4965c4cf954cb8545d1511229c9db0418615929b513d has been deleted
listing c7

listing c3ebbc8374a4a0367c33930b9febd2912bcc50ccfa81c45072ae53949ac2ad43 has been deleted
listing 95f54c7e80ec2ae6f2bdf7371b3b2dea03330e002cf921a8d19a1fdc820a4a79 has been deleted
listing 5f5d6aa9c2a056b916dfcbb535378b33b576991f97f284c3cb406700ce8e61b0 has been deleted
listing 24a300fb37daf5d4d8cb67a405c34b6400725e5155947d91802c93d6f931c226 has been deleted
listing 0da55b4c7bc2af1f9c0497dcfbfbefaf70be6823798f6b1bb7559f1119cf5bff has been deleted
listing 1c7d466e26470911970082b8ff98f8d56c73fb1dcc12b4832b09cf65c6558a6c has been deleted
listing a44960338939555bdda94f5f36b638db95f40591d5d404835184b779b148c27c has been deleted
listing c66c6f3463892225ded092809ce2eaa19f6492788739e6545849dc7175baceea has been deleted
listing cf65b41e50cf39c4968b016513bb6918bcbbe86e08a8785e018ed9642d01902f has been deleted
listing 936d12ecf2c09bd11ee243acd3cbba689506512ad1bfde5b184bf87c4a03565d has been deleted
listing bf123a3c60778b5f169520a2e98678408e2b6be892ed09928f3fab8c7a3563b7 has been deleted
listing 8a

listing a3bf59fe21c359ec092da5beaee88469f7fe834ffd81a0e2bb7fc136bbca7a12 has been deleted
listing 760d51fb3efe5fdeeb4f539e89fd7cea58ebcdc6a4bb05f90d56f24c4df49189 has been deleted
listing f3adba5ea0207671bc1c0a9cdbf3a8210fd65b5ed76455440d11fcbcdcd1a8d4 has been deleted
listing 9b64b0dfe9e24db673ca6e0dddbec55adc4dfd89c9c30294e372802bad726086 has been deleted
listing af67244eba92f2e371c8d92d9cefd561be000d68412366ca4577d29cbead3cc1 has been deleted
listing 2b32da9603eafb73243ad858b25c3b85bb73dfd831b2ac649fad54656ac9914c has been deleted
listing f12f247b75edcdca1959016ce2d2c27e2eee956098618a48f01d4a483636f7de has been deleted
listing c176912e21395ff7a12977a44d32d9f25045f922e430009a5dedbfff21196e78 has been deleted
listing 5e30c2841520e77a561b8d3a2d24189eac2d6cf7266843023eabf8f8cf0f1940 has been deleted
listing 1097e2c9a1a4a4c92bcf208477d9c45e52e624c4048dac7a150c59a593314a9e has been deleted
listing 2d592df408593b8a53830e04d4bcbe515dc85981b870f899aeaf74bd7844790b has been deleted
listing c2

listing b58bc73e5d5ee5137958a45453b2a54fd86aba0825945e467c0651ffcde6bf6a has been deleted
listing 20685a7e6b65f30d926bc83fdadf0b891f30831eefcec05845fca8b00e53ae7c has been deleted
listing a5f95b59ce094672e40caf1c851f5bb3c3604f347ff68baf5566b7687d0cd945 has been deleted
listing c79209589ef3fb8cd0e799318638ad0ea77427e15f5dc70d53b152233bf778a3 has been deleted
listing b676531875e78aee7a3539ff0e61c96f5279b1e328a39fa7394f7396fa5bc8e8 has been deleted
listing f049255a67b7a3747ed54d5603a0f7e99b8e3fae40c81235842f4d4cf2e72088 has been deleted
listing 47e25ded0f831643c898250cba4431cf5612b88418caa032a666950421011738 has been deleted
listing 43b33b6f2d6021aace479120beb137ce0b7479787125d3d19541ab21db5d1a03 has been deleted
listing 95fc7c00397e9ac7432b283eb98d8beb6bf801c9b070c2b26a171dd4bcd6e6d5 has been deleted
listing 04fe86ece7356d31709b37d485064abcc4199fde867f097eae1b5f10001a3c52 has been deleted
listing e4a9ebe1c24cafc67953791a1590c808390333941508e2199e3e7b5667719503 has been deleted
listing 2b

listing 36b32191cb23eea42ade0c2192a60df9aca4d1edb0d8aeb76ad1f5c8d636d38c has been deleted
listing 07848e891359c97ae03bbbf3d33c18e9b5a83eae01659a5778916547d341ada7 has been deleted
listing c96a213f7598f0f638fc6e4e44ee10eaa71e447c0717fe48617931ac56855244 has been deleted
listing 7181b2c419adc4a77ba7d09ded0546eae12988661ef2538deac4a757c1ca44e9 has been deleted
listing 1de44c2cec2d73af2295c483af8711a3fc9bffbb59575f24d6459c8af0da3c88 has been deleted
listing fe2c59d44a36349a949538a03f90123208dc7337224e5b311cc8b5a662b1a17c has been deleted
listing 128167b7c711c55649901af709346ad4028c1d851b0dd90fb48ad770a35b3737 has been deleted
listing 80ce16662be609abcc7db873a9aa97a546d19748a81b267fa2fe02e527100b3b has been deleted
listing 880173748149349b110132f1321aaee2c4d3593f8c5baf1c2d9d7a299c46913b has been deleted
listing dbc15717c94e95b028e3451df8d23bce969a3bf904c3b448002f31cd64ca25b9 has been deleted
listing 9b7cac5c32025c1b8930dc7612b7af14e832a90f605dcaf117bb5569695837d1 has been deleted
listing 42

listing d29d9a82c31968ebdc5c749c2c2834f44961a43468a679c23e6faf97ceafe466 has been deleted
listing 3a144529dc866e61498c71622902b7571bd88b167dc6cf4189a7e8c6afe55fb6 has been deleted
listing ff4c9260dd211d52291e5c02acb3866d0c2436166f52ca5b1651d5e2ed3e8561 has been deleted
listing 31f524bc37baa44bd1401c724a27f568f6125ecda78e356d99f4d67f19e56c0d has been deleted
listing f254ed064152583e18b1c8e889ee91707cbe9beef7c3b7238cb720e1b5872e7b has been deleted
listing b5bc9106291d1dd14df5555ece7ea5a1cd88beac479d0fcfcc589ec310245bb2 has been deleted
listing 70f59e3407458f3ce48c8c0a132820b7cb7eec68fcaefdb377bcc4d8323d62a4 has been deleted
listing 578fc72505d29009613571ead377407fbee6397640bd4abd0e9ef09a6f0a4102 has been deleted
listing 58d90f7260aaad754a99c64366cd7e4b777b6e149848b458f4fb85a513c4eccb has been deleted
listing 08d764f4320d0e37592aa600929bc98e226d4bd9b307dc72fca50237e03ac4e6 has been deleted
listing 7e7414564664523f63dbecb91d7e66301b45c66e48e67866a5d210d5eb324ae6 has been deleted
listing 8a

listing 933d442cb461233f54a0d300c9139a579fbb949b0905af4d325c67cd52647951 has been deleted
listing b4f5ffdcaef063661d18266f73a725a8e7c6abe919b4c4e7c3e599bb6915f992 has been deleted
listing 68ae322da2e379e955b213a841f59ed885395bd5327e989b9012cf6ada14b873 has been deleted
listing eb6c182226977eacb94b97c6c8df5f6d09b5a18c207f710e38b4db3370a273a6 has been deleted
listing de0d35c3d7a1d9c81cab4280cef5cdc638b1ed4555fd5827e7fe82bb5e6db671 has been deleted
listing a6fbece23f56bde7aab6aea9c7f0132c8f0512d8ad5e38163a9ef5101d661d7e has been deleted
listing 570719231df3748adb57efb61dc11ca992de1835c916041b8db62c291ebeb792 has been deleted
listing 75e229ba90afd4d35c9feb38c9053cb8c47f45fb5611d52d63e3e22a70f2a457 has been deleted
listing 9a889943620252ac50041c3df8abfa21dc3b2d0a0dd7a1d601942b27fae61c65 has been deleted
listing 48c25c720a7231e492e5e8c62b48c70abde13c48185f1aab5dbb8649b7cdf1f2 has been deleted
listing 2989b70edf9d2a72efdc1b1ba2bb056cdb6d0fddab8a090099f5a26037fd3161 has been deleted
listing 62

listing bbdf5388e25385a6629a7b68be7e2f45874f2e88369d51bda71e3414d04e7704 has been deleted
listing 1a0fac6ce125d0bb15fe0ecd2ef5207527d94a01da00382ddb5d34976bbab391 has been deleted
listing d09c04cb92317af0b38414e2b325353f866f437287cf1fe743c80de39d678d76 has been deleted
listing d7e8c92472ee1359f9593e0d5d7bc187c6526b8b76fad1eda78f4bddf9706920 has been deleted
listing 750af0e646ec5d5f32f06e4ad8ef44fbe9e911afd8b4dd8ede2ad32d3535f4eb has been deleted
listing 18a3c117d2f8648d994832089bec28b53d3c1ed31fea67137739ea5d60cb8a2b has been deleted
listing ff8842cd0d6d591e9b0b98e346867a0d679ece532dfb9cfe2a3b9cbf44122b7e has been deleted
listing 0376a69bf2918453d5b6fb07da357481901167a48e91b6701fc9b23c54e1119b has been deleted
listing 722f0d527207b90ccac63cceab377cdf7678730ba89fa0f89b22f1e216e063c3 has been deleted
listing 02de9ab5f97072fd364fbbae216b6f76d58a3ba283781bd3fc20bacb41d7aa8c has been deleted
listing 03ee22687473cd8698ad0c24c6439db6488581b631e041cc8e1b49955687402d has been deleted
listing 7f

listing 1dd8e88699df2fdec1666052d7fcbe216b1e1291a39c80c3d5f78a25c2b20659 has been deleted
listing 872ea5de1ce549dacb4f5b89b0a5bfc5a31db6c2dddb0a8a5055f55244cd8789 has been deleted
listing 6d969e1a6fd19f2770e009964c62726b17b07c2cc1b2e3a45097938cd6845c06 has been deleted
listing ad87b55ab2d8fb03ecdefd17faf25d4f38000a187ae8c770a8d83360e437ead9 has been deleted
listing 9160a4862e43061c848c9c843758b6e7f75dd90416dcdab93bb0df048f955e0a has been deleted
listing 05241b9f51aa400c533252d607eac80d51dca36918bac9e24942116397292f89 has been deleted
listing 3e7b9d46bff6aaf5ec52261aa623dcbb534280b530bd2f9fc92351593fccc036 has been deleted
listing 332044e6e45c7403605c198716a6b8505f52bc3ae9ed8f1303e6e02dd057b5ee has been deleted
listing 5f7b86e5c824e685c0fd40276511e5206c83b68c13793b7a30f3ae25157d0808 has been deleted
listing 6a7d099dcaac784dba5655a04b1dac6a8213dcd1ebe99f6b7912552eb7640dee has been deleted
listing 9d601e47c62309911eb6aa06857f6a8c3beb95a9f72df1eace0e4ac8dd05e368 has been deleted
listing a8

listing f68fe686123e1481d67c406d63fc06780335b1c13e0735d5013f9aba4edbf085 has been deleted
listing 890ee17d3cf6f92d5359627ef96b426b6f0cd57db4712c455171caf3c8b0562b has been deleted
listing 4abc3de00dec643d4f91770ab2f49b53070b239f7b5c3488aeb05b5be7e30b8f has been deleted
listing 139b8214c967c1a1e835925ccee735c911116c0e6b916027953c60ad0a01c01e has been deleted
listing f56a4eefd2afbd9fce09d06470cdbfa1fa8950fb36dbba6df11f4cf2725f21c4 has been deleted
listing 2df24f2d1f714e08b82736b3becba1b799d098a18e57381cdec1cdbf43912396 has been deleted
listing 2a5d4c57e4b557f22c11461aeab1084bccb63638863f62366a76aae5ed9c56c0 has been deleted
listing 5ba6330321be314c0c4d880c62450aae94b9b9f17de72280d0863aa2bed69650 has been deleted
listing e7da25f3149bdab65773f0bebbc78531579d57aa35f5fa5244ce3617953a4f0e has been deleted
listing 2600c5b081c762b7f5a99d5069129dae3b816e50e471b8ae2f7fbd13eed84d8c has been deleted
listing 98cfdd8ab06a8f6d0eed3f00916efad5b70ce8705f34f968f2f6a71d880e1c0a has been deleted
listing 6d

listing 41bd002c8c9fba2a17c7303d84427d111a3f4d5fd71ae17052b73047ac062d74 has been deleted
listing 93dfb0ca5f1c755a35bee28901a4d529fc6f616de4e38df0d09ec9b6bdfe1335 has been deleted
listing 71e0f561d18045f54b95058a98a73a518265382e89f61a1fb25d1bf3cfdc4356 has been deleted
listing 6a45057acc2ee86c2378d0d7cef3e3cc7bcf67ce9ee4460c4c062f5a00e1031a has been deleted
listing c43c269d639d74b72e7413891d29e310f227b2144030324aa89403200275195a has been deleted
listing 7f7b595c4a2dd6d6ec2a2eb8c6187ac4d80c76764cec00f6f0075def3717a49a has been deleted
listing 1a58ea2756d223017e8af0126bb7518cce0b6bddb9a567e42e0705aee615c24c has been deleted
listing ac4e5ce7f7254d8130431bb3133c15d9d14590d33136fdbbfc206cbe7c109a59 has been deleted
listing a9c20410837bace24c19e81ed191c26b05302f11938f4d89f03be88b8f813fec has been deleted
listing 668e27207ad3bb82d2d0309e61785331b7c2585f7e914a55c068752adc50b471 has been deleted
listing a088d695461aada360a8cdffd6b58f26b56726fec7e275a19fbbc6c0df831ecd has been deleted
listing d3

listing 270cd4bc3add8d0f95f81fc3745c4d122367d5a4ce036a1dbdc972d6585026f1 has been deleted
listing 835c42d9c6ff30d7ffad80314e0de3f65dfe1e746d19ac0c8e2c822aff5295f2 has been deleted
listing 5bf5affbf7db995a04044c44261753421fa78d9d3f8970fde0594cd3c0a982d2 has been deleted
listing 5f8b3d162c181e47f18707d5dd59d4e2324d45760cde4ebcf38ddd11e1f1458d has been deleted
listing d9c5d321b98640f462beb06a220454668827b41bea211d07875b9117f69aac6c has been deleted
listing 9cb1edd39c806ed9cb7da0d6d36435632bcc0c93039ea391310df3d57a7f435e has been deleted
listing 7fcc5225e2bb1305b283bb570b03dc3c443e39dff28f1f9b076cb2ee9c11e501 has been deleted
listing 15d1ed0e53ab3580a389bde834e1b8edc025d617cc949e5b4839edde9d470c15 has been deleted
listing 4d9df4c26cef4f47617b64a30955d9638da0c8ac5854228306b1850c0d8a714c has been deleted
listing 1c544dfacd11a1c20c6f6ca91a2c79f2e946e1e2a573e755df5bdc69ca29de3a has been deleted
listing e22a723a0a2296a310ad43573d03f1c2d81edbde5bccc6d4000b858d52d8a374 has been deleted
listing 62

listing 4103330f7a6a6b3fb50254943d3135d7f2202e07c3ed0628b2f017090202143e has been deleted
listing 79a42997a0f222e3014767570871a80eef1881b04dcc5fb666433f20f35f82d2 has been deleted
listing 1e85f7b3c1b0f06feb95ceae2f1b15961ea76d0d5f88244af7340bd5eb4a9b4a has been deleted
listing b8a23bd6de76251e4378250dc5f9e32f9edd046102875a37af960f778659cb64 has been deleted
listing b11018b8532746da6437101c460a923c090a1fb018f7320492df5a54686651c6 has been deleted
listing b689765f00df2c209011599155d02da774b9238da0349036e76445ffc475e423 has been deleted
listing e4b9dac3be72c90e37d4bc392ced128d4f8a70fe63f50dac399a679927f3776d has been deleted
listing 17562d2978debf358c5529ce589db0c7be58bb2118761ae65fd791a49fd5926e has been deleted
listing 72861608a5b6780af7bda24883efc2da4ca1eb6ef12f30525275d68c0cd35576 has been deleted
listing b5b6a8783965a8759e02d7c4f25a99cde3cad781e48992ee0e430cbaafc8559c has been deleted
listing c59d7982752bcb649762262c734b2ea4c354834a9d905790d2f97ea5ad6eb515 has been deleted
listing 59

listing 890594a4467ae890f91056234e827f662ef8039b6b01a99e67a18570f36dfcc9 has been deleted
listing 3ab0694677670b8f1cd2d7ab32edf58a120b3fc8f5333c04d474b3774b0bc4b0 has been deleted
listing 31a1f0b0afbad35048201475de3af2c8feccda28bdfeeedd395d8dff8632613b has been deleted
listing 0ff274886923300bbb33d7bdae24b4a0f65d192143995d3d22da1bf6490b24f8 has been deleted
listing 73584586c52d1b8d778f2cc4b56d8ac51c079c1acef947c46cea39942a885627 has been deleted
listing 2cfddc4d75ba853fcd54e8ef32c4502b6a5aeb7a89f3a9cae9f9ab0ae15190d0 has been deleted
listing 9b6720e33bcf8940cdfe34d5ab2524cbf2d93ba166f82281277ff539bf0c9ab0 has been deleted
listing 4446f5ee85f1995e3c5620e4e6dfe68c3036b18c2b7c4ae16c693e9e462e15fb has been deleted
listing b5fdf61c6f8f97372e8f85a11c673430967ebaf8c52a2b33693f7992d67201e3 has been deleted
listing be9c408c5e1f8cf57f5d157f301d6aa4e8b73e7561954859af5fcdce4174821b has been deleted
listing 76797e5df6ac8db35b1283280c65ad4b365d328bcf7d33f5853c16933f2181d7 has been deleted
listing b4

listing bd7be56c3b6142c0a3875c59e1b91d7d8b95609ae18b733704581f1fbe699bf9 has been deleted
listing eea7265814a0e1cf2cdc510a91bfa75222e440046878fe10f1bf031af7986def has been deleted
listing 985139523c3dfaf66e75e0bf94457b8957d898f8a3ca4cde213d77416d22db6f has been deleted
listing f244f48be5137b1c13bf5ca3355f8872174bfb965fe865796744a7f67dec3160 has been deleted
listing b0948530e126da7ca0a03c5925b7075902b0afb44e9bfb2e15967cf79168e977 has been deleted
listing 17c43c7114400f3cc314bf10c10eadf203909b80de63aa202d89969697cbd764 has been deleted
listing 1de994658e0b861b5d24cc9181e06af26264b0585b74aed35c560f0fed63f58c has been deleted
listing 642933e57bdfca60759e0c46bab19947b279cc98f4980eb259fd77a4c4105a9d has been deleted
listing cfffc80b2ddcb7f419dd37467964813696ecdfb100f69842c6761a4b9f96696e has been deleted
listing 09d5b6608295566e158c35c2368042502328488cf4f508bed80c880c211a3470 has been deleted
listing 53129fbe62032a5a40a40078efc579cb4115ace77cc958f1161d993389b30061 has been deleted
listing b9

listing f6ca9b909db26295670a68c0bc1684bd1a8e66585035f29553ad471e24266a04 has been deleted
listing 28e7f587914b572442fe1f3d097841a7a63c5c0b968d0a5b4ff8fec8ad350475 has been deleted
listing d3618e0672fa116fb55df200b182bc04001f2a5d78abbd8672dc9bfef875272e has been deleted
listing 5310947772c00653197509062bf175d25929a212e610690c0320fd99f1cdf200 has been deleted
listing 6f47dadff2ea2840a09a8ea22811aa5fe5959ed51f546123e15d58f59d1dc8cf has been deleted
listing e9a71178c045124331b1cbcda83b71140c9a2c4be598e3f807238a241bd75a61 has been deleted
listing 666a6861f8db17030ea751e7a5e2e5238ce47317380ad717f2545899d17ed42f has been deleted
listing 741d0d2411fd93cb893681a913c2dc12c961b8b43490664682455077f05da0f1 has been deleted
listing d72a180855b7c86ede7f81a83e66473ff318002a2cc77ac3c71a5734f3e40f3f has been deleted
listing dd86c1369ac8ce5231346c26676aacbc12dc4f5f176c3568b21a9b7eb4212877 has been deleted
listing 82005e819ff7cd9d27bad1af3961dcae2625162caf24d7ec25bd459e9bdb9024 has been deleted
listing b9

listing 7e8152e6cf218e7034fc38a79d41b8e218e3aee3fd45a52d2eb06e123bfd4933 has been deleted
listing ad555a417342e05c5d7430e8771e337070731c94987827008a85cc70c527251d has been deleted
listing 7e064ee44443c17a1b00b12fd8b4d4d3576c88287dd6cfa56dd3c25613b74902 has been deleted
listing 225b28a925ba7c80b9282dfaaeb079f765378fa7065aa7876b2c093c86758043 has been deleted
listing b9da8176e5d3f9e2ddf0a015074b044f860bc51f4c4594737ba197a9b9b0168a has been deleted
listing 68b4307de50be07b4b68d13b20da8f14aee98a770d23fd49bd676fcf9a24a66f has been deleted
listing 8371b249186f0940d71260f44bafa95ced8819cf4639b15af2f49c944d6f8168 has been deleted
listing c97c82d95cfbc93579f47b46ae7751c65a920b6fcd84c322cf68935f53d60f8d has been deleted
listing 507362bd23f99d3894d19274fba99807a4e9bcf16538171141be8a3032d1e4bc has been deleted
listing e7c0467f98ff14e622ee6d886a0d9f8fe679d1f77fb6174fc38e61c2ca88a887 has been deleted
listing a3f9938b8dbb41e426ab80b2946ecd20d57b63b88673010d6592d82926064d68 has been deleted
listing 69

listing 22511ccc8a11deeb9b94ae5625a312f5c835caef037fa93bf658d59ff63fcc51 has been deleted
listing f949437732936d2643c016b50ab844aadb3d8b805508eaee58c8f3b113b50125 has been deleted
listing e74a77ac479eb9f24eab754c2880bf600bacc3204225665ca58a4d60a47ade48 has been deleted
listing c6407cb0ba1611c2fd182af1392abddc808d0df86ad851a4901202e1bb5abff4 has been deleted
listing 59c066cb1a4e55f08b5b869ee08234ecdcc28343c50d2d3baa53a877b9f68ebf has been deleted
listing 69008d54f5c92056381bfc24af579913c5798ad941ce4bdb23b287eecd30c4dc has been deleted
listing 5e7328de5c3f97b55c883d69bba8ebcc1f252a5c38ec638c8e83ec650ba43ae3 has been deleted
listing 9ef05a1984b1184a7fce5bffdbf956decf50cd84bbbd79d0abbafc8428c6ddb3 has been deleted
listing 3607fcf5d2aba28f8abfcd4f2ec5cf8b5e406d881d08185d02ebf8311dbb03ea has been deleted
listing 4990eca4c18b373f3839c572989301d474dee4983d4ec67d0adf3fcdfccd7749 has been deleted
listing dc2ab5ebd43a3a36ff0beb43bf6f29c638833038a61e52bc0a127f7a37940c45 has been deleted
listing e7

listing e747c6ea446599b9db184002d47239c5c8d3e79035bbe6652362962e26ae7295 has been deleted
listing 0b60f292d1872c16ef93d612c46a80af72989ed3ec1f113b3d67df3df213c4ef has been deleted
listing 5843eef3aecda27fa45aa695c96cf057d295f429ba7fbfce5ea4b63641851db7 has been deleted
listing c39de4abc135786e983ab7598584925497ac4dc0bc644981c911fe8d99f1d630 has been deleted
listing e1ab40ed76458018182c72fc79c16df91ef2ce79d63321a7dff71147e4fcb291 has been deleted
listing ad066a51869537e3ad82b57f4101e16a7bf8190433dcd6946bd30c055f57ec22 has been deleted
listing 5492b046c6937fc437031916488a6614748a1be88cd2c58e9a85d5a4f973e8f9 has been deleted
listing 40a29d0d0bddd3d997c4c493cbd3295024ef62c8c4bb3f077069582517367984 has been deleted
listing 80a83d5789b39dbef539923977a6fd2b9f02a53ad2e2723a04a2c44184920503 has been deleted
listing ad246bf1e93e6f8dd688bf7d5709cd1fa3cffdc276e7f0dd2ac514d450aa9bf4 has been deleted
listing 26e114a095086ade001ed2372af055b1c4bcb73009caa3b16ce0ec73e1264b8f has been deleted
listing e6

listing 512122fcfc0b204399d5c3316d3167d47ce443d8113da51ea922ea10aaff6706 has been deleted
listing 025418cd74dac66a9f77dceb9e7bcc56af6fcb1a55b591178704c1f34c555763 has been deleted
listing 8aca38b2539fddbc5099a01211646323ab5e458174e07be206ed6aa807682db8 has been deleted
listing 1214343c4b72ad25dc103858080bd2e09ed33db9ad8c6f9bf0b421e8a4422329 has been deleted
listing b5c6bdc00c0a4680743b177be29ae1896507c4e08d2335855f4adb9be3cdbe47 has been deleted
listing e1ca4f9272d6e72430ac6d107de5c34149648a8c4b821db497a4e6c739e259cd has been deleted
listing 842f1ab4ea62e02b8b12634b22b6c8ac8699e294b0d0a2d3304eaeb5d741b1a6 has been deleted
listing 5a954cd2154896713d33cb9d8f357ac62aee9607d0c21d6044ac87959014f1e2 has been deleted
listing 0f2cc33db0dcd64deae2ae4f63ef0fca060e0d84051f900489129612dcd84724 has been deleted
listing 5c5a8a01fb82d38b538851724de172ceb61b121c7922dba752a329b9756bd594 has been deleted
listing e8e2788092ab81fe0b791f6cbc58567eb1afaa5be032475d96b5b17d513d87a6 has been deleted
listing 83

listing aba831319080c1d3824706c36587a075f989b296a8855be9f6a87d0c0e138515 has been deleted
listing 670a5bcb510cc708ad4557ab221b67a53c0ca6b4a73a16aa3666509e07abeaa0 has been deleted
listing d389328179bd3c5b7cd6a4d52dcf3f0b244343fdc7a7c0b742697c9b16043d63 has been deleted
listing bea60d38e74cc096f53b28a2d3a62e00bff00c438d205057c41a3f7ddf5b34b4 has been deleted
listing 7d845afb0f593daeeac2630009bc937f8c1af218a57f38a22397a8f8df04a67b has been deleted
listing 0e92dd5a91edbc415066d7b648b575913936d2eae9ecc3cda2645653e33c5dab has been deleted
listing 37ccae84638d48b016b02bd3c6ceb217cf3887f6f6cdfb3db95d0d17aa42ed55 has been deleted
listing 3da3969bc85a8acce5ffa591d06a28c031e55fedb741f28f5dc05cadca75f55e has been deleted
listing 2ebde0cce7db70a90e6978da94ae4054503da8aa2804fbfcfc3927c5ea89312d has been deleted
listing 76691677773a0db882b80ca8dd879e7aae26637892e7666bafd95427f9517447 has been deleted
listing 410ef240a74c0fde3095b765043939819041e1a2a8b71fefbe0ef43216783328 has been deleted
listing 95

listing 3a02945ce86bd2f0f38583aaf742fd82fa309587a2d05ec10ce22971c392484d has been deleted
listing 9dda20949d6e4f89d113bb03c26da05b16ee99f54b8772efde53eb69fb22e9e6 has been deleted
listing 8764271559c05b76b1eac4ebc216b7855392331142746df0b1164c7ea3d92ebc has been deleted
listing 729ed0928476f0d2d6e9a7856b6a996e7a19c91365a4cd496f38cf401db9c2e5 has been deleted
listing de11741065ddedec0cce4d217913af49880a4391f4dc4e13fb210f3bb8b52318 has been deleted
listing 04ea2f3ebb268e5f094b07bb637fa67d7c076107ccae9a1e214367c8e3eb65e0 has been deleted
listing ce0bc8ac33b0c78b07482ab63ee61bc1d91eb051708c127d6f22cea5c37a9511 has been deleted
listing c04462c452fabd07f4da105209a26876367675ccd68e5e51804f511745990921 has been deleted
listing 7607fc5752c8efa2dfd8c9cf60e1b00b6bda54ab22f8cfb69b93ed587035cec7 has been deleted
listing bbf1106993f3edd8a50dd85cfd4869c8440f693d4e02d7eff879d44960961d83 has been deleted
listing 783982260425b63429c6840b6aa1b9e1b3cf382428df42ed2363a388d7fda926 has been deleted
listing 5e

listing 90752ae083c31e4bf1a1a5ee47ee9b73c817c7bad8321e2e32952807fb762488 has been deleted
listing 7f5860fe676cd1d9603cb4efa3f7f2527e9d79912f11f3e2db2214618fed5178 has been deleted
listing 3a0f2646afee93003c24da7479bf46616ccfe844a3477b7f4fefc5b175b5d5ed has been deleted
listing d87e792ba5b702e53501fc1ebc90c9c837434d22997432a5a7cc9c178aa0769a has been deleted
listing c99dbb19686ccd9927b60c2e553d1c463b88ca29984dab49847848e50865eebf has been deleted
listing a643a679b24ed8b3c380d362617326ca568cd424da5d8baac0867136f179cf7f has been deleted
listing cdb90b387f978ce64346bfe0776cb46722d7af616def9bddb596e4d553148772 has been deleted
listing 3478081343f8ceabd20acd14c84be74cf5d219a8a158c314b221fdaf01bc8709 has been deleted
listing d03f20118224630f5915e73a4e14515adf31f4a6e50334aa64f1db66896c1d6f has been deleted
listing 8c97d99cd8f228662aec39c9d128c9ae81527873e6e0281073137e5fc3001dbb has been deleted
listing e63d4351ead7b48167a45c416797af039baac03df2df09f0b9bad10b1c936ced has been deleted
listing a7

In [4]:
# Index the new listings by listing_id. The guard keeps the cell re-runnable:
# once listing_id has become the index it is no longer a column, and calling
# set_index('listing_id') a second time would raise a KeyError.
if new_listing_df.index.name != 'listing_id':
    new_listing_df = new_listing_df.set_index('listing_id')
new_listing_df.head()

Unnamed: 0_level_0,listing_title,listing_nh,listing_city,listing_date,listing_price,listing_bedrooms,listing_bathrooms,listing_sqft,listing_address,listing_info,listing_body,listing_url,listing_first_image
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
7544341056,Avail of the limited offer - 2 MONTHS FREE!,south san francisco,South San Francisco,2022-10-11T09:22:06-0700,3870,1,1,692.0,988 El Camino Real,EV charging air conditioning cats are OK - ...,Call Now - show contact info x 116OR Text 116 ...,https://sfbay.craigslist.org/pen/apa/d/south-s...,https://images.craigslist.org/00505_klHelxMNBn...
7533119488,Welcome to the Central Park neighborhood of Sa...,san mateo,San Mateo,2022-09-12T15:52:15-0700,2900,2,1,750.0,612 s el camino real,application fee details: $41.99 application f...,Welcome to the Central Park neighborhood of Sa...,https://sfbay.craigslist.org/pen/apa/d/san-mat...,https://images.craigslist.org/00707_jLLpHEtFm9...
7546896831,💡 Beautiful Location! 2BED/BATH available NOW!,palo alto,Palo Alto,2022-10-17T18:01:59-0700,3450,2,2,1000.0,3375 Alma Street,open house dates tuesday 2022-10-18 thur...,To schedule a tour We now book our tour appoin...,https://sfbay.craigslist.org/pen/apa/d/palo-al...,https://images.craigslist.org/00h0h_wi7SjXA3VO...
7542991210,CALL NOW 1 MONTH FREE - 2 BEDROOM APARTMENT,redwood city,Redwood City,2022-10-07T15:23:41-0700,2995,2,2,900.0,1887 woodside rd,application fee details: $35 application fee ...,FEATURES Bedrooms: 2 Bathrooms: 2 Located on F...,https://sfbay.craigslist.org/pen/apa/d/redwood...,https://images.craigslist.org/00s0s_6YYC7esZDK...
7548286030,"Great location! Minutes to shopping, Bart, Cal...",burlingame,Burlingame,2022-10-21T09:12:32-0700,2495,1,1,,1830 Sequoia Ave near Murchinson Drive,cats are OK - purrr dogs are OK - wooof apa...,Welcome Home to Classic Peninsula Living at Bu...,https://sfbay.craigslist.org/pen/apa/d/millbra...,https://images.craigslist.org/00202_6KfgOHGKET...


In [5]:
# (rows, columns) of the newly parsed listings frame.
new_listing_df.shape

(299576, 13)

In [6]:
# Concatenate with existing df along rows
dir_folder = '/Users/pandabear/springboard/CapstoneTwoProject/data/interim/'

# Read listing_id as text before making it the index: the parser produces ids
# as strings, while default CSV type inference would turn the stored ids into
# integers and leave combined_df with a mixed int/str index.
existing_df = (
    pd.read_csv(dir_folder + 'listing_df_parsed.csv', dtype={'listing_id': str})
    .set_index('listing_id')
)

# NOTE(review): rows are appended as-is. If the raw folder still contains
# pages that were parsed in an earlier run, those listings will appear more
# than once -- confirm whether de-duplication is wanted here.
combined_df = pd.concat([existing_df, new_listing_df], axis=0)

In [7]:
# (rows, columns) of the previously saved listings read from the interim CSV.
existing_df.shape

(2207887, 13)

In [8]:
# (rows, columns) after appending the new listings to the existing ones.
combined_df.shape

(2507463, 13)

In [9]:
# Write the combined listings back over the interim CSV. The data goes to a
# temporary file in the same directory first and is then swapped into place
# with os.replace, so an interrupted write cannot leave a truncated copy of
# the accumulated dataset behind.
output_csv = r'/Users/pandabear/springboard/CapstoneTwoProject/data/interim/listing_df_parsed.csv'
tmp_csv = output_csv + '.tmp'
combined_df.to_csv(tmp_csv, header=True)
os.replace(tmp_csv, output_csv)
print('Saved to csv file')

Saved to csv file
