In [1]:
import json
import os
import time

import pandas as pd
import requests
from tqdm import tqdm

# Load the address table to enrich; later cells read its NUMBER and STREET columns.
df = pd.read_csv('UnivCity.csv')

In [None]:
# Bearer token for the Precisely API. It is read from the PRECISELY_AUTH_TOKEN
# environment variable so the secret never has to be typed into (and saved with)
# the notebook. Falls back to the original empty string when the variable is unset.
auth_token = os.environ.get('PRECISELY_AUTH_TOKEN', '')

In [45]:
# Every row receives the same locality labels.
for _column, _label in (
    ('CITY', 'Philadelphia'),
    ('DISTRICT', 'University City'),
    ('REGION', 'West Philadelphia'),
):
    df[_column] = _label

# Placeholder columns that the property lookup fills in row by row.
for _column in ('preciselyID', 'livingSquareFootage', 'saleAmount'):
    df[_column] = None

# Request settings shared by the GraphQL lookups defined below.
url = "https://api.cloud.precisely.com/data-graph/graphql"

access_token = 'Bearer ' + auth_token

headers = dict([
    ('cache-disabled', 'true'),
    ('Content-Type', 'application/json'),
    ('Authorization', access_token),
])

def fetch_property_data(row):
    """Look up property attributes for one address row via the GraphQL endpoint.

    The address is assembled as "<NUMBER> <STREET>, <CITY> PA" and sent in a
    ``getByAddress`` query to the module-level ``url`` with the module-level
    ``headers``.

    Args:
        row: Mapping-like row (for example a pandas Series) providing the
            ``NUMBER``, ``STREET`` and ``CITY`` entries.

    Returns:
        A tuple ``(preciselyID, livingSquareFootage, saleAmount)`` taken from
        the first record of the response, or ``(None, None, None)`` when the
        request fails or times out, the status code is not 200, the body is
        not valid JSON, or the response contains no usable record.
    """
    address = f"{row['NUMBER']} {row['STREET']}, {row['CITY']} PA".strip()

    # json.dumps yields a correctly quoted and escaped string literal, so an
    # address containing quotes or backslashes can no longer break the query.
    query = (
        "query propertyAttributesByAddress {\n"
        f"  getByAddress(address: {json.dumps(address, ensure_ascii=False)}) {{\n"
        "    propertyAttributes {\n"
        "      data {\n"
        "        preciselyID\n"
        "        livingSquareFootage\n"
        "        saleAmount\n"
        "      }\n"
        "    }\n"
        "  }\n"
        "}"
    )
    # Serialise the envelope with json.dumps instead of hand-written JSON; the
    # result is pure ASCII, so the request body needs no further encoding.
    payload = json.dumps({"query": query, "variables": {}})

    try:
        # The timeout stops a single stalled connection from hanging the whole run.
        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    except requests.RequestException:
        # Network-level failures are treated like any other failed lookup.
        return None, None, None

    if response.status_code != 200:
        return None, None, None

    try:
        result = response.json()
        data = result['data']['getByAddress']['propertyAttributes']['data'][0]
        return data['preciselyID'], data.get('livingSquareFootage'), data.get('saleAmount')
    except (KeyError, IndexError, TypeError, ValueError):
        # ValueError covers a 200 response whose body is not valid JSON.
        return None, None, None

def update_dataframe_with_api_data(df, batch_size=200, pause_seconds=1):
    """Fill the property columns of ``df`` by calling ``fetch_property_data`` per row.

    Rows are processed in consecutive slices of ``batch_size`` rows, pausing
    ``pause_seconds`` after each slice. Progress is reported through a
    ``tqdm`` bar.

    Args:
        df: DataFrame to enrich. It is modified in place; the columns
            ``preciselyID``, ``livingSquareFootage`` and ``saleAmount`` are
            written for every row.
        batch_size: Number of rows handled between pauses (default 200, the
            value that was previously hard-coded).
        pause_seconds: Seconds to sleep after each batch (default 1, the value
            that was previously hard-coded).

    Returns:
        The same ``df`` object, after modification.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    target_columns = ('preciselyID', 'livingSquareFootage', 'saleAmount')
    total_entries = len(df)
    with tqdm(total=total_entries, desc="Processing", unit="row") as pbar:
        for start in range(0, total_entries, batch_size):
            batch = df[start:start + batch_size]
            for idx, row in batch.iterrows():
                # Unpack explicitly so an unexpected tuple length fails loudly.
                preciselyID, livingSquareFootage, saleAmount = fetch_property_data(row)
                fetched = (preciselyID, livingSquareFootage, saleAmount)
                for column, value in zip(target_columns, fetched):
                    df.at[idx, column] = value
                pbar.update(1)
            # Pause between batches rather than between individual requests.
            time.sleep(pause_seconds)

    return df

# Run the property lookup for every row; this issues one API request per row, so it is slow.
df = update_dataframe_with_api_data(df)

Processing: 100%|██████████| 6947/6947 [24:53<00:00,  4.65row/s] 


In [48]:
def fetch_building_data(row):
    """Look up building attributes for one address row via the GraphQL endpoint.

    The address is assembled as "<NUMBER> <STREET>, <CITY> PA" and sent in a
    ``getByAddress`` query to the module-level ``url`` with the module-level
    ``headers``.

    Args:
        row: Mapping-like row (for example a pandas Series) providing the
            ``NUMBER``, ``STREET`` and ``CITY`` entries.

    Returns:
        A 6-tuple ``(buildingID, buildingType description, elevation,
        maximumElevation, minimumElevation, buildingArea)`` taken from the
        first building record of the response. A missing or null
        ``buildingType`` yields ``None`` for the description only. All six
        values are ``None`` when the request fails or times out, the status
        code is not 200, the body is not valid JSON, or no usable record is
        present.
    """
    address = f"{row['NUMBER']} {row['STREET']}, {row['CITY']} PA".strip()

    # json.dumps yields a correctly quoted and escaped string literal, so an
    # address containing quotes or backslashes can no longer break the query.
    query = (
        "query getBuildingByAddress {\n"
        f"  getByAddress(address: {json.dumps(address, ensure_ascii=False)}) {{\n"
        "    buildings {\n"
        "      data {\n"
        "        buildingID\n"
        "        buildingType {\n"
        "          description\n"
        "        }\n"
        "        elevation\n"
        "        maximumElevation\n"
        "        minimumElevation\n"
        "        buildingArea\n"
        "      }\n"
        "    }\n"
        "  }\n"
        "}"
    )
    # Serialise the envelope with json.dumps instead of hand-written JSON; the
    # result is pure ASCII, so the request body needs no further encoding.
    payload = json.dumps({"query": query, "variables": {}})

    nothing = (None, None, None, None, None, None)

    try:
        # The timeout stops a single stalled connection from hanging the whole run.
        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    except requests.RequestException:
        # Network-level failures are treated like any other failed lookup.
        return nothing

    if response.status_code != 200:
        return nothing

    try:
        result = response.json()
        data = result['data']['getByAddress']['buildings']['data'][0]
        # buildingType may be null in the response; calling .get on None would
        # raise AttributeError and abort the surrounding loop.
        building_type = data.get('buildingType') or {}
        return (data.get('buildingID'),
                building_type.get('description'),
                data.get('elevation'),
                data.get('maximumElevation'),
                data.get('minimumElevation'),
                data.get('buildingArea'))
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        # AttributeError: a null record; ValueError: body is not valid JSON.
        return nothing

def update_dataframe_with_building_data(df, batch_size=200, pause_seconds=1):
    """Fill the building columns of ``df`` by calling ``fetch_building_data`` per row.

    Rows are processed in consecutive slices of ``batch_size`` rows, pausing
    ``pause_seconds`` after each slice. Progress is reported through a
    ``tqdm`` bar.

    Args:
        df: DataFrame to enrich. It is modified in place; the columns
            ``buildingID``, ``buildingType``, ``elevation``,
            ``maximumElevation``, ``minimumElevation`` and ``buildingArea``
            are written for every row.
        batch_size: Number of rows handled between pauses (default 200, the
            value that was previously hard-coded).
        pause_seconds: Seconds to sleep after each batch (default 1, the value
            that was previously hard-coded).

    Returns:
        The same ``df`` object, after modification.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    target_columns = ('buildingID', 'buildingType', 'elevation',
                      'maximumElevation', 'minimumElevation', 'buildingArea')
    total_entries = len(df)
    with tqdm(total=total_entries, desc="Processing Buildings", unit="row") as pbar:
        for start in range(0, total_entries, batch_size):
            batch = df[start:start + batch_size]
            for idx, row in batch.iterrows():
                # Unpack explicitly so an unexpected tuple length fails loudly.
                (buildingID, buildingType, elevation, maximumElevation,
                 minimumElevation, buildingArea) = fetch_building_data(row)
                fetched = (buildingID, buildingType, elevation,
                           maximumElevation, minimumElevation, buildingArea)
                for column, value in zip(target_columns, fetched):
                    df.at[idx, column] = value
                pbar.update(1)
            # Pause between batches rather than between individual requests.
            time.sleep(pause_seconds)

    return df

# Create the building columns up front, each initialised to None.
for _building_column in (
    'buildingID',
    'buildingType',
    'elevation',
    'maximumElevation',
    'minimumElevation',
    'buildingArea',
):
    df[_building_column] = None

# Run the building lookup over every row and keep the enriched frame.
df = update_dataframe_with_building_data(df)

Processing Buildings: 100%|██████████| 6947/6947 [26:25<00:00,  4.38row/s]


In [2]:
# Read the two CSV parts and stack them into a single frame with a fresh 0..n-1 index.
# NOTE(review): neither input file is written anywhere in the visible notebook, and
# this rebinds `df`, replacing the frame enriched above. Confirm where these files come from.
df1, df2 = (
    pd.read_csv(csv_name)
    for csv_name in ('UnivCity_first5k.csv', 'UnivCity_last7k.csv')
)

df = pd.concat((df1, df2), ignore_index=True)
df.shape

(11947, 20)

In [3]:
# Write the combined table to disk; index=False leaves the row index out of the CSV.
df.to_csv('final_UnivCity.csv', index = False)