### LISTING AND RENT PRICES

In [1]:
import pandas as pd
import os

Data about listing and rent prices in the United States is extracted from the following page:<br> https://www.zillow.com/

### We extract the data from our S3 bucket

In [8]:
from private.s3_aws import access_key, secret_access_key

pip install boto3 pandas s3fs

In [9]:
df = pd.read_csv(f"s3://rawdatagrupo07/City_time_series.csv",
    storage_options={
        "key": access_key,
        "secret": secret_access_key
    },
)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3762566 entries, 0 to 3762565
Data columns (total 81 columns):
 #   Column                                                         Dtype  
---  ------                                                         -----  
 0   Date                                                           object 
 1   RegionName                                                     object 
 2   InventorySeasonallyAdjusted_AllHomes                           float64
 3   InventoryRaw_AllHomes                                          float64
 4   MedianListingPricePerSqft_1Bedroom                             float64
 5   MedianListingPricePerSqft_2Bedroom                             float64
 6   MedianListingPricePerSqft_3Bedroom                             float64
 7   MedianListingPricePerSqft_4Bedroom                             float64
 8   MedianListingPricePerSqft_5BedroomOrMore                       float64
 9   MedianListingPricePerSqft_AllHomes            

In [81]:
df.Date = pd.to_datetime(df.Date)

We just keep data from after 2011.

In [82]:
df = df[df['Date']>='2011-01-01']

In [83]:
df.rename(columns = {'RegionName':'Unique_City_ID'}, inplace = True)

### Rental

In [84]:
rental_prices = df[['Unique_City_ID','Date','MedianRentalPrice_1Bedroom',
       'MedianRentalPrice_2Bedroom', 'MedianRentalPrice_3Bedroom',
       'MedianRentalPrice_4Bedroom', 'MedianRentalPrice_5BedroomOrMore',
       'MedianRentalPrice_AllHomes', 'MedianRentalPrice_CondoCoop',
       'MedianRentalPrice_DuplexTriplex','MedianRentalPrice_MultiFamilyResidence5PlusUnits',
       'MedianRentalPrice_SingleFamilyResidence', 'MedianRentalPrice_Studio']]

In [85]:
rental_prices = rental_prices.drop_duplicates(subset=['MedianRentalPrice_1Bedroom','MedianRentalPrice_2Bedroom', 'MedianRentalPrice_3Bedroom','MedianRentalPrice_4Bedroom', 'MedianRentalPrice_5BedroomOrMore','MedianRentalPrice_AllHomes', 'MedianRentalPrice_CondoCoop','MedianRentalPrice_DuplexTriplex','MedianRentalPrice_MultiFamilyResidence5PlusUnits','MedianRentalPrice_SingleFamilyResidence', 'MedianRentalPrice_Studio'])

In [86]:
rental_prices.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 60939 entries, 2381417 to 3762540
Data columns (total 13 columns):
 #   Column                                            Non-Null Count  Dtype         
---  ------                                            --------------  -----         
 0   Unique_City_ID                                    60939 non-null  object        
 1   Date                                              60939 non-null  datetime64[ns]
 2   MedianRentalPrice_1Bedroom                        16919 non-null  float64       
 3   MedianRentalPrice_2Bedroom                        32160 non-null  float64       
 4   MedianRentalPrice_3Bedroom                        32651 non-null  float64       
 5   MedianRentalPrice_4Bedroom                        9949 non-null   float64       
 6   MedianRentalPrice_5BedroomOrMore                  1108 non-null   float64       
 7   MedianRentalPrice_AllHomes                        51397 non-null  float64       
 8   MedianRentalPrice_

In [87]:
rental_prices.max()

Unique_City_ID                                        zionsvilleboonein
Date                                                2017-12-31 00:00:00
MedianRentalPrice_1Bedroom                                       3631.5
MedianRentalPrice_2Bedroom                                       4950.0
MedianRentalPrice_3Bedroom                                      44000.0
MedianRentalPrice_4Bedroom                                      15950.0
MedianRentalPrice_5BedroomOrMore                                 5890.0
MedianRentalPrice_AllHomes                                      49000.0
MedianRentalPrice_CondoCoop                                      5500.0
MedianRentalPrice_DuplexTriplex                                  4500.0
MedianRentalPrice_MultiFamilyResidence5PlusUnits                 5300.0
MedianRentalPrice_SingleFamilyResidence                         50000.0
MedianRentalPrice_Studio                                         4650.0
dtype: object

In [88]:
rental_prices.fillna(0,inplace=True)

### Listing

In [64]:
listing_prices = df[['Unique_City_ID', 'Date','MedianListingPrice_1Bedroom', 'MedianListingPrice_2Bedroom',
       'MedianListingPrice_3Bedroom', 'MedianListingPrice_4Bedroom',
       'MedianListingPrice_5BedroomOrMore', 'MedianListingPrice_AllHomes',
       'MedianListingPrice_CondoCoop', 'MedianListingPrice_DuplexTriplex',
       'MedianListingPrice_SingleFamilyResidence']]

In [65]:
listing_prices = listing_prices.drop_duplicates(subset=['MedianListingPrice_1Bedroom', 'MedianListingPrice_2Bedroom',
       'MedianListingPrice_3Bedroom', 'MedianListingPrice_4Bedroom',
       'MedianListingPrice_5BedroomOrMore', 'MedianListingPrice_AllHomes',
       'MedianListingPrice_CondoCoop', 'MedianListingPrice_DuplexTriplex',
       'MedianListingPrice_SingleFamilyResidence'])

In [66]:
listing_prices.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 293701 entries, 2381417 to 3762559
Data columns (total 11 columns):
 #   Column                                    Non-Null Count   Dtype         
---  ------                                    --------------   -----         
 0   Unique_City_ID                            293701 non-null  object        
 1   Date                                      293701 non-null  datetime64[ns]
 2   MedianListingPrice_1Bedroom               6145 non-null    float64       
 3   MedianListingPrice_2Bedroom               53919 non-null   float64       
 4   MedianListingPrice_3Bedroom               177642 non-null  float64       
 5   MedianListingPrice_4Bedroom               108841 non-null  float64       
 6   MedianListingPrice_5BedroomOrMore         27116 non-null   float64       
 7   MedianListingPrice_AllHomes               270577 non-null  float64       
 8   MedianListingPrice_CondoCoop              37228 non-null   float64       
 9   MedianLi

In [67]:
listing_prices.max()

Unique_City_ID                                zionsvilleboonein
Date                                        2017-12-31 00:00:00
MedianListingPrice_1Bedroom                            879000.0
MedianListingPrice_2Bedroom                           1757500.0
MedianListingPrice_3Bedroom                           6499999.5
MedianListingPrice_4Bedroom                           5750000.0
MedianListingPrice_5BedroomOrMore                    12950000.0
MedianListingPrice_AllHomes                           6199500.0
MedianListingPrice_CondoCoop                          4642500.0
MedianListingPrice_DuplexTriplex                      2872500.0
MedianListingPrice_SingleFamilyResidence              7375000.0
dtype: object

In [68]:
listing_prices.fillna(0,inplace=True)