In [None]:
import requests
import os
import gzip
import shutil
import pandas as pd


# Address of the gzip-compressed CSV that download_file() fetches below.
# NOTE(review): left empty here (presumably redacted) -- set it before running this cell.
url = ''

def download_file(url, timeout=30):
    """Stream ``url`` into a file in the current working directory.

    The local name is the last path segment of the URL with any query
    string or fragment removed, so ``.../calendar.csv.gz?v=2`` is saved as
    ``calendar.csv.gz``.

    Args:
        url: HTTP(S) address of the file to fetch.
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled server would block the notebook indefinitely.

    Returns:
        The name of the file that was written.

    Raises:
        ValueError: If no file name can be derived from ``url`` (for
            example when it ends with ``/``).
        requests.HTTPError: If the server answers with an error status.
    """
    # Strip "#fragment" and "?query" before taking the last path segment.
    path_part = url.split('#', 1)[0].split('?', 1)[0]
    local_filename = path_part.split('/')[-1]
    if not local_filename:
        # Fail before any network traffic instead of at open('').
        raise ValueError(f"cannot derive a file name from URL: {url!r}")
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            # Stream in 8 KiB chunks so the body is never held in memory at once.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

def decompress_file(file):
    """Decompress a gzip file and return the path of the result.

    The output name is ``file`` with every ``'.gz'`` occurrence removed,
    which is the same rule the rest of the notebook uses to locate the
    decompressed file.

    Args:
        file: Path of the gzip-compressed input.

    Returns:
        Path of the decompressed file.

    Raises:
        ValueError: If ``file`` contains no ``'.gz'``. The output path would
            then equal the input path, and opening it for writing would
            truncate the input before it is read.
    """
    target = file.replace('.gz', '')
    if target == file:
        raise ValueError(f"expected a path containing '.gz', got {file!r}")
    with gzip.open(file, 'rb') as f_in:
        with open(target, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    return target

def delete_file(file):
    """Remove the file at path ``file`` from disk."""
    os.unlink(file)

def read_file(file):
    """Parse the CSV at ``file`` with pandas' default settings and return the frame."""
    frame = pd.read_csv(file)
    return frame


# Download the archive, unpack it, and load the CSV into `data`.
file = download_file(url)
# decompress_file() writes to the input path with '.gz' removed; compute it once.
csv_path = file.replace('.gz', '')
decompress_file(file)
try:
    data = read_file(csv_path)
finally:
    # Remove the unpacked CSV even when parsing fails, so no stray copy stays on disk.
    # The downloaded archive itself is left in place.
    delete_file(csv_path)
print(data)

         listing_id        date available    price  adjusted_price  \
0           8635835  2024-09-05         f  $250.00             NaN   
1           8635835  2024-09-06         f  $250.00             NaN   
2           8635835  2024-09-07         f  $250.00             NaN   
3           8635835  2024-09-08         f  $250.00             NaN   
4           8635835  2024-09-09         f  $250.00             NaN   
...             ...         ...       ...      ...             ...   
2273945    11104939  2025-08-31         t  $160.00             NaN   
2273946    11104939  2025-09-01         t  $160.00             NaN   
2273947    11104939  2025-09-02         t  $160.00             NaN   
2273948    11104939  2025-09-03         t  $160.00             NaN   
2273949    11104939  2025-09-04         t  $160.00             NaN   

         minimum_nights  maximum_nights  
0                     5            1125  
1                     5            1125  
2                     5          

In [2]:
# Write the loaded frame to calendar.csv in the working directory, without the index column.
data.to_csv(path_or_buf='calendar.csv', index=False)

In [3]:
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,8635835,2024-09-05,f,$250.00,,5,1125
1,8635835,2024-09-06,f,$250.00,,5,1125
2,8635835,2024-09-07,f,$250.00,,5,1125
3,8635835,2024-09-08,f,$250.00,,5,1125
4,8635835,2024-09-09,f,$250.00,,5,1125


In [5]:
# Count rows that exactly repeat an earlier row (the first occurrence is not counted).
data.duplicated(keep='first').sum()

np.int64(0)

In [6]:
# List the column labels of the frame.
data.columns

Index(['listing_id', 'date', 'available', 'price', 'adjusted_price',
       'minimum_nights', 'maximum_nights'],
      dtype='object')

### Descripción de las Columnas

| Columna          | Descripción                                                                 |
|------------------|-----------------------------------------------------------------------------|
| **listing_id**   | Identificador único de la propiedad en la base de datos de Airbnb.          |
| **date**         | Fecha específica a la que se refieren los datos de disponibilidad y precio. |
| **available**    | Indica si la propiedad está disponible para alquilar en la fecha especificada (t: disponible, f: no disponible). |
| **price**        | Precio de la propiedad para la fecha especificada.                          |
| **adjusted_price** | Precio ajustado de la propiedad, considerando descuentos o aumentos.      |
| **minimum_nights** | Número mínimo de noches requerido para alquilar la propiedad.             |
| **maximum_nights** | Número máximo de noches permitido para alquilar la propiedad.             |


In [7]:
# Show the dtype of each column.
data.dtypes

listing_id          int64
date               object
available          object
price              object
adjusted_price    float64
minimum_nights      int64
maximum_nights      int64
dtype: object

In [9]:
# Drop the adjusted_price column. Rebinding `data` with errors='ignore' keeps this
# cell safe to re-run; the in-place drop raised KeyError once the column was gone.
data = data.drop(columns=['adjusted_price'], errors='ignore')

In [14]:
# Preview the first five rows after removing adjusted_price.
data.head(n=5)

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights
0,8635835,2024-09-05,f,$250.00,5,1125
1,8635835,2024-09-06,f,$250.00,5,1125
2,8635835,2024-09-07,f,$250.00,5,1125
3,8635835,2024-09-08,f,$250.00,5,1125
4,8635835,2024-09-09,f,$250.00,5,1125


In [18]:
# Share of missing values per column, as a percentage of all rows.
(data.isna().sum() / len(data)) * 100

listing_id        0.0
date              0.0
available         0.0
price             0.0
minimum_nights    0.0
maximum_nights    0.0
dtype: float64

In [20]:
# Convert the 't'/'f' flags to booleans. The dtype guard keeps the cell safe to
# re-run: mapping an already-boolean column through this dict matches no key and
# would turn every value into NaN.
if data['available'].dtype != bool:
    data['available'] = data['available'].map({'t': True, 'f': False})
data.dtypes

listing_id         int64
date              object
available           bool
price             object
minimum_nights     int64
maximum_nights     int64
dtype: object

In [22]:
# Preview the first five rows with `available` converted to booleans.
data.head(n=5)

Unnamed: 0,listing_id,date,available,price,minimum_nights,maximum_nights
0,8635835,2024-09-05,False,$250.00,5,1125
1,8635835,2024-09-06,False,$250.00,5,1125
2,8635835,2024-09-07,False,$250.00,5,1125
3,8635835,2024-09-08,False,$250.00,5,1125
4,8635835,2024-09-09,False,$250.00,5,1125


In [23]:
# Write the cleaned frame to calendar.csv (replacing any existing file), without the index column.
data.to_csv(path_or_buf='calendar.csv', index=False)