In [4]:
import pandas as pd
# Read the listings file 'AB_NYC_2019.csv' (resolved relative to the
# notebook's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='AB_NYC_2019.csv')

# Preview the first five rows as plain text.
print(df.head(n=5))

     id                                              name  host_id  \
0  2539                Clean & quiet apt home by the park     2787   
1  2595                             Skylit Midtown Castle     2845   
2  3647               THE VILLAGE OF HARLEM....NEW YORK !     4632   
3  3831                   Cozy Entire Floor of Brownstone     4869   
4  5022  Entire Apt: Spacious Studio/Loft by central park     7192   

     host_name neighbourhood_group neighbourhood  latitude  longitude  \
0         John            Brooklyn    Kensington  40.64749  -73.97237   
1     Jennifer           Manhattan       Midtown  40.75362  -73.98377   
2    Elisabeth           Manhattan        Harlem  40.80902  -73.94190   
3  LisaRoxanne            Brooklyn  Clinton Hill  40.68514  -73.95976   
4        Laura           Manhattan   East Harlem  40.79851  -73.94399   

         room_type  price  minimum_nights  number_of_reviews last_review  \
0     Private room    149               1                  9  20

In [5]:
# Task 2: Preprocess the data and handle missing values or inconsistencies

# Count the missing values in every column before any cleaning is applied.
missing_values = df.isnull().sum()
print("Missing Values:")
print(missing_values)

# Only rows lacking a coordinate are dropped; missing values in any other
# column are deliberately left in place by this cell.
# dropna() returns a new frame, so `df` is rebound instead of mutated in place.
columns_with_missing_values = ['latitude', 'longitude']
df = df.dropna(subset=columns_with_missing_values)

# Handling inconsistencies: collect rows with a negative 'price' or
# 'minimum_nights' so they can be inspected before being corrected.
inconsistent_price = df[df['price'] < 0]
inconsistent_min_nights = df[df['minimum_nights'] < 0]

# Display inconsistent rows (nothing is printed when there are none).
if not inconsistent_price.empty:
    print("\nRows with Negative Price:")
    print(inconsistent_price)
if not inconsistent_min_nights.empty:
    print("\nRows with Negative Minimum Nights:")
    print(inconsistent_min_nights)

# Raise negative 'price' and 'minimum_nights' values to zero.
# Series.clip(lower=0) is the vectorised equivalent of applying max(x, 0) to
# every element; assign() builds a new frame so no view/copy ambiguity arises.
df = df.assign(
    price=df['price'].clip(lower=0),
    minimum_nights=df['minimum_nights'].clip(lower=0),
)

# Verify the changes after handling missing values and inconsistencies.
# DataFrame.info() writes its report to stdout and returns None, so it must
# not be wrapped in print() (that would emit a stray "None" line).
print("\nAfter Handling Missing Values and Inconsistencies:")
df.info()

Missing Values:
id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64

After Handling Missing Values and Inconsistencies:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48895 entries, 0 to 48894
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48895 non-n

In [6]:
import folium

# Step 3: Create a base map using Folium to represent the geographical area of New York City.

# [latitude, longitude] used as the centre of the New York City map.
nyc_coordinates = [40.7128, -74.0060]

# Build the map around that centre point, opening at zoom level 11.
nyc_map = folium.Map(zoom_start=11, location=nyc_coordinates)

# Leaving the map as the cell's final expression makes the notebook render it.
nyc_map


In [11]:
from html import escape

import folium
from folium.plugins import MarkerCluster

# Step 3: Create a base map using Folium to represent the geographical area of New York City.
# The CartoDB Positron tiles are requested when the map is constructed, so they
# replace the default tile layer instead of being stacked on top of it.
nyc_coordinates = [40.7128, -74.0060]
nyc_map = folium.Map(location=nyc_coordinates, zoom_start=11, tiles='cartodbpositron')

# Single source of truth for marker colours; the legend below is generated
# from the same mapping so the two cannot drift apart.
ROOM_TYPE_COLORS = {'Private room': 'green', 'Entire home/apt': 'blue'}
OTHER_ROOM_TYPE_COLOR = 'gray'  # any room type not listed above
MISSING_NAME_LABEL = 'Unnamed listing'


def safe_html_text(value):
    """Return ``value`` as text that is safe to embed in popup/tooltip HTML.

    The value is converted with ``str()`` and HTML-escaped so characters such
    as ``&``, ``<`` and ``>`` in free-text fields are displayed literally
    instead of being interpreted as markup.
    """
    escaped = escape(str(value))
    # NOTE(review): recent folium versions appear to embed popup/tooltip HTML
    # inside a JavaScript template literal, so backticks and '$' are replaced
    # with HTML entities as well -- confirm against the installed version.
    return escaped.replace('`', '&#96;').replace('$', '&#36;')


# Step 4: Extract the latitude and longitude coordinates from the dataset and plot them as markers on the map.
# Only the columns used by the markers are selected, and missing names are
# replaced with a placeholder so they are not displayed as "nan".
listing_columns = ['latitude', 'longitude', 'name', 'room_type', 'price', 'availability_365']
listings = df[listing_columns].fillna({'name': MISSING_NAME_LABEL})

# Markers are grouped into a cluster layer: one marker per row is added, and
# clustering keeps the rendered map responsive when there are many rows.
marker_cluster = MarkerCluster(name='Listings').add_to(nyc_map)

# itertuples() avoids building a Series per row, which iterrows() does.
for listing in listings.itertuples(index=False):
    listing_name = safe_html_text(listing.name)
    room_type = safe_html_text(listing.room_type)

    # Marker colour is chosen by room type; unknown types get the fallback colour.
    # A fresh Icon is created per marker because it is attached to that marker.
    icon = folium.Icon(color=ROOM_TYPE_COLORS.get(listing.room_type, OTHER_ROOM_TYPE_COLOR))

    # Popup shows the main listing attributes; free-text fields are escaped.
    popup_text = (
        f"Name: {listing_name}<br>"
        f"Price: ${listing.price}<br>"
        f"Room Type: {room_type}<br>"
        f"Availability: {listing.availability_365} days"
    )

    # Tooltip displays the listing name when hovering over the marker.
    folium.Marker(
        location=[listing.latitude, listing.longitude],
        popup=folium.Popup(popup_text, max_width=200),
        tooltip=folium.Tooltip(listing_name),
        icon=icon,
    ).add_to(marker_cluster)


# Step 9: Customize the style and layout of the map
# Add a legend to the map, with one entry per colour the markers can take.
legend_entries = [*ROOM_TYPE_COLORS.items(), ('Other', OTHER_ROOM_TYPE_COLOR)]
legend_items = ' &nbsp;&nbsp;\n         '.join(
    f'<i class="fa fa-circle" style="color: {color};"></i> {label}'
    for label, color in legend_entries
)
legend_html = f'''
     <div style="position: fixed; bottom: 50px; left: 50px; z-index:9999; font-size:14px; background-color: white; padding: 10px; border: 2px solid grey; border-radius: 5px;">
         {legend_items}
     </div>
     '''
nyc_map.get_root().html.add_child(folium.Element(legend_html))

# Set the map background color (the tiles were already chosen when the map was created).
nyc_map.get_root().html.add_child(folium.Element('<style> .leaflet-container { background-color: #f0f0f0; } </style>'))


# Display the map with the customized markers, tooltips, popups, and legend
nyc_map
