# KPI

In [38]:
# import libraries 
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings("ignore")

## Data Loading

In [39]:
# Data location and sprint selection
SPRINT = 1        # sprint whose cleaned export is analysed
PATH = 'Data'     # data folder name; read_file joins it under '..'
filename = f'data_clean_sprint_{SPRINT}.csv'

In [40]:
# define function to read file
def read_file(filename):
    """Load a cleaned sprint CSV into a DataFrame and preview its first rows.

    Parameters
    ----------
    filename : str
        Name of the .csv file; it is looked up under ``../<PATH>/`` where
        ``PATH`` is the module-level data folder constant.

    Returns
    -------
    pandas.DataFrame or Exception
        The parsed frame on success, with ``insert_date``,
        ``first_review_date`` and ``last_review_date`` parsed as dates using
        the ``%d/%m/%Y`` format. On any failure the exception is printed and
        the exception object itself is returned (not raised), so callers
        have to check the type of the result.
    """
    date_columns = ['insert_date', 'first_review_date', 'last_review_date']

    try:
        # Path construction stays inside the try so that every failure,
        # including a missing PATH, follows the same print-and-return route.
        csv_path = os.path.join('..', PATH, filename)
        frame = pd.read_csv(csv_path,
                            parse_dates=date_columns,
                            date_format='%d/%m/%Y')
        display(frame.head())
    except Exception as error:
        print("An exception occurred:", error)
        return error

    return frame

In [41]:
df = read_file(filename)

# read_file reports failures by returning the exception object instead of
# raising it. Stop here in that case so later cells do not fail with an
# unrelated error on a `df` that is not a DataFrame.
if isinstance(df, Exception):
    raise RuntimeError(f"Could not load {filename!r}") from df

Unnamed: 0,apartment_id,name,description,host_id,neighbourhood_name,neighbourhood_district,room_type,accommodates,bathrooms,bedrooms,beds,amenities_list,price,minimum_nights,maximum_nights,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,first_review_date,last_review_date,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,is_instant_bookable,reviews_per_month,country,city,insert_date
0,11964,A ROOM WITH A VIEW,Private bedroom in our attic apartment. Right ...,45553,Centro,,Private room,2,2.0,1.0,1.0,"TV,Internet,Wifi,Air conditioning,Elevator,Buz...",400.0,3,365,VERDADERO,7,20,40,130,78,2010-01-02,2017-09-05,970.0,100.0,100.0,100.0,100.0,100.0,100.0,FALSO,75.0,spain,malaga,2018-07-31
1,21853,Bright and airy room,We have a quiet and sunny room with a good vie...,83531,C�rmenes,Latina,Private room,1,1.0,1.0,1.0,"TV,Internet,Wifi,Air conditioning,Kitchen,Free...",170.0,4,40,VERDADERO,0,0,0,162,33,2014-10-10,2018-07-15,920.0,90.0,90.0,100.0,100.0,80.0,90.0,FALSO,52.0,spain,madrid,2020-01-10
2,32347,Explore Cultural Sights from a Family-Friendly...,Open French doors and step onto a plant-filled...,139939,San Vicente,Casco Antiguo,Entire home/apt,4,1.0,2.0,2.0,"TV,Internet,Wifi,Air conditioning,Wheelchair a...",990.0,2,120,VERDADERO,26,31,31,270,148,2011-01-05,2019-07-22,980.0,100.0,100.0,100.0,100.0,100.0,100.0,VERDADERO,142.0,spain,sevilla,2019-07-29
3,35379,Double 02 CasanovaRooms Barcelona,Room at a my apartment. Kitchen and 2 bathroom...,152232,l'Antiga Esquerra de l'Eixample,Eixample,Private room,2,2.0,1.0,1.0,"TV,Internet,Wifi,Kitchen,Breakfast,Elevator,Bu...",400.0,2,730,VERDADERO,9,23,49,300,292,2012-03-13,2020-01-04,940.0,100.0,90.0,100.0,100.0,100.0,90.0,VERDADERO,306.0,spain,barcelona,2020-01-10
4,35801,Can Torras Farmhouse Studio Suite,Lay in bed & watch sunlight change the mood of...,153805,Quart,,Private room,5,1.0,2.0,5.0,"Wifi,Pool,Free parking on premises,Breakfast,P...",900.0,1,180,VERDADERO,0,19,49,312,36,2011-07-08,2018-08-08,970.0,100.0,100.0,100.0,100.0,100.0,100.0,FALSO,39.0,spain,girona,2019-02-19


In [42]:
# Summarise column dtypes and non-null counts of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6733 entries, 0 to 6732
Data columns (total 35 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   apartment_id                 6733 non-null   int64  
 1   name                         6730 non-null   object 
 2   description                  6706 non-null   object 
 3   host_id                      6733 non-null   int64  
 4   neighbourhood_name           6733 non-null   object 
 5   neighbourhood_district       4075 non-null   object 
 6   room_type                    6733 non-null   object 
 7   accommodates                 6733 non-null   int64  
 8   bathrooms                    6702 non-null   float64
 9   bedrooms                     6704 non-null   float64
 10  beds                         6730 non-null   float64
 11  amenities_list               6717 non-null   object 
 12  price                        6612 non-null   float64
 13  minimum_nights    

## KPI occupancy rates
### 1. Occupancy Rate (Monthly) 

Occupancy Rate = (Number of Occupied Days / Number of Days) × 100

In [43]:
# Mean occupancy (%) for each availability window: the share of days in the
# window that are not listed as available, averaged over all listings.
for window in (30, 60, 90, 365):
    print(f'\nOccupancy Rate {window} days:')
    occupied_share = (window - df[f"availability_{window}"]) / window
    print(occupied_share.mean()*100)


Occupancy Rate 30 days:
59.02767463735829

Occupancy Rate 60 days:
54.292786771622346

Occupancy Rate 90 days:
50.78007162070729

Occupancy Rate 365 days:
48.65978039059305


In [44]:
# 1. Occupancy Rate for 30 days
# Per-listing percentage of the 30-day window that is not listed as available;
# stored as a column because later cells aggregate it by city.
df['occupancy_rate'] = (30 - df['availability_30']) / 30 * 100

# Overall KPI: average of the per-listing rates
occupancy_rate = df['occupancy_rate'].mean()

# print results
print(f"Occupancy Rate for 30 days: {occupancy_rate:.2f}")

Occupancy Rate for 30 days: 59.03


### 2. City with the Highest Occupancy (Monthly) 

City with the Highest Occupancy = the city with the highest mean occupancy rate, i.e. argmax over cities of mean(Number of Occupied Days / Number of Days)


In [45]:
# Mean 30-day occupancy rate per city, sorted from lowest to highest
df.groupby('city')['occupancy_rate'].mean().sort_values()

city
menorca      50.048309
girona       51.376344
sevilla      53.137830
valencia     54.702581
mallorca     55.307178
malaga       59.518191
barcelona    62.952801
madrid       65.284145
Name: occupancy_rate, dtype: float64

In [46]:
# 2. City with the Highest Occupancy
# idxmax returns the index label (the city name) of the largest per-city mean.
best_city = (
    df.groupby('city')['occupancy_rate']
    .mean()
    .idxmax()
)

# print results
print(f"City with the Highest Occupancy: {best_city}")

City with the Highest Occupancy: madrid


## KPI customers satisfaction

### 3. General Satisfaction Index (average review_scores_rating)

In [47]:
# Overall satisfaction KPI: mean of the overall rating column.
# NOTE(review): sample rows show ratings such as 970.0 while the category
# sub-scores top out at 100.0 - confirm the scale of this column.
general_satisfaction_index = df.loc[:, 'review_scores_rating'].mean()
print(f"General Satisfaction Index: {general_satisfaction_index:.2f}")

General Satisfaction Index: 919.99


### 4. Compute the average score for each category

In [41]:
# (label, column) pairs for every review sub-score; the order is kept because
# max() returns the first label when two averages tie.
score_columns = [
    ('accuracy', 'review_scores_accuracy'),
    ('cleanliness', 'review_scores_cleanliness'),
    ('checkin', 'review_scores_checkin'),
    ('communication', 'review_scores_communication'),
    ('location', 'review_scores_location'),
    ('value', 'review_scores_value'),
]

# Mean score per category
avg_scores = {label: df[column].mean() for label, column in score_columns}

# Category whose average score is the highest
highest_satisfaction_item = max(avg_scores, key=lambda label: avg_scores[label])

# Print results
print(f"Highest Satisfaction Item: {highest_satisfaction_item} ({avg_scores[highest_satisfaction_item]:.2f})")

Highest Satisfaction Item: communication (96.43)


# Business Questions
## 1 Marketing | Average Price by Room Type and City

In [None]:
#business question | week 1, avg by city and room_type

# Drop listings without a price so that a (room_type, city) group made up
# only of missing prices does not appear as a NaN row in the result.
df_no_null_price = df.dropna(subset=['price'])

# Mean price for every (room_type, city) pair
avg_price_by_room_type_city = df_no_null_price.groupby(['room_type', 'city'])['price'].mean()

#print avg
print("Average price by room_type y city:")
print(avg_price_by_room_type_city)

# Grouped bar chart: one group per room type, one bar per city.
# The axes are created explicitly and passed to pandas. Calling plt.figure()
# first and then DataFrame.plot() without ax= makes pandas open a second
# figure, which leaves an empty 12x6 figure behind and ignores the size.
fig, ax = plt.subplots(figsize=(12, 6))
avg_price_by_room_type_city.unstack().plot(kind='bar', ax=ax)
ax.set_title('Average price by room type and city')
ax.set_xlabel('Room type')
ax.set_ylabel('Price')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(title='City')
fig.tight_layout()
plt.show()
