<a href="https://colab.research.google.com/github/nezihaksu/Airbnb_Istanbul/blob/main/airbnb_eda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', None)
from sklearn.impute import SimpleImputer
import re

#Explatory Data Analysis

In [9]:
DF = r'/content/drive/MyDrive/listings.csv'
FILE_TYPE = "csv"
IMPUTE = True
ALLOWED_NAN_PERCENTAGE = 10
DROP_KEYWORDS = ["code","zipcode","link","url","id","name","thumbnail","picture","pic","description","note"]
NONE_VALUES = [np.nan,None,"None","Null","NONE","NULL","none","null","nan",""," ",0]


class Explore():
  """Explore the dataset."""
  def __init__(self,df,file_type:str):
    if file_type == "xlsx" or  file_type == "xls":
      self.df = pd.read_excel(df,engine="python")
    self.df = pd.read_csv(df,engine="python")
    self.file_type = file_type

  def __call__(self):
    return self.df

  def intro(self):
    return "===INFO===",self.df.info(),"===DESCRIPTION===",self.df.describe(),"===DTYPES==",self.df.dtypes
  
  def unique_values(self):
    #Unique values that are in features.
    for column in self.df.columns:
      print(column.upper()+ " UNIQUE VALUES")
      print(str(df[column].unique())+"\n")

  def missing_values(self):
	  missing_percentage = self.df.isnull().sum()*100/len(self.df)
	  plt.figure(figsize=(5, 15))
	  missing_percentage.plot(kind='barh')
	  plt.xticks(rotation=90, fontsize=10)
	  plt.yticks(fontsize=5)
	  plt.xlabel("Missing Percentage", fontsize=14)
	  plt.show()
   
  #Plotting histograms of the numerical features to see the distribution of each of them.
  def dtype_histogram(self,data_type:str):
    numerical_features = self.df.dtypes[self.df.dtypes == data_type].index.to_list()
    self.df[numerical_features].hist(bins = 50,figsize = (20,15))
    plt.show()

In [10]:
explore = Explore(df=DF,file_type=FILE_TYPE)

#Cleaning Dataset

In [22]:
class Cleaner():
  """Clean the dataset."""
  def __init__(self,df,file_type:str):
    if file_type == "xlsx" or  file_type == "xls":
      self.df = pd.read_excel(df,engine="python")
    self.df = pd.read_csv(df,engine="python")
    self.file_type = file_type

  def __call__(self):
    return self.df

  def _drop_type_column(self,pattern:str,inplace:bool):
    for column in self.df.columns:
      if any(self.df[column].astype(str).str.contains(pattern,regex=True)):
        self.df.drop(column,axis=1,inplace=inplace)
    return self.df

  #Expanding one column dataframe into multiple columns according to split character.
  def split_column_into_df(self,column_index:int,split_char:str):
    if len(df.columns) == 1:
      quotes_strip = list(self.df.columns)[0].replace(strip_char,'')
      columns_split = quotes_strip.split(split_char)
      self.df = self.df[self.df.iloc[:,0].name].str.split(pat = split_char,expand = True)
      self.df.columns =  columns_split
      self.df.replace(split_char,'',regex = True,inplace = True)
    print("This method is only for explanding single column dataframes!")
    return self.df

  def drop_missing_columns(self,percentage):
    self.df.dropna(how="all",axis=1,inplace=True)
    #In case of dropna method does not work as expect because of value type \
    #this loop over columns would solve some of the problems.
    for column in self.df.columns:
      if len(self.df[column].unique()) == 1:
        self.df.drop(column,axis=1,inplace=True)
    missing_percentage = self.df.isnull().sum()*100/len(self.df)
    features_left = missing_percentage[missing_percentage < percentage].index
    self.df = self.df[features_left] 
    return self.df

  #Drop columns by their names.
  def drop_column_contains(self,keywords:list):
    for keyword in keywords:
      keyword_pattern = re.compile(keyword)
      for column in self.df.columns:
        if keyword_pattern.search(column):
          self.df.drop(column,axis=1,inplace=True)
    return self.df

  def drop_text_columns(self,inplace):
    sentence_pattern = r'[A-z][A-z]+?\W'
    link_pattern = r'[A-z][A-z]+?://'
    text_pattern = r'|'.join((sentence_pattern,link_pattern))  
    return self._drop_type_column(text_pattern,inplace)
  
  def drop_date_columns(self,inplace:bool):
    date_pattern_dash = r"([12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01]))"
    date_pattern_dot = r"([12]\d{3}.(0[1-9]|1[0-2]).(0[1-9]|[12]\d|3[01]))"
    date_pattern_slash = r"([12]\d{3}/(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01]))"
    date_pattern_dash_text =  r"([12]\d{3}-([A-z]+)-(0[1-9]|[12]\d|3[01]))"
    date_pattern_dot_text = r"([12]\d{3}.([A-z]+).(0[1-9]|[12]\d|3[01]))"
    date_pattern_slash_text = r"([12]\d{3}/([A-z]+)/(0[1-9]|[12]\d|3[01]))"
    date_pattern = r'|'.join((date_pattern_dash,
                              date_pattern_dot,
                              date_pattern_slash,
                              date_pattern_dash_text,
                              date_pattern_dot_text,
                              date_pattern_slash_text))
    return self._drop_type_column(date_pattern,inplace)
  
  def strip_signs(self):
    num_pattern = r"[0-9]"
    non_num_pattern = r"[^0-9]"
    for column in self.df.columns:
      if all(self.df[column].astype(str).str.contains(num_pattern,regex=True)):
        self.df[column].replace(non_num_pattern,"",regex=True,inplace=True)
    return self.df

  def imputer(self,strategy="most_frequent"):
    simple_imputer = SimpleImputer(strategy=strategy)
    for column in self.df.columns:
      if pd.DataFrame.any(self.df[column].isnull()):
        self.df[column] = simple_imputer.fit_transform(self.df[column].values.reshape(-1,1))
    print(self.df.describe())
    return self.df 
     


In [23]:
cleaner = Cleaner(DF,FILE_TYPE)

In [24]:
cleaner.drop_column_contains(DROP_KEYWORDS)

Unnamed: 0,last_scraped,summary,space,experiences_offered,neighborhood_overview,transit,access,interaction,house_rules,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,market,smart_location,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,2019-04-28,My place is close to great views. My place is ...,A double bed apartment on the Asian side of Is...,none,,,,,,2009-01-14,"Istanbul, Istanbul, Turkey","Hello...\r\nI am Kaan and I am from Istanbul, ...",,,,f,Üsküdar,1.0,1.0,"['email', 'phone', 'facebook', 'reviews']","Istanbul Province, Istanbul, Turkey",Üsküdar,Uskudar,,Istanbul Province,Istanbul,Istanbul,"Istanbul Province, Turkey",Turkey,41.05650,29.05367,f,Apartment,Entire home/apt,2,1.0,0.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,Kitchen,""Smoking ...",700.0,$624.00,,,,,2,$154.00,1,730,1,1,730,730,1.0,730.0,24 months ago,t,30,60,90,365,2019-04-28,1,0,2009-06-01,2009-06-01,100.0,,,,,,,f,,f,f,flexible,f,1,1,0,0,0.01
1,2019-04-28,Watch The Bosphorus from The Comfy Hill. A spa...,"The low green hills of Arnavutköy, the lovely,...",none,"The lovely neighborhood, Arnavutkoy, is locate...","The city center, Taksim is about 6-7 kilometer...",Our dear guests may enjoy the oven and the was...,"Depending on our time available, we may even t...",- Windows facing the street should not be left...,2010-02-08,"Ankara, Ankara, Turkey",Başar Family\r\n\r\nGülder (mother): Owner of ...,,,,f,Beşiktaş,2.0,2.0,"['email', 'phone', 'facebook', 'reviews', 'wor...","Istanbul, Turkey",Beşiktaş,Besiktas,,Istanbul,,Istanbul,"Istanbul, Turkey",Turkey,41.06984,29.04545,t,Apartment,Entire home/apt,3,1.0,2.0,2.0,Real Bed,"{TV,""Cable TV"",""Air conditioning"",Kitchen,""Fre...",,$113.00,"$1,348.00","$4,615.00",$588.00,,4,$208.00,30,210,30,30,210,210,30.0,210.0,3 months ago,t,0,0,0,117,2019-04-28,41,5,2010-03-24,2018-11-07,90.0,9.0,9.0,10.0,10.0,10.0,9.0,f,,f,f,moderate,f,2,1,1,0,0.37
2,2019-04-27,Evimiz Bebek Koru nun içinde . Şehrin içinde ...,This place so popular place in Istanbul.Bospho...,none,Clean and elegand friends . U can find everyth...,"Otobus , Vapur , Metro , taksi",,Yes I can arrange it,,2010-04-09,"Istanbul, Istanbul, Turkey",Merhabalar . 6 yasında Oğlum ile birlikte yaşa...,,,,f,Beşiktaş,1.0,1.0,"['email', 'phone', 'offline_government_id', 's...","Beşiktaş/ bebek, İstanbul, Turkey",Beşiktaş,Besiktas,,Beşiktaş/ bebek,İstanbul,Istanbul,"Beşiktaş/ bebek, Turkey",Turkey,41.07731,29.03891,t,Serviced apartment,Entire home/apt,6,1.0,2.0,5.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",969.0,$232.00,"$2,980.00","$9,934.00","$19,869.00",$0.00,1,$0.00,21,1125,21,21,1125,1125,21.0,1125.0,2 weeks ago,t,0,0,0,2,2019-04-27,0,0,,,,,,,,,,f,,t,f,strict_14_with_grace_period,f,1,1,0,0,
3,2019-04-27,This is a very nicely decorated apartment in a...,My place is an authentic apartment with very h...,none,Cihangir is one of Istanbul's cosmopolitan sub...,All is within close walking distance. Also sit...,,I will be away from Istanbul during your stay ...,This is the place I am living. Not like the pl...,2010-05-02,"Istanbul, Istanbul, Turkey",I like traveling and when I have the opportuni...,,,,f,Cihangir,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...","Istanbul Province, Turkey",Cihangir,Beyoglu,,Istanbul Province,,Istanbul,"Istanbul Province, Turkey",Turkey,41.03220,28.98216,f,Apartment,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,Kitchen,""Paid parking off pr...",,$267.00,"$1,523.00","$5,431.00",$662.00,$265.00,2,$99.00,5,90,5,5,90,90,5.0,90.0,3 months ago,t,0,8,8,277,2019-04-27,2,1,2014-10-20,2018-05-04,100.0,10.0,9.0,10.0,10.0,10.0,10.0,f,,f,f,moderate,f,1,1,0,0,0.04
4,2019-04-27,,The first advantage to stay in our apartments ...,none,,,,,,2010-05-10,"Istanbul, Turkey",I have been working as a property manager in İ...,within an hour,100%,,f,Şişli,19.0,19.0,"['email', 'phone', 'reviews']","Istanbul, Turkey",Şişli,Sisli,,Istanbul,,Istanbul,"Istanbul, Turkey",Turkey,41.04471,28.98567,t,Serviced apartment,Entire home/apt,5,1.0,1.0,3.0,Real Bed,"{TV,Internet,Wifi,""Air conditioning"",Kitchen,E...",753.0,$654.00,,,,$66.00,2,$66.00,3,360,3,3,360,360,3.0,360.0,a week ago,t,12,34,45,302,2019-04-27,0,0,,,,,,,,,,f,,t,f,moderate,f,19,19,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,2019-04-28,"本人是一名中国人在土国的伊斯坦布尔大学的学生,学习土语专业,现在土国已经1年半了｡ 喜欢干净...",,none,,"从这里到机场坐地铁约10多分钟,附近步行几分钟有地铁,公交方便出行到任何地方｡",,需要提前两天预定,"- 有时间段需要保持安静,房间内只能穿拖鞋",2017-08-09,"Istanbul, Istanbul, Turkey",,,,,f,,1.0,1.0,"['email', 'phone']","Bahçelievler, İstanbul, Turkey",,Bahcelievler,,Bahçelievler,İstanbul,Istanbul,"Bahçelievler, Turkey",Turkey,40.99535,28.84067,t,House,Private room,4,,1.0,2.0,Real Bed,"{Wifi,Kitchen,Heating,Washer,Essentials,Shampo...",,$107.00,,,$0.00,$30.00,1,$0.00,3,1125,3,3,1125,1125,3.0,1125.0,today,t,29,59,89,269,2019-04-28,0,0,,,,,,,,,,f,,t,f,flexible,f,1,0,1,0,
17324,2019-04-28,,,none,,,,,Bulaşıklar yıkanmalı Evde ayakkabı ile gezilme...,2015-05-22,"Istanbul, İstanbul, Turkey","Hi, this is Göksel. The flat is located in 19 ...",within an hour,100%,,f,,1.0,1.0,"['email', 'phone', 'facebook']","Şişli, İstanbul, Turkey",Şişli,Sisli,,Şişli,İstanbul,Istanbul,"Şişli, Turkey",Turkey,41.05814,28.99161,f,Apartment,Entire home/apt,3,1.0,2.0,2.0,Real Bed,"{TV,Wifi,""Air conditioning"",Kitchen,""Smoking a...",,$303.00,,,,,1,$0.00,3,15,3,3,15,15,3.0,15.0,today,t,23,29,41,131,2019-04-28,0,0,,,,,,,,,,f,,f,f,flexible,f,1,1,0,0,
17325,2019-04-28,"Binaya bakan geniş bir pencere, bir yatak, dol...",,none,,,,,,2019-04-27,TR,,,,,f,,1.0,1.0,"['email', 'phone']","Maltepe, İstanbul, Turkey",,Maltepe,,Maltepe,İstanbul,Istanbul,"Maltepe, Turkey",Turkey,40.95070,29.12381,f,Apartment,Private room,1,1.0,1.0,1.0,Real Bed,"{Wifi,Kitchen,""Pets allowed"",Elevator,Washer,D...",,$148.00,,,,,1,$0.00,1,5,1,1,5,5,1.0,5.0,today,t,0,7,33,33,2019-04-28,0,0,,,,,,,,,,f,,f,f,flexible,f,1,0,1,0,
17326,2019-04-28,"Cozy flat with balcony in city centre, 4 mins ...",,none,,,Whole flat except owner’s bedroom,,,2013-06-12,"Istanbul, Istanbul, Turkey",İsmail from İstanbul-Turkey,,,,f,Beşiktaş,1.0,1.0,"['email', 'phone', 'google', 'reviews', 'jumio...","Beşiktaş, İstanbul, Turkey",Beşiktaş,Besiktas,,Beşiktaş,İstanbul,Istanbul,"Beşiktaş, Turkey",Turkey,41.07760,29.02340,t,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Wifi,""Air conditioning"",Kitchen,""Free park...",,$148.00,,,,,1,$0.00,1,7,1,1,7,7,1.0,7.0,today,t,29,49,79,115,2019-04-28,0,0,,,,,,,,,,f,,f,f,moderate,f,1,0,1,0,


In [25]:
cleaner.drop_text_columns(inplace=True)

Unnamed: 0,last_scraped,experiences_offered,host_since,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,is_location_exact,accommodates,bathrooms,bedrooms,beds,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,2019-04-28,none,2009-01-14,,,f,1.0,1.0,Üsküdar,Uskudar,,41.05650,29.05367,f,2,1.0,0.0,1.0,700.0,$624.00,,,,,2,$154.00,1,730,1,1,730,730,1.0,730.0,t,30,60,90,365,2019-04-28,1,0,2009-06-01,2009-06-01,100.0,,,,,,,f,,f,f,flexible,f,1,1,0,0,0.01
1,2019-04-28,none,2010-02-08,,,f,2.0,2.0,Beşiktaş,Besiktas,,41.06984,29.04545,t,3,1.0,2.0,2.0,,$113.00,"$1,348.00","$4,615.00",$588.00,,4,$208.00,30,210,30,30,210,210,30.0,210.0,t,0,0,0,117,2019-04-28,41,5,2010-03-24,2018-11-07,90.0,9.0,9.0,10.0,10.0,10.0,9.0,f,,f,f,moderate,f,2,1,1,0,0.37
2,2019-04-27,none,2010-04-09,,,f,1.0,1.0,Beşiktaş,Besiktas,,41.07731,29.03891,t,6,1.0,2.0,5.0,969.0,$232.00,"$2,980.00","$9,934.00","$19,869.00",$0.00,1,$0.00,21,1125,21,21,1125,1125,21.0,1125.0,t,0,0,0,2,2019-04-27,0,0,,,,,,,,,,f,,t,f,strict_14_with_grace_period,f,1,1,0,0,
3,2019-04-27,none,2010-05-02,,,f,1.0,1.0,Cihangir,Beyoglu,,41.03220,28.98216,f,2,1.0,1.0,1.0,,$267.00,"$1,523.00","$5,431.00",$662.00,$265.00,2,$99.00,5,90,5,5,90,90,5.0,90.0,t,0,8,8,277,2019-04-27,2,1,2014-10-20,2018-05-04,100.0,10.0,9.0,10.0,10.0,10.0,10.0,f,,f,f,moderate,f,1,1,0,0,0.04
4,2019-04-27,none,2010-05-10,100%,,f,19.0,19.0,Şişli,Sisli,,41.04471,28.98567,t,5,1.0,1.0,3.0,753.0,$654.00,,,,$66.00,2,$66.00,3,360,3,3,360,360,3.0,360.0,t,12,34,45,302,2019-04-27,0,0,,,,,,,,,,f,,t,f,moderate,f,19,19,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,2019-04-28,none,2017-08-09,,,f,1.0,1.0,,Bahcelievler,,40.99535,28.84067,t,4,,1.0,2.0,,$107.00,,,$0.00,$30.00,1,$0.00,3,1125,3,3,1125,1125,3.0,1125.0,t,29,59,89,269,2019-04-28,0,0,,,,,,,,,,f,,t,f,flexible,f,1,0,1,0,
17324,2019-04-28,none,2015-05-22,100%,,f,1.0,1.0,Şişli,Sisli,,41.05814,28.99161,f,3,1.0,2.0,2.0,,$303.00,,,,,1,$0.00,3,15,3,3,15,15,3.0,15.0,t,23,29,41,131,2019-04-28,0,0,,,,,,,,,,f,,f,f,flexible,f,1,1,0,0,
17325,2019-04-28,none,2019-04-27,,,f,1.0,1.0,,Maltepe,,40.95070,29.12381,f,1,1.0,1.0,1.0,,$148.00,,,,,1,$0.00,1,5,1,1,5,5,1.0,5.0,t,0,7,33,33,2019-04-28,0,0,,,,,,,,,,f,,f,f,flexible,f,1,0,1,0,
17326,2019-04-28,none,2013-06-12,,,f,1.0,1.0,Beşiktaş,Besiktas,,41.07760,29.02340,t,2,1.0,1.0,1.0,,$148.00,,,,,1,$0.00,1,7,1,1,7,7,1.0,7.0,t,29,49,79,115,2019-04-28,0,0,,,,,,,,,,f,,f,f,moderate,f,1,0,1,0,


In [26]:
cleaner.drop_date_columns(inplace=True)

  return func(self, *args, **kwargs)


Unnamed: 0,experiences_offered,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,is_location_exact,accommodates,bathrooms,bedrooms,beds,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,none,,,f,1.0,1.0,Üsküdar,Uskudar,,41.05650,29.05367,f,2,1.0,0.0,1.0,700.0,$624.00,,,,,2,$154.00,1,730,1,1,730,730,1.0,730.0,t,30,60,90,365,1,0,100.0,,,,,,,f,,f,f,flexible,f,1,1,0,0,0.01
1,none,,,f,2.0,2.0,Beşiktaş,Besiktas,,41.06984,29.04545,t,3,1.0,2.0,2.0,,$113.00,"$1,348.00","$4,615.00",$588.00,,4,$208.00,30,210,30,30,210,210,30.0,210.0,t,0,0,0,117,41,5,90.0,9.0,9.0,10.0,10.0,10.0,9.0,f,,f,f,moderate,f,2,1,1,0,0.37
2,none,,,f,1.0,1.0,Beşiktaş,Besiktas,,41.07731,29.03891,t,6,1.0,2.0,5.0,969.0,$232.00,"$2,980.00","$9,934.00","$19,869.00",$0.00,1,$0.00,21,1125,21,21,1125,1125,21.0,1125.0,t,0,0,0,2,0,0,,,,,,,,f,,t,f,strict_14_with_grace_period,f,1,1,0,0,
3,none,,,f,1.0,1.0,Cihangir,Beyoglu,,41.03220,28.98216,f,2,1.0,1.0,1.0,,$267.00,"$1,523.00","$5,431.00",$662.00,$265.00,2,$99.00,5,90,5,5,90,90,5.0,90.0,t,0,8,8,277,2,1,100.0,10.0,9.0,10.0,10.0,10.0,10.0,f,,f,f,moderate,f,1,1,0,0,0.04
4,none,100%,,f,19.0,19.0,Şişli,Sisli,,41.04471,28.98567,t,5,1.0,1.0,3.0,753.0,$654.00,,,,$66.00,2,$66.00,3,360,3,3,360,360,3.0,360.0,t,12,34,45,302,0,0,,,,,,,,f,,t,f,moderate,f,19,19,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,none,,,f,1.0,1.0,,Bahcelievler,,40.99535,28.84067,t,4,,1.0,2.0,,$107.00,,,$0.00,$30.00,1,$0.00,3,1125,3,3,1125,1125,3.0,1125.0,t,29,59,89,269,0,0,,,,,,,,f,,t,f,flexible,f,1,0,1,0,
17324,none,100%,,f,1.0,1.0,Şişli,Sisli,,41.05814,28.99161,f,3,1.0,2.0,2.0,,$303.00,,,,,1,$0.00,3,15,3,3,15,15,3.0,15.0,t,23,29,41,131,0,0,,,,,,,,f,,f,f,flexible,f,1,1,0,0,
17325,none,,,f,1.0,1.0,,Maltepe,,40.95070,29.12381,f,1,1.0,1.0,1.0,,$148.00,,,,,1,$0.00,1,5,1,1,5,5,1.0,5.0,t,0,7,33,33,0,0,,,,,,,,f,,f,f,flexible,f,1,0,1,0,
17326,none,,,f,1.0,1.0,Beşiktaş,Besiktas,,41.07760,29.02340,t,2,1.0,1.0,1.0,,$148.00,,,,,1,$0.00,1,7,1,1,7,7,1.0,7.0,t,29,49,79,115,0,0,,,,,,,,f,,f,f,moderate,f,1,0,1,0,


In [27]:
cleaner.drop_missing_columns(percentage=10)

Unnamed: 0,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,latitude,longitude,is_location_exact,accommodates,bathrooms,bedrooms,beds,price,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,instant_bookable,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms
0,f,1.0,1.0,Uskudar,41.05650,29.05367,f,2,1.0,0.0,1.0,$624.00,2,$154.00,1,730,1,1,730,730,1.0,730.0,30,60,90,365,1,0,f,flexible,f,1,1,0,0
1,f,2.0,2.0,Besiktas,41.06984,29.04545,t,3,1.0,2.0,2.0,$113.00,4,$208.00,30,210,30,30,210,210,30.0,210.0,0,0,0,117,41,5,f,moderate,f,2,1,1,0
2,f,1.0,1.0,Besiktas,41.07731,29.03891,t,6,1.0,2.0,5.0,$232.00,1,$0.00,21,1125,21,21,1125,1125,21.0,1125.0,0,0,0,2,0,0,t,strict_14_with_grace_period,f,1,1,0,0
3,f,1.0,1.0,Beyoglu,41.03220,28.98216,f,2,1.0,1.0,1.0,$267.00,2,$99.00,5,90,5,5,90,90,5.0,90.0,0,8,8,277,2,1,f,moderate,f,1,1,0,0
4,f,19.0,19.0,Sisli,41.04471,28.98567,t,5,1.0,1.0,3.0,$654.00,2,$66.00,3,360,3,3,360,360,3.0,360.0,12,34,45,302,0,0,t,moderate,f,19,19,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,f,1.0,1.0,Bahcelievler,40.99535,28.84067,t,4,,1.0,2.0,$107.00,1,$0.00,3,1125,3,3,1125,1125,3.0,1125.0,29,59,89,269,0,0,t,flexible,f,1,0,1,0
17324,f,1.0,1.0,Sisli,41.05814,28.99161,f,3,1.0,2.0,2.0,$303.00,1,$0.00,3,15,3,3,15,15,3.0,15.0,23,29,41,131,0,0,f,flexible,f,1,1,0,0
17325,f,1.0,1.0,Maltepe,40.95070,29.12381,f,1,1.0,1.0,1.0,$148.00,1,$0.00,1,5,1,1,5,5,1.0,5.0,0,7,33,33,0,0,f,flexible,f,1,0,1,0
17326,f,1.0,1.0,Besiktas,41.07760,29.02340,t,2,1.0,1.0,1.0,$148.00,1,$0.00,1,7,1,1,7,7,1.0,7.0,29,49,79,115,0,0,f,moderate,f,1,0,1,0


In [28]:
cleaner.imputer()

       host_listings_count  host_total_listings_count      latitude  \
count         17328.000000               17328.000000  17328.000000   
mean              4.735630                   4.735630     41.027063   
std              19.099787                  19.099787      0.043440   
min               0.000000                   0.000000     40.814690   
25%               1.000000                   1.000000     41.004620   
50%               2.000000                   2.000000     41.031660   
75%               4.250000                   4.250000     41.048240   
max             944.000000                 944.000000     41.414420   

          longitude  accommodates     bathrooms      bedrooms          beds  \
count  17328.000000  17328.000000  17328.000000  17328.000000  17328.000000   
mean      28.985293      3.028105      1.171572      1.331371      1.977147   
std        0.116290      2.163664      0.791866      1.219332      1.904710   
min       28.032000      1.000000      0.000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Unnamed: 0,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,latitude,longitude,is_location_exact,accommodates,bathrooms,bedrooms,beds,price,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,instant_bookable,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms
0,f,1.0,1.0,Uskudar,41.05650,29.05367,f,2,1.0,0.0,1.0,$624.00,2,$154.00,1,730,1,1,730,730,1.0,730.0,30,60,90,365,1,0,f,flexible,f,1,1,0,0
1,f,2.0,2.0,Besiktas,41.06984,29.04545,t,3,1.0,2.0,2.0,$113.00,4,$208.00,30,210,30,30,210,210,30.0,210.0,0,0,0,117,41,5,f,moderate,f,2,1,1,0
2,f,1.0,1.0,Besiktas,41.07731,29.03891,t,6,1.0,2.0,5.0,$232.00,1,$0.00,21,1125,21,21,1125,1125,21.0,1125.0,0,0,0,2,0,0,t,strict_14_with_grace_period,f,1,1,0,0
3,f,1.0,1.0,Beyoglu,41.03220,28.98216,f,2,1.0,1.0,1.0,$267.00,2,$99.00,5,90,5,5,90,90,5.0,90.0,0,8,8,277,2,1,f,moderate,f,1,1,0,0
4,f,19.0,19.0,Sisli,41.04471,28.98567,t,5,1.0,1.0,3.0,$654.00,2,$66.00,3,360,3,3,360,360,3.0,360.0,12,34,45,302,0,0,t,moderate,f,19,19,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,f,1.0,1.0,Bahcelievler,40.99535,28.84067,t,4,1.0,1.0,2.0,$107.00,1,$0.00,3,1125,3,3,1125,1125,3.0,1125.0,29,59,89,269,0,0,t,flexible,f,1,0,1,0
17324,f,1.0,1.0,Sisli,41.05814,28.99161,f,3,1.0,2.0,2.0,$303.00,1,$0.00,3,15,3,3,15,15,3.0,15.0,23,29,41,131,0,0,f,flexible,f,1,1,0,0
17325,f,1.0,1.0,Maltepe,40.95070,29.12381,f,1,1.0,1.0,1.0,$148.00,1,$0.00,1,5,1,1,5,5,1.0,5.0,0,7,33,33,0,0,f,flexible,f,1,0,1,0
17326,f,1.0,1.0,Besiktas,41.07760,29.02340,t,2,1.0,1.0,1.0,$148.00,1,$0.00,1,7,1,1,7,7,1.0,7.0,29,49,79,115,0,0,f,moderate,f,1,0,1,0


In [29]:
cleaner.strip_signs()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


Unnamed: 0,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,latitude,longitude,is_location_exact,accommodates,bathrooms,bedrooms,beds,price,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,instant_bookable,cancellation_policy,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms
0,f,1.0,1.0,Uskudar,41.05650,29.05367,f,2,1.0,0.0,1.0,62400,2,15400,1,730,1,1,730,730,1.0,730.0,30,60,90,365,1,0,f,flexible,f,1,1,0,0
1,f,2.0,2.0,Besiktas,41.06984,29.04545,t,3,1.0,2.0,2.0,11300,4,20800,30,210,30,30,210,210,30.0,210.0,0,0,0,117,41,5,f,moderate,f,2,1,1,0
2,f,1.0,1.0,Besiktas,41.07731,29.03891,t,6,1.0,2.0,5.0,23200,1,000,21,1125,21,21,1125,1125,21.0,1125.0,0,0,0,2,0,0,t,strict_14_with_grace_period,f,1,1,0,0
3,f,1.0,1.0,Beyoglu,41.03220,28.98216,f,2,1.0,1.0,1.0,26700,2,9900,5,90,5,5,90,90,5.0,90.0,0,8,8,277,2,1,f,moderate,f,1,1,0,0
4,f,19.0,19.0,Sisli,41.04471,28.98567,t,5,1.0,1.0,3.0,65400,2,6600,3,360,3,3,360,360,3.0,360.0,12,34,45,302,0,0,t,moderate,f,19,19,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17323,f,1.0,1.0,Bahcelievler,40.99535,28.84067,t,4,1.0,1.0,2.0,10700,1,000,3,1125,3,3,1125,1125,3.0,1125.0,29,59,89,269,0,0,t,flexible,f,1,0,1,0
17324,f,1.0,1.0,Sisli,41.05814,28.99161,f,3,1.0,2.0,2.0,30300,1,000,3,15,3,3,15,15,3.0,15.0,23,29,41,131,0,0,f,flexible,f,1,1,0,0
17325,f,1.0,1.0,Maltepe,40.95070,29.12381,f,1,1.0,1.0,1.0,14800,1,000,1,5,1,1,5,5,1.0,5.0,0,7,33,33,0,0,f,flexible,f,1,0,1,0
17326,f,1.0,1.0,Besiktas,41.07760,29.02340,t,2,1.0,1.0,1.0,14800,1,000,1,7,1,1,7,7,1.0,7.0,29,49,79,115,0,0,f,moderate,f,1,0,1,0


'some'