In [1]:
# It's time to find the possible fraudulent posts
# With limited data, we'll take a strategy of signaling the posts
# that are too cheap for a given location and a given type

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import stats
from iteration_utilities import deepflatten

%matplotlib inline

In [2]:
house_data = pd.read_csv('house_data.csv', index_col=0)


# FUNCTION THAT WILL EXTRACT OUTLIERS FROM PRICE LIST
def detect_outliers(data):
    
    # find q1 and q3 values
    q1, q3 = np.percentile(sorted(data), [25, 75])
    # compute IRQ
    iqr = q3 - q1
    # find lower and upper bounds
    lower_bound = q1 - (2 * iqr)

    outliers = [x for x in data if x <= lower_bound]       

    return outliers


# cycle through each municipality and each house type and extract outlier indexes
outlier_indexes = [] #
for mun in house_data.municipality.unique():
    for t in house_data.type.unique():
        
        prices = house_data.price[(house_data.municipality == mun) & (house_data.type == t)].to_list()
        
        # pass if there are no house for rent in a municipality and type
        if prices != []:                
            data_outliers = detect_outliers(prices)
        else:
            pass
        
        # pass if there are no outliers in the price
        if data_outliers == []:
            pass
        
        else:
            for o in data_outliers:
                indexes = house_data.index[(house_data.municipality == mun) 
                                           & (house_data.type == t) 
                                           & (house_data.price == o)].to_list()          
        
                outlier_indexes.append(indexes)
        
outlier_indexes = list(deepflatten(outlier_indexes, depth=1))
outlier_links = pd.DataFrame(house_data.link[outlier_indexes])
outlier_links = outlier_links.reset_index(drop=True)
outlier_links.to_csv('outlier_links')

Unnamed: 0,link
0,https://www.imovirtual.com/pt/anuncio/porto-t2...
1,https://www.imovirtual.com/pt/anuncio/t6-sem-f...
2,https://www.imovirtual.com/pt/anuncio/alugo-ap...
3,https://www.imovirtual.com/pt/anuncio/apartame...
4,https://www.imovirtual.com/pt/anuncio/apartame...
...,...
810,https://www.imovirtual.com/pt/anuncio/apartame...
811,https://www.imovirtual.com/pt/anuncio/casa-mob...
812,https://www.imovirtual.com/pt/anuncio/t2-cidad...
813,https://www.imovirtual.com/pt/anuncio/arrenda-...


In [4]:
outlier_links.link

0      https://www.imovirtual.com/pt/anuncio/porto-t2...
1      https://www.imovirtual.com/pt/anuncio/t6-sem-f...
2      https://www.imovirtual.com/pt/anuncio/alugo-ap...
3      https://www.imovirtual.com/pt/anuncio/apartame...
4      https://www.imovirtual.com/pt/anuncio/apartame...
                             ...                        
810    https://www.imovirtual.com/pt/anuncio/apartame...
811    https://www.imovirtual.com/pt/anuncio/casa-mob...
812    https://www.imovirtual.com/pt/anuncio/t2-cidad...
813    https://www.imovirtual.com/pt/anuncio/arrenda-...
814    https://www.imovirtual.com/pt/anuncio/t1-furna...
Name: link, Length: 815, dtype: object