In [68]:
from datetime import datetime, timedelta
import json
import os
import pandas as pd
import numpy as np

In [69]:
# Path of the raw JSON export that the loading cell reads with json.load.
# NOTE(review): the name starts with "oxl" while the listing URLs in the data
# point at olx.uz - confirm this is the real file name and not a typo.
data_file = 'oxl_download_rent.json'

In [70]:
def get_param_by_key(params, param_key, value_key):
    """Look up one field of a keyed parameter entry.

    Scans ``params`` (an iterable of dicts that each carry a ``'key'`` item)
    and stops at the first entry whose ``'key'`` equals ``param_key``.

    Args:
        params: iterable of parameter dicts.
        param_key: value of ``'key'`` identifying the wanted entry.
        value_key: name of the field to read from that entry's ``'value'``.

    Returns:
        ``entry['value'][value_key]`` for the first matching entry, or
        ``None`` when no entry matches, the entry has no ``'value'`` item,
        or ``value_key`` is not present in it.
    """
    for entry in params:
        if entry['key'] != param_key:
            continue
        # Only the first matching entry is consulted; later duplicates are ignored.
        if 'value' in entry and value_key in entry['value']:
            return entry['value'][value_key]
        return None
    return None

In [71]:
# Load the raw export. The encoding is given explicitly because open() otherwise
# falls back to the platform's locale encoding, which can fail on or garble the
# Cyrillic text in this data on systems whose default is not UTF-8.
with open(data_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

KeyboardInterrupt: 

In [None]:
def _cast_or_none(raw, cast):
    """Return ``cast(raw)``, or ``None`` when ``raw`` is missing or otherwise falsy."""
    return cast(raw) if raw else None


# Turn every raw record into a flat "listing" dict and keep only those that
# have both coordinates and a price.
listings = []
for i, record in enumerate(data, start=1):
    print(f'Processing record: {i}, property id: {record["id"]}')
    params = record['params']

    listing = dict()
    listing['id'] = str(record['id'])
    listing['sourceUrl'] = record['url']
    listing['price'] = get_param_by_key(params, 'price', 'value')
    listing['currency'] = get_param_by_key(params, 'price', 'currency')
    if listing['currency'] == 'UYE':
        # A price quoted in 'UYE' is copied to priceUSD unchanged.
        listing['priceUSD'] = listing['price']
    else:
        # The converted value can be absent (e.g. a record with no price at all).
        # int(None) would raise TypeError and abort the whole run, so keep None
        # and let the final filter decide whether the listing is usable.
        converted = get_param_by_key(params, 'price', 'converted_value')
        listing['priceUSD'] = int(converted) if converted is not None else None
    listing['title'] = record['title']
    listing['createdAt'] = record['created_time']
    listing['refreshedAt'] = record['last_refresh_time']
    listing['description'] = record['description']
    listing['promoted_top'] = record['promotion']['top_ad']

    # Optional numeric attributes: missing or empty values become None.
    listing['numberOfRooms'] = _cast_or_none(
        get_param_by_key(params, 'number_of_rooms', 'key'), int)
    listing['totalArea'] = _cast_or_none(
        get_param_by_key(params, 'total_area', 'key'), float)
    listing['floor'] = _cast_or_none(
        get_param_by_key(params, 'floor', 'key'), int)
    listing['totalFloors'] = _cast_or_none(
        get_param_by_key(params, 'total_floors', 'key'), int)

    # Strip the ';s={width}x{height}' placeholder suffix from each photo link.
    listing['photos'] = [
        {
            'id': str(p['id']),
            'rotation': int(p['rotation']),
            'width': int(p['width']),
            'height': int(p['height']),
            'url': str(p['link']).replace(';s={width}x{height}', ''),
        }
        for p in record['photos']
    ]

    listing['location'] = {
        'lat': record['map']['lat'],
        'lng': record['map']['lon'],
    }
    listing['show_detailed'] = record['map']['show_detailed']
    listing['district'] = record['location']['district']['name']

    # Drop listings that lack a coordinate or a price (any falsy value).
    if listing['location']['lat'] and listing['location']['lng'] and listing['price']:
        listings.append(listing)

Processing record: 1, property id: 50686312
Processing record: 2, property id: 50596584
Processing record: 3, property id: 50596220
Processing record: 4, property id: 50351697
Processing record: 5, property id: 50267397
Processing record: 6, property id: 35100730
Processing record: 7, property id: 50573478
Processing record: 8, property id: 50705887
Processing record: 9, property id: 50706155
Processing record: 10, property id: 50586858
Processing record: 11, property id: 50674616
Processing record: 12, property id: 50176046
Processing record: 13, property id: 50572176
Processing record: 14, property id: 49148388
Processing record: 15, property id: 50338562
Processing record: 16, property id: 50579571
Processing record: 17, property id: 45242321
Processing record: 18, property id: 46032778
Processing record: 19, property id: 49321315
Processing record: 20, property id: 50571795
Processing record: 21, property id: 50317707
Processing record: 22, property id: 48089647
Processing record: 

In [24]:
# Flatten the list of listing dicts into a DataFrame. Nested dicts are expanded
# into dotted column names, so listing['location'] becomes the 'location.lat'
# and 'location.lng' columns; the 'photos' list stays in a single column.
listings_df = pd.json_normalize(listings)

In [25]:
# Build a "lat,lng" text key per listing, count listings per (district, key),
# and collect every key that is shared by more than 20 listings.
lat_text = listings_df['location.lat'].astype('str')
lng_text = listings_df['location.lng'].astype('str')
listings_df['location.string'] = lat_text + ',' + lng_text

count_by_location_df = (
    listings_df
    .groupby(by=['district', 'location.string'])
    .agg({'id': 'count'})
    .reset_index()
)

# After the aggregation the 'id' column holds the number of listings in the group.
is_overused_location = count_by_location_df['id'] > 20
generic_locations = set(
    count_by_location_df.loc[is_overused_location, 'location.string']
)

In [26]:
# Keep only listings whose coordinate key is not one of the generic locations.
has_generic_location = listings_df['location.string'].isin(generic_locations)
filtered_listings_df = listings_df[~has_generic_location]
len(filtered_listings_df)

4819

In [27]:
# Keep listings refreshed within the last `max_listing_age_days` days.
max_listing_age_days = 10
min_date = datetime.now() - timedelta(days=max_listing_age_days)
min_date_str = min_date.strftime('%Y-%m-%d')

# 'refreshedAt' and the cutoff are both strings, so this is a text comparison.
is_recent = filtered_listings_df['refreshedAt'] > min_date_str
filtered_listings_df = filtered_listings_df[is_recent]
len(filtered_listings_df)

3741

In [28]:
# Count listings per distinct description text; after the aggregation the 'id'
# column holds that count. Display the 20 most repeated descriptions.
description_groups = filtered_listings_df.groupby(by=['description'])
count_by_description_df = description_groups.agg({'id': 'count'}).reset_index()
count_by_description_df.sort_values(by='id', ascending=False).head(20)

Unnamed: 0,description,id
2775,Сдаётся квартира на долгий срок для семи или и...,61
2823,Сдаётся отличная квартира в центре города. Со...,35
3380,квартире имеется все необходимое мебель и техн...,20
3421,• В евроремонт квартире имеется все необходимо...,20
1452,В квартире имеется все необходимое Мебель и те...,16
2829,Сдаётся отличная квартира в центре города. Со ...,14
1611,Квартира для Вашей семьи.Мы приглашаем вас оку...,13
3428,• Сдаётся отличная квартира в центре города. С...,11
1882,Отличное расположение дома - всё рядом - много...,10
3430,• Сдаётся отличная квартира в центре города. С...,9


In [29]:
# Write the per-description counts to an Excel workbook in the working directory.
# NOTE(review): the file name is spelled 'descritpions' - confirm whether that
# is intentional before anything else starts depending on it.
count_by_description_df.to_excel('descritpions.xlsx')

In [30]:
# Count listings per distinct title; after the aggregation the 'id' column
# holds that count. Display the 20 most repeated titles.
title_groups = filtered_listings_df.groupby(by=['title'])
count_by_title_df = title_groups.agg({'id': 'count'}).reset_index()
count_by_title_df.sort_values(by='id', ascending=False).head(20)

Unnamed: 0,title,id
1689,В аренда квартера,27
2844,Сдаётся квартира,10
1922,Квартира в аренду,10
291,Chilonzor arenda srocni,8
1019,Аренда Максим Горький,8
1207,Аренда алмазар сити,5
666,Аренда Квартирь,5
2789,Сдаётся в аренду,5
1346,Аренда квартиры,4
2351,Сдается евро квартира на Ц1,3


In [31]:
# Keep only listings that have at least one photo.
photo_counts = filtered_listings_df['photos'].apply(len)
filtered_listings_df = filtered_listings_df[photo_counts > 0]

In [57]:
# IDs of the rows currently in filtered_listings_df, collected into a set so
# that membership checks against them are fast.
filtered_ids = set(filtered_listings_df['id'])

In [58]:

# Select the original listing dicts whose id is in filtered_ids and write
# them to listings.json.
filtered_listings = [
    entry
    for entry in listings
    if entry['id'] in filtered_ids
]
with open('listings.json', 'w') as out_file:
    json.dump(filtered_listings, out_file)

In [59]:
# Number of entries in filtered_listings.
len(filtered_listings)

3736

In [65]:
# Text of the description that appears at the top of the count ranking.
descriptions_by_count = count_by_description_df.sort_values(by='id', ascending=False)
des = descriptions_by_count.iloc[0]['description']

In [66]:
# Display the selected description text.
# NOTE(review): the rendered output of this cell includes a phone number taken
# from the listing text - consider clearing the output before sharing.
des

'Сдаётся квартира на долгий срок для семи или иностранцев квартире всё необходим есть звоните или пишите в телеграмм +998971662222'

In [72]:
# Show every listing in the unfiltered frame whose description equals `des`.
has_selected_description = listings_df['description'] == des
listings_df[has_selected_description]

Unnamed: 0,id,sourceUrl,price,currency,priceUSD,title,createdAt,refreshedAt,description,promoted_top,numberOfRooms,totalArea,floor,totalFloors,photos,show_detailed,district,location.lat,location.lng,location.string
498,47465884,https://www.olx.uz/d/obyavlenie/maftunkuli-nov...,600,UYE,600,Мафтункули новастройка 1/1/9,2023-04-05T15:50:26+05:00,2023-10-13T11:34:08+05:00,Сдаётся квартира на долгий срок для семи или и...,False,1,45.0,1,9,"[{'id': '79226022', 'rotation': 0, 'width': 10...",False,Яшнабадский район,41.305299,69.308612,"41.30529875,69.30861217"
2725,47255880,https://www.olx.uz/d/obyavlenie/golden-hause-e...,500,UYE,500,Golden Hause evra kvartira 1/5/9,2023-03-25T14:19:11+05:00,2023-10-14T15:30:09+05:00,Сдаётся квартира на долгий срок для семи или и...,False,1,45.0,5,9,"[{'id': '78290991', 'rotation': 0, 'width': 56...",False,Яшнабадский район,41.303441,69.323956,"41.30344105,69.32395636"
2754,47377124,https://www.olx.uz/d/obyavlenie/golden-haus-no...,450,UYE,450,Голден хаус новастройка новый 1/5/9,2023-03-31T20:15:34+05:00,2023-10-12T11:54:07+05:00,Сдаётся квартира на долгий срок для семи или и...,False,1,42.0,5,9,"[{'id': '78826953', 'rotation': 0, 'width': 75...",False,Яшнабадский район,41.306491,69.334995,"41.30649123,69.33499508"
3051,47510477,https://www.olx.uz/d/obyavlenie/maftunkuli-nov...,500,UYE,500,Мафтункули новастройка 1/7/8 евра квартира,2023-04-07T21:40:30+05:00,2023-10-14T15:24:10+05:00,Сдаётся квартира на долгий срок для семи или и...,False,1,40.0,7,8,"[{'id': '79433949', 'rotation': 0, 'width': 10...",False,Яшнабадский район,41.304348,69.322605,"41.3043484,69.32260486"
3902,47463831,https://www.olx.uz/d/obyavlenie/chilanzar-7kv-...,500,UYE,500,Чиланзар 7кв евра квартира 2/1/5,2023-04-05T14:14:36+05:00,2023-10-14T12:24:10+05:00,Сдаётся квартира на долгий срок для семи или и...,False,2,55.0,1,5,"[{'id': '79216418', 'rotation': 0, 'width': 10...",False,Чиланзарский район,41.279655,69.225278,"41.2796548,69.22527791"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33638,47522298,https://www.olx.uz/d/obyavlenie/ts5-evra-kvart...,700,UYE,700,Ц5 евра квартира 4/3/9,2023-04-08T15:15:07+05:00,2023-10-13T11:32:09+05:00,Сдаётся квартира на долгий срок для семи или и...,False,4,90.0,3,9,"[{'id': '79488042', 'rotation': 0, 'width': 75...",False,Юнусабадский район,41.325407,69.278703,"41.32540695,69.27870301"
33784,47501268,https://www.olx.uz/d/obyavlenie/baurum-4-3-8-o...,1000,UYE,1000,Баурум 4/3/8 олмазор,2023-04-07T12:54:55+05:00,2023-10-12T15:38:08+05:00,Сдаётся квартира на долгий срок для семи или и...,False,4,120.0,3,8,"[{'id': '79390680', 'rotation': 0, 'width': 10...",False,Алмазарский район,41.331614,69.250573,"41.33161439,69.25057326"
35078,48055775,https://www.olx.uz/d/obyavlenie/gospitalnoy-ba...,900,UYE,900,Госпитальной базар четырёх комнатная квартира ...,2023-05-09T16:11:44+05:00,2023-10-12T15:38:11+05:00,Сдаётся квартира на долгий срок для семи или и...,False,4,100.0,7,9,"[{'id': '81872157', 'rotation': 0, 'width': 10...",False,Мирабадский район,41.297442,69.277917,"41.29744247,69.27791717"
35651,48137189,https://www.olx.uz/d/obyavlenie/m-tinchlik-ban...,800,UYE,800,М. Тинчлик Банковский дом 250м2 5/4/4,2023-05-14T13:13:15+05:00,2023-10-12T15:40:12+05:00,Сдаётся квартира на долгий срок для семи или и...,False,5,250.0,4,4,"[{'id': '82236336', 'rotation': 0, 'width': 75...",False,Шайхантахурский район,41.329971,69.224369,"41.32997112,69.22436883"
