In [1]:
import numpy as np
import pandas as pd

# Read the Paris housing classification CSV (path is relative to the
# notebook's working directory) and preview its first rows.
csv_path = 'ParisHousingClass.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,squareMeters,numberOfRooms,hasYard,hasPool,floors,cityCode,cityPartRange,numPrevOwners,made,isNewBuilt,hasStormProtector,basement,attic,garage,hasStorageRoom,hasGuestRoom,price,category
0,75523,3,0,1,63,9373,3,8,2005,0,1,4313,9005,956,0,7,7559081.5,Basic
1,80771,39,1,1,98,39381,8,6,2015,1,0,3653,2436,128,1,2,8085989.5,Luxury
2,55712,58,0,1,19,34457,6,8,2021,0,0,2937,8852,135,1,9,5574642.1,Basic
3,32316,47,0,0,6,27939,10,4,2012,0,1,659,7141,359,0,3,3232561.2,Basic
4,70429,19,1,1,90,38045,3,7,1990,1,0,8435,2429,292,1,4,7055052.0,Luxury


## Размер датасета

In [2]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
dataset.shape

(10000, 18)

## Фильтрация данных по диапазону цены и вывод сгруппированных данных по категории

In [3]:
# Price bounds for the filter; Series.between includes both ends by default.
price_range = [100_000, 200_000]
lower_bound, upper_bound = price_range
price_in_range = dataset['price'].between(lower_bound, upper_bound)
filtered_data = dataset[price_in_range]

# Number of houses per category among the rows that passed the price filter.
house_categories_count = filtered_data.groupby('category').size()
house_categories_count

category
Basic     103
Luxury     14
dtype: int64

## Колонки в датасете

In [4]:
# Column labels of the frame, in file order.
dataset.columns

Index(['squareMeters', 'numberOfRooms', 'hasYard', 'hasPool', 'floors',
       'cityCode', 'cityPartRange', 'numPrevOwners', 'made', 'isNewBuilt',
       'hasStormProtector', 'basement', 'attic', 'garage', 'hasStorageRoom',
       'hasGuestRoom', 'price', 'category'],
      dtype='object')

## Вывод отдельных столбцов (выбор через `.loc`)

In [5]:
# first column: all rows of 'squareMeters' (a column selection, not a row)
dataset.loc[:, 'squareMeters']

0       75523
1       80771
2       55712
3       32316
4       70429
        ...  
9995     1726
9996    44403
9997    83841
9998    59036
9999     1440
Name: squareMeters, Length: 10000, dtype: int64

In [6]:
# third column: all rows of 'hasYard' (a column selection, not a row)
dataset.loc[:, 'hasYard']

0       0
1       1
2       0
3       0
4       1
       ..
9995    0
9996    1
9997    0
9998    0
9999    0
Name: hasYard, Length: 10000, dtype: int64

In [7]:
# first and third columns together; a list of labels yields a DataFrame
dataset.loc[:, ['squareMeters', 'hasYard']]

Unnamed: 0,squareMeters,hasYard
0,75523,0
1,80771,1
2,55712,0
3,32316,0
4,70429,1
...,...,...
9995,1726,0
9996,44403,1
9997,83841,0
9998,59036,0


## Выводим первые 10 строк

In [8]:
# show the first 10 rows
dataset.head(10)

Unnamed: 0,squareMeters,numberOfRooms,hasYard,hasPool,floors,cityCode,cityPartRange,numPrevOwners,made,isNewBuilt,hasStormProtector,basement,attic,garage,hasStorageRoom,hasGuestRoom,price,category
0,75523,3,0,1,63,9373,3,8,2005,0,1,4313,9005,956,0,7,7559081.5,Basic
1,80771,39,1,1,98,39381,8,6,2015,1,0,3653,2436,128,1,2,8085989.5,Luxury
2,55712,58,0,1,19,34457,6,8,2021,0,0,2937,8852,135,1,9,5574642.1,Basic
3,32316,47,0,0,6,27939,10,4,2012,0,1,659,7141,359,0,3,3232561.2,Basic
4,70429,19,1,1,90,38045,3,7,1990,1,0,8435,2429,292,1,4,7055052.0,Luxury
5,39223,36,0,1,17,39489,8,6,2012,0,1,2009,4552,757,0,1,3926647.2,Basic
6,58682,10,1,1,99,6450,10,9,1995,1,1,5930,9453,848,0,5,5876376.5,Luxury
7,86929,100,1,0,11,98155,3,4,2003,1,0,6326,4748,654,0,10,8696869.3,Basic
8,51522,3,0,0,61,9047,8,3,2012,1,1,632,5792,807,1,5,5154055.2,Basic
9,39686,42,0,0,15,71019,5,8,2021,1,1,5198,5342,591,1,3,3970892.1,Basic


## Выводим 23 случайные строки

In [9]:
# Draw 23 random rows. A fixed random_state makes the draw deterministic, so
# the displayed sample is the same after every Restart & Run All.
dataset.sample(23, random_state=42)

Unnamed: 0,squareMeters,numberOfRooms,hasYard,hasPool,floors,cityCode,cityPartRange,numPrevOwners,made,isNewBuilt,hasStormProtector,basement,attic,garage,hasStorageRoom,hasGuestRoom,price,category
6462,97831,19,0,1,75,73394,4,9,2004,0,1,1728,5426,613,1,3,9790034.4,Basic
3542,64180,36,1,1,100,46756,6,6,2002,0,1,1986,770,321,1,4,6425560.2,Basic
262,96526,36,1,1,34,61875,5,2,2020,0,1,2131,710,680,1,0,9661068.0,Basic
1482,61453,7,1,1,86,1874,5,4,2006,1,0,8951,8470,558,1,2,6152438.6,Luxury
223,22854,41,1,0,89,74420,4,7,2001,0,1,6216,521,481,1,0,2292467.1,Basic
2795,94972,19,0,1,23,1116,9,2,1990,0,0,9628,2016,151,0,1,9500113.0,Basic
8543,79445,43,0,1,51,11894,6,8,1990,0,0,414,7680,381,0,6,7948637.0,Basic
5539,32078,91,0,0,65,66113,4,3,2012,0,1,5986,2720,377,1,5,3209311.2,Basic
9895,3814,3,1,0,20,28671,10,4,2007,0,1,4221,7909,149,1,8,386744.7,Basic
9600,68339,36,0,1,86,43765,7,9,2003,1,0,5303,338,129,1,9,6839127.3,Basic


## Статистика по числовым показателям

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the string column 'category' is not included in the result.
dataset.describe()

Unnamed: 0,squareMeters,numberOfRooms,hasYard,hasPool,floors,cityCode,cityPartRange,numPrevOwners,made,isNewBuilt,hasStormProtector,basement,attic,garage,hasStorageRoom,hasGuestRoom,price
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,49870.1312,50.3584,0.5087,0.4968,50.2763,50225.4861,5.5101,5.5217,2005.4885,0.4991,0.4999,5033.1039,5028.0106,553.1212,0.503,4.9946,4993448.0
std,28774.37535,28.816696,0.499949,0.500015,28.889171,29006.675799,2.872024,2.856667,9.30809,0.500024,0.500025,2876.729545,2894.33221,262.05017,0.500016,3.17641,2877424.0
min,89.0,1.0,0.0,0.0,1.0,3.0,1.0,1.0,1990.0,0.0,0.0,0.0,1.0,100.0,0.0,0.0,10313.5
25%,25098.5,25.0,0.0,0.0,25.0,24693.75,3.0,3.0,1997.0,0.0,0.0,2559.75,2512.0,327.75,0.0,2.0,2516402.0
50%,50105.5,50.0,1.0,0.0,50.0,50693.0,5.0,5.0,2005.5,0.0,0.0,5092.5,5045.0,554.0,1.0,5.0,5016180.0
75%,74609.75,75.0,1.0,1.0,76.0,75683.25,8.0,8.0,2014.0,1.0,1.0,7511.25,7540.5,777.25,1.0,8.0,7469092.0
max,99999.0,100.0,1.0,1.0,100.0,99953.0,10.0,10.0,2021.0,1.0,1.0,10000.0,10000.0,1000.0,1.0,10.0,10006770.0


In [11]:
# Per-category summary: the highest price and the smallest area in each group.
per_category = dataset.groupby('category')
summary_spec = {'price': 'max', 'squareMeters': 'min'}
per_category.agg(summary_spec)

Unnamed: 0_level_0,price,squareMeters
category,Unnamed: 1_level_1,Unnamed: 2_level_1
Basic,10006771.2,89
Luxury,9984910.3,152
