In [57]:
import pandas as pd

searches_file_path = 'searches.tsv'
searches_data = pd.read_csv(searches_file_path, sep='\t')

searches_data.head(), searches_data.dtypes

searches_data

Unnamed: 0,ds,id_user,ds_checkin,ds_checkout,n_searches,n_nights,n_guests_min,n_guests_max,origin_country,filter_price_min,filter_price_max,filter_room_types,filter_neighborhoods
0,2014-10-01,0000af0a-6f26-4233-9832-27efbfb36148,2014-10-09,2014-10-12,16,3.0,2,2,IE,0.0,6.700000e+01,",Entire home/apt,Entire home/apt,Private room,...",
1,2014-10-01,0000af0a-6f26-4233-9832-27efbfb36148,2014-10-09,2014-10-19,3,10.0,1,2,IE,0.0,6.700000e+01,,
2,2014-10-01,000cd9d3-e05b-4016-9e09-34a6f8ba2fc5,,,1,,1,1,GB,,,,
3,2014-10-01,000cd9d3-e05b-4016-9e09-34a6f8ba2fc5,2014-11-08,2014-11-10,11,2.0,1,2,GB,,,",Entire home/apt",
4,2014-10-01,001c04f0-5a94-4ee0-bf5d-3591265256de,,,1,,1,1,IE,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35732,2014-10-14,ff3c92ed-ebea-4691-a808-cc6fb73f2f55,,,3,,2,2,US,0.0,1.000000e+02,Entire home/apt,
35733,2014-10-14,ff61875b-1d27-4ee4-bd4e-36671d3bfed4,,,2,,1,1,US,,,,
35734,2014-10-14,ff946a14-777b-4597-91b2-3277921214cd,2014-11-03,2014-11-06,10,3.0,1,3,IL,,,Entire home/apt,
35735,2014-10-14,ffc681d4-3668-4448-af38-6f73bc9fb89d,,,5,,1,1,IE,0.0,4.850000e+02,,


In [58]:
def mean_agg(series):
    return series.mean()

def set_agg(series):
    result = set(series.dropna())
    return ', '.join(result) if result else ''

aggregation_rules = {
    'ds': set_agg,
    'n_searches': mean_agg,
    'n_nights': mean_agg,
    'n_guests_min': mean_agg,
    'n_guests_max': mean_agg,
    'origin_country': set_agg,
    'filter_price_min': mean_agg,
    'filter_price_max': mean_agg,
    'filter_room_types': set_agg,
    'filter_neighborhoods': set_agg
}

# group by id_user and apply rules
grouped_searches = searches_data.groupby('id_user').agg(aggregation_rules)

# reset the index if needed and check output
grouped_searches.reset_index(inplace=True)
print(grouped_searches.head())

                                id_user                      ds  n_searches  \
0  0000af0a-6f26-4233-9832-27efbfb36148  2014-10-05, 2014-10-01    9.666667   
1  00058bcf-8950-4481-a977-d08b42d1fce5              2014-10-14    8.000000   
2  000ab7fb-dbac-414f-9080-88f265e2243e              2014-10-13    1.000000   
3  000b7ff7-47ea-48d3-9b09-1edd150acb02              2014-10-02    1.000000   
4  000c5621-b577-465d-be91-75254d75cc68              2014-10-14   12.500000   

    n_nights  n_guests_min  n_guests_max origin_country  filter_price_min  \
0   5.333333      1.666667           2.0             IE               0.0   
1        NaN      1.000000           1.0             DE               0.0   
2   2.000000      2.000000           2.0             US               NaN   
3        NaN      1.000000           1.0             FR               NaN   
4  19.000000      1.500000           1.5             RU               0.0   

   filter_price_max                                  filter_ro

In [59]:
grouped_searches

Unnamed: 0,id_user,ds,n_searches,n_nights,n_guests_min,n_guests_max,origin_country,filter_price_min,filter_price_max,filter_room_types,filter_neighborhoods
0,0000af0a-6f26-4233-9832-27efbfb36148,"2014-10-05, 2014-10-01",9.666667,5.333333,1.666667,2.0,IE,0.0,79.333333,",Entire home/apt,Entire home/apt,Private room,...",
1,00058bcf-8950-4481-a977-d08b42d1fce5,2014-10-14,8.000000,,1.000000,1.0,DE,0.0,70.000000,",Entire home/apt",
2,000ab7fb-dbac-414f-9080-88f265e2243e,2014-10-13,1.000000,2.000000,2.000000,2.0,US,,,Entire home/apt,
3,000b7ff7-47ea-48d3-9b09-1edd150acb02,2014-10-02,1.000000,,1.000000,1.0,FR,,,,
4,000c5621-b577-465d-be91-75254d75cc68,2014-10-14,12.500000,19.000000,1.500000,1.5,RU,0.0,74.000000,,
...,...,...,...,...,...,...,...,...,...,...,...
18600,fff2a379-46a1-4e09-9515-05d2cd26ceaa,2014-10-06,2.000000,3.000000,2.000000,2.0,GB,,,",Entire home/apt",
18601,fff3b53b-09fd-4681-9b79-4bc07360488f,2014-10-13,19.000000,1.000000,2.000000,2.0,ZA,0.0,177.000000,",Private room",
18602,fffa2e94-f426-405f-ab11-6f6602731ec8,2014-10-02,1.000000,,1.000000,1.0,KR,,,,
18603,fffea166-9432-43a7-8b1b-09d6f30c1c07,"2014-10-08, 2014-10-07, 2014-10-06",3.000000,12.250000,1.750000,2.0,AR,0.0,79.333333,,


In [60]:
grouped_searches.to_csv('searches_preprocessed.csv')