We'll look at 4 data files in this notebook to get some relevant information.

## 1. Import Dependencies

In [1]:
import pandas as pd
import math 

## 2. Data Exploratory

In [2]:
cities = pd.read_csv('./data/cities.csv')
providers = pd.read_csv('./data/providers.csv')
stations = pd.read_csv('./data/stations.csv')
ticket_data = pd.read_csv('./data/ticket_data.csv')

### 2.1 ***cities.csv***

In [3]:
cities.head()

Unnamed: 0,id,local_name,unique_name,latitude,longitude,population
0,5159,"Padua, Veneto, Italia",padua,45.406435,11.876761,209678.0
1,76,"Barcelona, Cataluña, España",barcelona,41.385064,2.173404,1611822.0
2,81,"Basel, Basel-Stadt, Schweiz",basel,47.593437,7.619812,
3,259,"Erlangen, Bayern, Deutschland",erlangen,49.589674,11.011961,105412.0
4,11979,"Balș, Olt, România",balș,44.353354,24.095672,


In [4]:
# we can see that we have 8040 cities (rows)
cities.shape

(8040, 6)

In [5]:
cities.isnull().sum()

id                0
local_name        0
unique_name       1
latitude          0
longitude         0
population     7671
dtype: int64

In [6]:
cities[cities.unique_name.isnull()]

Unnamed: 0,id,local_name,unique_name,latitude,longitude,population
6866,10244,", Hamburg, Deutschland",,39.613402,2.880431,


In [7]:
cities.unique_name.nunique()

8039

in this data file, we have cities served by tictactrip, we can do the link later by using cities id and perform different operations, We also have `latitude` and `longitude` properties, which allow us to determine the distance between the cities.

### 2.2 ***providers.csv***

In [8]:
providers.head()

Unnamed: 0,id,company_id,provider_id,name,fullname,has_wifi,has_plug,has_adjustable_seats,has_bicycle,transport_type
0,9,1,,ouibus,Ouibus,True,True,True,False,bus
1,10,2,,deinbus,Deinbus.de,False,False,False,False,bus
2,11,3,,infobus,Infobus,False,False,False,False,bus
3,12,4,,studentAgency,Student Agency,False,False,False,False,bus
4,13,5,,flixbus,Flixbus,True,False,False,False,bus


In [9]:
providers.shape

(227, 10)

In [10]:
# we can see that we have 14 different companies and 213 different sub-companies (providers)
providers.company_id.nunique(), providers.provider_id.nunique()

(14, 213)

In [11]:
# different company ids
providers.company_id.unique()

array([ 1,  2,  3,  4,  5,  6,  8,  9, 10, 20, 11, 30, 40, 12],
      dtype=int64)

In [12]:
# different providers ids
providers.provider_id.unique()

array([nan, '31', '33', '37', '41', '45', '47', '49', '57', '65', '77',
       '81', '83', '85', '111', '115', '129', '141', '149', '151', '159',
       '161', '163', '165', '167', '169', '171', '173', '179', '181',
       '185', '189', '195', '203', '205', '207', '209', '213', '215',
       '217', '219', '221', '223', '227', '229', '267', '269', '271',
       '285', '289', '291', '293', '295', '297', '299', '301', '303',
       '307', '315', '319', '321', '323', '325', '327', '329', '331',
       '333', '335', '337', '339', '341', '343', '345', '347', '349',
       '351', '353', '355', '357', '359', '361', '363', '365', '367',
       '369', '371', '373', '375', '377', '379', '381', '383', '385',
       '387', '389', '391', '393', '395', '397', '399', '401', '403',
       '405', '407', '409', '411', '413', '415', '417', '419', '421',
       '423', '425', '427', '429', '431', '433', '435', '437', '439',
       '441', '443', '445', '447', '449', '451', '453', '455', '457',
       '459', 

In [13]:
providers.isnull().sum()

id                       0
company_id               0
provider_id             14
name                     0
fullname                 0
has_wifi                 3
has_plug                 3
has_adjustable_seats     3
has_bicycle              3
transport_type           0
dtype: int64

### 2.3 ***stations.csv***

In [14]:
stations.head()

Unnamed: 0,id,unique_name,latitude,longitude
0,1,Aalen (Stuttgarter Straße),48.835296,10.092956
1,2,Aéroport Bordeaux-Mérignac,44.830226,-0.700883
2,3,Aéroport CDG,49.0099,2.55931
3,4,Aéroport de Berlin-Schönefeld,52.389446,13.520345
4,5,Aéroport de Dresden,51.123604,13.764737


In [15]:
stations.shape

(11035, 4)

In [16]:
stations.isnull().sum()

id             0
unique_name    0
latitude       0
longitude      0
dtype: int64

### 2.4 ***ticket_data.csv***

In [17]:
ticket_data.head()

Unnamed: 0,id,company,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,o_city,d_city
0,6795025,8385,,,2017-10-13 14:00:00+00,2017-10-13 20:10:00+00,4550,2017-10-01 00:13:31.327+00,,,611,542
1,6795026,9,63.0,1044.0,2017-10-13 13:05:00+00,2017-10-14 06:55:00+00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},611,542
2,6795027,8377,5905.0,6495.0,2017-10-13 13:27:00+00,2017-10-14 21:24:00+00,7400,2017-10-01 00:13:40.212+00,"{798,798,6794,6246}","{8377,8376}",611,542
3,6795028,8377,5905.0,6495.0,2017-10-13 13:27:00+00,2017-10-14 11:02:00+00,13500,2017-10-01 00:13:40.213+00,"{798,798,6794,6246}","{8377,8376}",611,542
4,6795029,8381,5905.0,6495.0,2017-10-13 21:46:00+00,2017-10-14 19:32:00+00,7710,2017-10-01 00:13:40.213+00,"{5983,5983}",{8380},611,542


In [18]:
ticket_data.shape

(74168, 12)

In [19]:
ticket_data.isnull().sum()

id                     0
company                0
o_station          41441
d_station          41441
departure_ts           0
arrival_ts             0
price_in_cents         0
search_ts              0
middle_stations    41441
other_companies    41441
o_city                 0
d_city                 0
dtype: int64

In [20]:
ticket_data.company.unique()

array([8385,    9, 8377, 8381,   13, 8371, 8376, 8380, 8372, 8387,   10,
       8379, 8384, 8382,  216,   71,  173,  192,   33,   12], dtype=int64)

## 3. Feature extraction

### 3.1 merge ***ticket_data.csv*** and ***providers.csv*** 

Let's merge the ticket_data dataframe and the providers dataframe

In [21]:
# before merging the two dataframes, we'll remove null values from provider_id and convert the column to int type

# first we'll convert to numeric values, we set errors to 'coerce' to force non numerical values to be set as NaN
providers['provider_id'] = pd.to_numeric(providers['provider_id'], errors='coerce')

# then, we'll drop the NaN values
providers.dropna(subset=['provider_id'], inplace=True)

# Finally, we convert the column to integer column
providers['provider_id'] = providers['provider_id'].astype('int')

# check that there is no longer null values 
providers['provider_id'].isnull().sum()

0

In [22]:
ticket_data_with_providers = pd.merge(ticket_data, providers, left_on='company',right_on='provider_id')

In [23]:
ticket_data_with_providers.head()

Unnamed: 0,id_x,company,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,...,id_y,company_id,provider_id,name,fullname,has_wifi,has_plug,has_adjustable_seats,has_bicycle,transport_type
0,6795026,9,63.0,1044.0,2017-10-13 13:05:00+00,2017-10-14 06:55:00+00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},...,8383,10,9,corail,Intercités,False,False,True,False,train
1,6795076,9,10144.0,396.0,2017-10-06 10:05:00+00,2017-10-06 16:40:00+00,1990,2017-10-01 01:04:12.45+00,"{932,932}",{13},...,8383,10,9,corail,Intercités,False,False,True,False,train
2,6795078,9,10144.0,396.0,2017-10-06 11:55:00+00,2017-10-07 18:45:00+00,1800,2017-10-01 01:04:12.451+00,"{932,932}",{13},...,8383,10,9,corail,Intercités,False,False,True,False,train
3,6795079,9,3.0,396.0,2017-10-06 12:30:00+00,2017-10-06 16:40:00+00,1590,2017-10-01 01:04:12.451+00,"{406,406}",{13},...,8383,10,9,corail,Intercités,False,False,True,False,train
4,6795081,9,10144.0,396.0,2017-10-06 17:40:00+00,2017-10-07 18:45:00+00,1100,2017-10-01 01:04:12.451+00,"{406,406}",{13},...,8383,10,9,corail,Intercités,False,False,True,False,train


In [24]:
# drop unnecessary columns
ticket_data_with_providers.drop(columns=['id_x','id_y','company'], axis=1, inplace=True)

In [25]:
ticket_data_with_providers.shape

(3564, 19)

In [26]:
ticket_data_with_providers.head()

Unnamed: 0,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,o_city,d_city,company_id,provider_id,name,fullname,has_wifi,has_plug,has_adjustable_seats,has_bicycle,transport_type
0,63.0,1044.0,2017-10-13 13:05:00+00,2017-10-14 06:55:00+00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},611,542,10,9,corail,Intercités,False,False,True,False,train
1,10144.0,396.0,2017-10-06 10:05:00+00,2017-10-06 16:40:00+00,1990,2017-10-01 01:04:12.45+00,"{932,932}",{13},628,453,10,9,corail,Intercités,False,False,True,False,train
2,10144.0,396.0,2017-10-06 11:55:00+00,2017-10-07 18:45:00+00,1800,2017-10-01 01:04:12.451+00,"{932,932}",{13},628,453,10,9,corail,Intercités,False,False,True,False,train
3,3.0,396.0,2017-10-06 12:30:00+00,2017-10-06 16:40:00+00,1590,2017-10-01 01:04:12.451+00,"{406,406}",{13},628,453,10,9,corail,Intercités,False,False,True,False,train
4,10144.0,396.0,2017-10-06 17:40:00+00,2017-10-07 18:45:00+00,1100,2017-10-01 01:04:12.451+00,"{406,406}",{13},628,453,10,9,corail,Intercités,False,False,True,False,train


In [27]:
ticket_data_with_providers.isnull().sum()

o_station               0
d_station               0
departure_ts            0
arrival_ts              0
price_in_cents          0
search_ts               0
middle_stations         0
other_companies         0
o_city                  0
d_city                  0
company_id              0
provider_id             0
name                    0
fullname                0
has_wifi                0
has_plug                0
has_adjustable_seats    0
has_bicycle             0
transport_type          0
dtype: int64

In [28]:
# We see that we've two values for transport_type column : train or bus
ticket_data_with_providers.transport_type.unique()

array(['train', 'bus'], dtype=object)

### 3.2 Create the travel duration attribute  

In [29]:
# convert departure and arrival date columns to datetime type
ticket_data_with_providers['departure_ts'] = pd.to_datetime(ticket_data_with_providers['departure_ts'])
ticket_data_with_providers['arrival_ts'] = pd.to_datetime(ticket_data_with_providers['arrival_ts'])

In [30]:
# now that we have the departure and arrival dates we can calculate the travel time (in minutes)
ticket_data_with_providers['duration_min'] = (ticket_data_with_providers['arrival_ts'] - ticket_data_with_providers['departure_ts'] ) / pd.Timedelta(minutes=1)

### 3.3 merge ***ticket_data_with_providers*** and ***cities***

In [31]:
# Let's start with the departure city
ticket_data_with_providers_and_cities = pd.merge(ticket_data_with_providers, cities, left_on='o_city', right_on='id')

# drop this unnecessary columns
ticket_data_with_providers_and_cities.drop(['id','o_city','local_name','population'], axis=1, inplace=True)

# rename the columns with meaningful names
ticket_data_with_providers_and_cities.rename(
    columns={
        'unique_name':'departure_city',
        'latitude':'departure_latitude',
        'longitude':'departure_longitude'
    }, 
    inplace=True)

In [32]:
ticket_data_with_providers_and_cities.head()

Unnamed: 0,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,d_city,company_id,...,fullname,has_wifi,has_plug,has_adjustable_seats,has_bicycle,transport_type,duration_min,departure_city,departure_latitude,departure_longitude
0,63.0,1044.0,2017-10-13 13:05:00+00:00,2017-10-14 06:55:00+00:00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},542,10,...,Intercités,False,False,True,False,train,1070.0,orleans,47.907018,1.90627
1,63.0,767.0,2017-10-06 13:05:00+00:00,2017-10-06 20:15:00+00:00,1400,2017-10-02 23:27:20.333+00,"{149,418}",{13},628,10,...,Intercités,False,False,True,False,train,430.0,orleans,47.907018,1.90627
2,63.0,767.0,2017-10-19 13:05:00+00:00,2017-10-19 20:15:00+00:00,1400,2017-10-03 07:50:12.901+00,"{149,418}",{13},628,10,...,Intercités,False,False,True,False,train,430.0,orleans,47.907018,1.90627
3,63.0,279.0,2017-10-19 21:10:00+00:00,2017-10-20 05:05:00+00:00,1700,2017-10-03 07:50:12.901+00,"{863,863}",{9},628,10,...,Intercités,False,False,True,False,train,475.0,orleans,47.907018,1.90627
4,63.0,279.0,2017-10-19 21:10:00+00:00,2017-10-20 09:55:00+00:00,1400,2017-10-03 07:50:12.901+00,"{863,863}",{13},628,10,...,Intercités,False,False,True,False,train,765.0,orleans,47.907018,1.90627


In [33]:
# Let's do the same processing with the arrival city
# Let's start with the departure city
ticket_data_with_providers_and_cities = pd.merge(ticket_data_with_providers_and_cities, cities, left_on='d_city', right_on='id')

# drop this unnecessary columns
ticket_data_with_providers_and_cities.drop(['id','d_city','local_name','population'], axis=1, inplace=True)

# rename the columns with meaningful names
ticket_data_with_providers_and_cities.rename(
    columns={
        'unique_name':'arrival_city',
        'latitude':'arrival_latitude',
        'longitude':'arrival_longitude'
    }, 
    inplace=True)

In [34]:
ticket_data_with_providers_and_cities.head()

Unnamed: 0,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,company_id,provider_id,...,has_adjustable_seats,has_bicycle,transport_type,duration_min,departure_city,departure_latitude,departure_longitude,arrival_city,arrival_latitude,arrival_longitude
0,63.0,1044.0,2017-10-13 13:05:00+00:00,2017-10-14 06:55:00+00:00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},10,9,...,True,False,train,1070.0,orleans,47.907018,1.90627,montpellier,43.604452,3.918318
1,279.0,1044.0,2017-10-06 11:10:00+00:00,2017-10-07 06:55:00+00:00,1850,2017-10-02 09:14:47.996+00,"{149,418}",{13},10,9,...,True,False,train,1185.0,paris,48.847702,2.352311,montpellier,43.604452,3.918318
2,848.0,1044.0,2017-10-06 23:40:00+00:00,2017-10-07 20:05:00+00:00,2800,2017-10-02 09:14:47.996+00,"{433,9751}",{13},10,9,...,True,False,train,1225.0,paris,48.847702,2.352311,montpellier,43.604452,3.918318
3,279.0,1044.0,2017-10-06 11:10:00+00:00,2017-10-07 06:55:00+00:00,1850,2017-10-02 17:53:20.699+00,"{149,418}",{13},10,9,...,True,False,train,1185.0,paris,48.847702,2.352311,montpellier,43.604452,3.918318
4,848.0,1044.0,2017-10-06 23:40:00+00:00,2017-10-07 20:05:00+00:00,3200,2017-10-02 17:53:20.699+00,"{433,9751}",{13},10,9,...,True,False,train,1225.0,paris,48.847702,2.352311,montpellier,43.604452,3.918318


### 3.4 Create the travel distance attribute

We'll use `latitude` and `longitude` attributes to create the distance column

In [35]:
# Convert the latitudes and longitudes to Radian
ticket_data_with_providers_and_cities['departure_latitude'] = (ticket_data_with_providers_and_cities['departure_latitude'] * math.pi)/180
ticket_data_with_providers_and_cities['departure_longitude'] = (ticket_data_with_providers_and_cities['departure_longitude'] * math.pi)/180
ticket_data_with_providers_and_cities['arrival_latitude'] = (ticket_data_with_providers_and_cities['arrival_latitude'] * math.pi)/180
ticket_data_with_providers_and_cities['arrival_longitude'] = (ticket_data_with_providers_and_cities['arrival_longitude'] * math.pi)/180

# Calculate the latitude and longitude distance
ticket_data_with_providers_and_cities['diff_latitude'] = ticket_data_with_providers_and_cities['arrival_latitude'] - ticket_data_with_providers_and_cities['departure_latitude']
ticket_data_with_providers_and_cities['diff_longitude'] = ticket_data_with_providers_and_cities['arrival_longitude'] - ticket_data_with_providers_and_cities['departure_longitude']

# compute the distance in KM
distance = []
for i in range(len(ticket_data_with_providers_and_cities)):
    
    diff_latitude = ticket_data_with_providers_and_cities['diff_latitude'][i]
    diff_longitude = ticket_data_with_providers_and_cities['diff_longitude'][i]
    departure_latitude = ticket_data_with_providers_and_cities['departure_latitude'][i]
    arrival_latitude = ticket_data_with_providers_and_cities['arrival_latitude'][i]
    
    intermediate_value = math.sin(diff_latitude / 2)**2 + math.cos(departure_latitude) * math.cos(arrival_latitude) * math.sin( diff_longitude / 2)**2
    distance.append(6371 * 2 * math.asin(math.sqrt(intermediate_value)))
# create the distance column
ticket_data_with_providers_and_cities['distance_km'] = distance

In [36]:
ticket_data_with_providers_and_cities.head()

Unnamed: 0,o_station,d_station,departure_ts,arrival_ts,price_in_cents,search_ts,middle_stations,other_companies,company_id,provider_id,...,duration_min,departure_city,departure_latitude,departure_longitude,arrival_city,arrival_latitude,arrival_longitude,diff_latitude,diff_longitude,distance_km
0,63.0,1044.0,2017-10-13 13:05:00+00:00,2017-10-14 06:55:00+00:00,1450,2017-10-01 00:13:35.773+00,"{149,418}",{13},10,9,...,1070.0,orleans,0.836135,0.033271,montpellier,0.761041,0.068388,-0.075094,0.035117,503.197167
1,279.0,1044.0,2017-10-06 11:10:00+00:00,2017-10-07 06:55:00+00:00,1850,2017-10-02 09:14:47.996+00,"{149,418}",{13},10,9,...,1185.0,paris,0.852553,0.041056,montpellier,0.761041,0.068388,-0.091512,0.027332,595.301747
2,848.0,1044.0,2017-10-06 23:40:00+00:00,2017-10-07 20:05:00+00:00,2800,2017-10-02 09:14:47.996+00,"{433,9751}",{13},10,9,...,1225.0,paris,0.852553,0.041056,montpellier,0.761041,0.068388,-0.091512,0.027332,595.301747
3,279.0,1044.0,2017-10-06 11:10:00+00:00,2017-10-07 06:55:00+00:00,1850,2017-10-02 17:53:20.699+00,"{149,418}",{13},10,9,...,1185.0,paris,0.852553,0.041056,montpellier,0.761041,0.068388,-0.091512,0.027332,595.301747
4,848.0,1044.0,2017-10-06 23:40:00+00:00,2017-10-07 20:05:00+00:00,3200,2017-10-02 17:53:20.699+00,"{433,9751}",{13},10,9,...,1225.0,paris,0.852553,0.041056,montpellier,0.761041,0.068388,-0.091512,0.027332,595.301747


### 3.5 The mean/max/min price and duration travel between cities

Let's group and aggregate the data to find the min, max and mean price between cities

In [71]:
grouped_tickets_by_cities = ticket_data_with_providers_and_cities.groupby(['departure_city', 'arrival_city','transport_type'])

In [72]:
aggregated_data_price = grouped_tickets_by_cities.aggregate({'price_in_cents': ['mean','max','min']})
aggregated_data_price

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,price_in_cents,price_in_cents,price_in_cents
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max,min
departure_city,arrival_city,transport_type,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
aix-en-provence,lille,train,4890.000000,4890,4890
aix-en-provence,paris,train,2292.500000,2690,1600
aix-en-provence,toulon,train,1890.000000,1890,1890
amiens,aix-en-provence,train,3895.000000,4290,3500
amiens,bordeaux,train,2845.000000,4090,2090
...,...,...,...,...,...
valence,paris,train,2780.576923,4090,1400
valence,rouen,train,3840.000000,4090,3590
valence,saint-dizier,train,3095.000000,3100,3090
vannes,bruxelles,train,3400.000000,3400,3400


In [73]:
aggregated_data_duration = grouped_tickets_by_cities.aggregate({'duration_min': ['mean','max','min']})
aggregated_data_duration

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,duration_min,duration_min,duration_min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,max,min
departure_city,arrival_city,transport_type,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
aix-en-provence,lille,train,1230.000000,1230.0,1230.0
aix-en-provence,paris,train,683.750000,750.0,660.0
aix-en-provence,toulon,train,860.000000,860.0,860.0
amiens,aix-en-provence,train,1082.500000,1215.0,950.0
amiens,bordeaux,train,1050.000000,1490.0,755.0
...,...,...,...,...,...
valence,paris,train,595.769231,1260.0,475.0
valence,rouen,train,805.000000,920.0,690.0
valence,saint-dizier,train,747.500000,760.0,735.0
vannes,bruxelles,train,1750.000000,1750.0,1750.0


### 3.6 price and duration time by distance 

In [74]:
def get_price_and_duration_statistics_by_distance(data, min_distance, max_distance):
    
    print(f"Between {min_distance} and {max_distance}KM")
    
    # price
    print("price :")
    print(data[(data['distance_km']>=min_distance)&(data['distance_km']<=max_distance)]["price_in_cents"].describe())
    
    # duration
    print("\nduration :")
    print(data[data['distance_km']<=distance]["duration_min"].describe())

In [65]:
get_price_and_duration_statistics_by_distance(ticket_data_with_providers_and_cities, 0, 200)

Between 0 and 200KM
price :
count     335.000000
mean     2093.880597
std       897.496443
min       850.000000
25%      1400.000000
50%      1890.000000
75%      2690.000000
max      5890.000000
Name: price_in_cents, dtype: float64

duration :
count     3564.000000
mean       855.669753
std        527.237804
min        140.000000
25%        520.000000
50%        705.000000
75%       1075.000000
max      12105.000000
Name: duration_min, dtype: float64


In [66]:
get_price_and_duration_statistics_by_distance(ticket_data_with_providers_and_cities, 200, 800)

Between 200 and 800KM
price :
count     3013.000000
mean      3273.816130
std       1365.397929
min       1000.000000
25%       2300.000000
50%       3090.000000
75%       3990.000000
max      14596.000000
Name: price_in_cents, dtype: float64

duration :
count     3564.000000
mean       855.669753
std        527.237804
min        140.000000
25%        520.000000
50%        705.000000
75%       1075.000000
max      12105.000000
Name: duration_min, dtype: float64


In [67]:
get_price_and_duration_statistics_by_distance(ticket_data_with_providers_and_cities, 800, 2000)

Between 800 and 2000KM
price :
count      216.000000
mean      6165.069444
std       2310.751286
min       2290.000000
25%       4175.000000
50%       5800.000000
75%       7700.000000
max      16395.000000
Name: price_in_cents, dtype: float64

duration :
count     3564.000000
mean       855.669753
std        527.237804
min        140.000000
25%        520.000000
50%        705.000000
75%       1075.000000
max      12105.000000
Name: duration_min, dtype: float64
