# Introduction

In this notebook I create a way of using the aggregated model in the explaianability dashboard against targets. Basically for each day trhough the year it creates different aggregation for the day agains the previous ones, and then, it creates a prediction with the targets model.

### Instalations and imports

In [2]:
!pip install plotly boto3==1.19.12 s3fs pandas==2.1.2 

[31mERROR: Ignored the following versions that require a different python version: 2.1.0 Requires-Python >=3.9; 2.1.0rc0 Requires-Python >=3.9; 2.1.1 Requires-Python >=3.9; 2.1.2 Requires-Python >=3.9; 2.1.3 Requires-Python >=3.9; 2.1.4 Requires-Python >=3.9; 2.2.0 Requires-Python >=3.9; 2.2.0rc0 Requires-Python >=3.9; 2.2.1 Requires-Python >=3.9; 2.2.2 Requires-Python >=3.9[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement pandas==2.1.2 (from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3

In [3]:
# General
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import os
import numpy as np
import xlsxwriter
import datetime
import boto3
import s3fs

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV

# Plots
import matplotlib.pyplot as plt
import seaborn as sns

#Warnings
import warnings
warnings.filterwarnings("ignore")

# 1. Aggregation logic

In [4]:
# Use own bucket and prefix
S3_BUCKET_NPS = 'iberia-data-lake' # In this case: iberia-data-lake
S3_BUCKET_NPS_PREFIX = 'customer/nps_explainability_model' # In this case: sagemaker/sagemaker-template

S3_BUCKET_LF = 'ibdata-prod-ew1-s3-customer'
S3_BUCKET_LF_PREFIX = 'customer/load_factor_to_s3_nps_model'

S3_PATH_READ_NPS = 'customer/nps_surveys/export_historic'
S3_PATH_READ_LF = "customer/load_factor_to_s3_nps_model"

insert_date_ci='2024-03-30'
today_date_str='2024-03-30'

## Read data

In [5]:
# READ NPS DATA SOURCE
# Read df_nps_surveys
s3_resource = boto3.resource("s3")

# READ TODAY DATA (HISTORIC NPS)
today_nps_surveys_prefix = f'{S3_PATH_READ_NPS}/insert_date_ci={today_date_str}/'
s3_keys = [item.key for item in s3_resource.Bucket(S3_BUCKET_NPS).objects.filter(Prefix=today_nps_surveys_prefix)]
preprocess_paths = [f"s3://{S3_BUCKET_NPS}/{key}" for key in s3_keys]

df_nps_historic = pd.DataFrame()
for file in preprocess_paths:
    df = pd.read_csv(file)
    df_nps_historic = pd.concat([df_nps_historic, df], axis=0)
df_nps_historic = df_nps_historic.reset_index(drop=True)

In [6]:
# READ LF DATA SOURCE
# lf_dir = 's3://ibdata-prod-ew1-s3-customer/customer/load_factor_to_s3_nps_model/'    
load_factor_prefix = f's3://{S3_BUCKET_LF}/{S3_PATH_READ_LF}/'

# Assume rol for prod
sts_client = boto3.client('sts')
assumed_role = sts_client.assume_role(
    RoleArn="arn:aws:iam::320714865578:role/ibdata-prod-role-assume-customer-services-from-ibdata-aip-prod",
    RoleSessionName="test"
)
credentials = assumed_role['Credentials']
fs = s3fs.S3FileSystem(key=credentials['AccessKeyId'], secret=credentials['SecretAccessKey'], token=credentials['SessionToken'])

# Listall the files
load_factor_list = fs.ls(load_factor_prefix)
    
print("userlog: Read historic load_factor data path %s.", load_factor_prefix)
dataframes = []
for file_path in load_factor_list:
    try:
        file_info = fs.info(file_path)
        if file_info['Size'] == 0:
            print(f"Skipping empty file: {file_path}")
            continue

        with fs.open(f's3://{file_path}') as f:
            df = pd.read_csv(f)
            dataframes.append(df)
    except pd.errors.EmptyDataError:
        print(f"Caught EmptyDataError for file: {file_path}, skipping...")
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")

if dataframes:
    df_lf_historic = pd.concat(dataframes, ignore_index=True)
else:
    df_lf_historic = pd.DataFrame()

userlog: Read historic load_factor data path %s. s3://ibdata-prod-ew1-s3-customer/customer/load_factor_to_s3_nps_model/


## A little preprocess

In [7]:
condition_1 = (df_nps_historic['operating_airline_code'].isin(['IB', 'YW']))
condition_2 = ((df_nps_historic['invitegroup_ib'] != 3) | (df_nps_historic['invitegroup_ib'].isnull()))
condition_3 = (df_nps_historic['invitegroup'] == 2)

df_nps_historic = df_nps_historic.loc[condition_1 & (condition_2 & condition_3)]

df_lf_historic = df_lf_historic.loc[(df_lf_historic['operating_carrier'].isin(['IB', 'YW']))]

In [8]:
datetime_features = ['date_flight_local', 'scheduled_departure_time_local', 'scheduled_arrival_time_local', 'real_departure_time_local',
                     'real_arrival_time_local', 'started']
columns_to_cross_kpis=['cabin_in_surveyed_flight','haul']
columns_ext = ['tier_level', 'language_code', 'seat_no', 'volume_of_bags', 'number_of_child_in_the_booking', 'number_of_infant_in_the_booking',
              'number_of_people_in_the_booking', 'country_code', 'customer_journey_origin', 'customer_journey_destination', 'number_of_flights_in_journey',
              'order_of_flight_in_journey', 'marketing_airline_code', 'overall_haul', 'weight_category', 'ff_number', 'ticket_num', 'operating_airline_code',
               'nps_category', 'nps_100', 'group_age_survey', 'gender'] # invite_group

#'bkg_100_booking', 
touchpoints = ['bkg_200_journey_preparation', 'pfl_100_checkin', 'pfl_200_security', 'pfl_300_lounge',
               'pfl_500_boarding', 'ifl_300_cabin', 'ifl_200_flight_crew_annoucements', 'ifl_600_wifi', 'ifl_500_ife',
               'ifl_400_food_drink', 'ifl_100_cabin_crew', 'arr_100_arrivals', 'con_100_connections', 'pun_100_punctuality',
               'loy_200_loyalty_programme', 'inm_400_issues_response', 'img_310_ease_contact_phone']

# ,'img_320_ease_contact_ibplus_mail'
survey_fields = ['cla_600_wifi_t_f', 'tvl_journey_reason']

df_nps_historic['date_flight_local'] = pd.to_datetime(df_nps_historic['date_flight_local'])
df_lf_historic['flight_date_local'] = pd.to_datetime(df_lf_historic['flight_date_local'])

In [9]:
df_nps_historic = df_nps_historic[df_nps_historic['date_flight_local'].dt.year >= 2019]
df_nps_historic = df_nps_historic[~df_nps_historic['date_flight_local'].dt.year.isin([2020, 2021])]

df_lf_historic = df_lf_historic[~df_lf_historic['flight_date_local'].dt.year.isin([2020, 2021])]

In [10]:
delay_features = ['real_departure_time_local', 'scheduled_departure_time_local']
for feat in delay_features:
    df_nps_historic[feat] = pd.to_datetime(df_nps_historic[feat], format="%Y-%m-%d %H:%M:%S", errors = 'coerce')
            
df_nps_historic['delay_departure'] = (df_nps_historic['real_departure_time_local'] - df_nps_historic['scheduled_departure_time_local']).dt.total_seconds()/60

In [11]:
df_nps_historic['haul'] = df_nps_historic['haul'].replace('MH', 'SH')
#df_nps_historic['cabin_in_surveyed_flight'] = df_nps_historic['cabin_in_surveyed_flight'].replace('Premium Economy', 'Economy')# Load Factor
df_lf_historic['load_factor_business'] = df_lf_historic['pax_business'] / df_lf_historic['capacity_business']
df_lf_historic['load_factor_premium_ec'] = df_lf_historic['pax_premium_ec'] / df_lf_historic['capacity_premium_ec']
df_lf_historic['load_factor_economy'] = df_lf_historic['pax_economy'] / df_lf_historic['capacity_economy']

In [12]:
 # OTP
df_nps_historic['otp15_takeoff'] = (df_nps_historic['delay_departure'] > 15).astype(int)

# Promoter and Detractor columns
df_nps_historic["promoter_binary"] = df_nps_historic["nps_category"].apply(lambda x: 1 if x == "Promoter" else 0)
df_nps_historic["detractor_binary"] = df_nps_historic["nps_category"].apply(lambda x: 1 if x == "Detractor" else 0)

In [13]:
# Load Factor
df_lf_historic['load_factor_business'] = df_lf_historic['pax_business'] / df_lf_historic['capacity_business']
df_lf_historic['load_factor_premium_ec'] = df_lf_historic['pax_premium_ec'] / df_lf_historic['capacity_premium_ec']
df_lf_historic['load_factor_economy'] = df_lf_historic['pax_economy'] / df_lf_historic['capacity_economy']

In [14]:
cabin_to_load_factor_column = {
    'Economy': 'load_factor_economy',
    'Business': 'load_factor_business',
    'Premium Economy': 'load_factor_premium_ec'
}

# HISTORIC
df_lf_historic.columns = ['date_flight_local' if x=='flight_date_local' else 
                                'operating_airline_code' if x=='operating_carrier' else
                                'surveyed_flight_number' if x=='op_flight_num' else
                                x for x in df_lf_historic.columns]

df_historic = pd.merge(df_nps_historic, df_lf_historic, 
                    how='left', 
                    on=['date_flight_local', 'operating_airline_code', 'surveyed_flight_number', 'haul'])

df_historic['load_factor'] = df_historic.apply(lambda row: row[cabin_to_load_factor_column[row['cabin_in_surveyed_flight']]], axis=1)

In [15]:
df_historic['cabin_in_surveyed_flight']

0          Economy
1         Business
2          Economy
3          Economy
4          Economy
            ...   
548435     Economy
548436     Economy
548437     Economy
548438     Economy
548439     Economy
Name: cabin_in_surveyed_flight, Length: 548440, dtype: object

In [16]:
df_nps_historic['delay_departure']

1            20.000
2            -5.000
3         -1419.000
4            10.000
6             3.000
             ...   
1013454      30.000
1013455      10.000
1013456       0.000
1013457       6.000
1013458       3.000
Name: delay_departure, Length: 544114, dtype: float64

In [17]:
df_historic['cabin_in_surveyed_flight'].unique()

array(['Economy', 'Business', 'Premium Economy'], dtype=object)

In [18]:
df_nps_historic['real_departure_time_local']

1         2023-01-04 13:55:00
2         2023-01-29 16:45:00
3         2019-07-23 00:16:00
4         2019-09-03 12:20:00
6         2019-02-16 19:53:00
                  ...        
1013454   2024-03-04 17:15:00
1013455   2024-03-04 16:20:00
1013456   2024-03-04 22:10:00
1013457   2024-03-04 15:56:00
1013458   2024-03-04 08:48:00
Name: real_departure_time_local, Length: 544114, dtype: datetime64[ns]

In [19]:
df_historic

Unnamed: 0,respondent_id,sample_id,surveyed_flight_number,date_flight_local,scheduled_departure_time_local,scheduled_arrival_time_local,tier_level,language_code,aircraft_registration_number,seat_no,volume_of_bags,number_of_child_in_the_booking,number_of_infant_in_the_booking,number_of_people_in_the_booking,infinita_customers_identifer,flag_of_ib_singular_customers,country_code,list_of_options_for_booking_channel,list_of_options_for_checkin_channel,lounge_used_at_origin_airport,customer_journey_origin,customer_journey_destination,number_of_flights_in_journey,order_of_flight_in_journey,fleet_in_surveyed_flight,marketing_airline_code,date_of_flight_gmt,scheduled_departure_time_gmt,real_departure_time_local,real_departure_time_gmt,scheduled_arrival_time_gmt,real_arrival_time_local,real_arrival_time_gmt,segment,route,overall_haul,purser,invitegroup_ib,weight_category,weekly_weight,monthly_weight,pnr_show,ff_number,id_golden_record,ticket_num,started,time_spent_hrminsec,customer_email_show,origin_of_surveyed_flight,destination_of_surveyed_flight,operating_airline_code,cabin_in_surveyed_flight,haul,first_name_show,last_name_show,second_last_name_show,nps_category,nps_100,survey_type,invitegroup,group_age_survey,pun_100_punctuality,inm_200_issues_prior_checkin,inm_200_issues_prior_ticket_change,inm_200_issues_prior_schedule_change,inm_200_issues_prior_contact_center,inm_200_issues_prior_special_serv,inm_200_issues_prior_special_req,inm_200_issues_prior_avios,inm_200_issues_prior_voucher,inm_206_issues_checkin_long_queues,inm_206_issues_checkin_wrong_info,inm_206_issues_checkin_additional_fees,inm_206_issues_checkin_overbooking,inm_206_issues_checkin_downgrade,inm_206_issues_checkin_staff,inm_206_issues_checkin_social_distance,inm_206_issues_checkin_face_masks,inm_206_issues_checkin_documentation,inm_206_issues_checkin_other,inm_207_issues_lounge_denied,inm_207_issues_lounge_overcrowded,inm_207_issues_lounge_cleanliness,inm_207_issues_lounge_wifi,inm_207_issues_lounge_staff,inm_207_issues_lounge_food_drink,inm_207_issues_lounge_face_masks,inm_207_issues_lounge_other,inm_208_issues_security_leave_sth,inm_208_issues_security_long_queues,inm_208_issues_security_staff,inm_208_issues_security_social_distance,inm_208_issues_security_face_masks,inm_208_issues_security_other,inm_209_issues_boarding_unclear,inm_209_issues_boarding_gate_changed,inm_209_issues_boarding_lack_space,inm_209_issues_boarding_disorganised,inm_209_issues_boarding_staff,inm_209_issues_boarding_social_distance,inm_209_issues_boarding_face_masks,inm_209_issues_boarding_documentation,inm_209_issues_boarding_other,inm_220_issues_timing_cancelled,inm_220_issues_timing_dep_delay,inm_220_issues_timing_arr_delay,inm_220_issues_timing_missed,inm_230_issues_onboard_staff,inm_230_issues_onboard_ife,inm_230_issues_onboard_overcrowding,inm_230_issues_onboard_face_masks,inm_235_issues_onboard_comfort_damaged,inm_235_issues_onboard_comfort_space,inm_235_issues_onboard_comfort_temperature,inm_235_issues_onboard_comfort_cleanliness,inm_235_issues_onboard_comfort_washrooms,inm_235_issues_onboard_comfort_other,inm_236_issues_meal_availability,inm_236_issues_meal_portions,inm_236_issues_meal_quality,inm_236_issues_meal_special,inm_236_issues_meal_other,inm_240_issues_baggage_lost,inm_240_issues_baggage_delayed,inm_240_issues_baggage_demaged,inm_240_issues_baggage_staff,inm_240_issues_baggage_hand,inm_240_issues_baggage_other,inm_250_issues_arrival_slow,inm_250_issues_arrival_unclear,inm_250_issues_arrival_aditional_request,inm_250_issues_arrival_staff,inm_250_issues_arrival_immigration_queues,inm_250_issues_arrival_immigration_passport,inm_250_issues_arrival_immigration_other,inm_255_issues_connecting_missed,inm_255_issues_connecting_staff,inm_255_issues_connecting_baggage,inm_255_issues_connecting_other,bkg_100_booking,bkg_200_journey_preparation,inm_400_issues_response,pfl_100_checkin,pfl_200_security,pfl_300_lounge,pfl_500_boarding,ifl_100_cabin_crew,ifl_200_flight_crew_annoucements,ifl_300_cabin,ifl_400_food_drink,ifl_600_wifi,arr_100_arrivals,con_100_connections,img_310_ease_contact_phone,img_320_ease_contact_ibplus_mail,ifl_500_ife,loy_200_loyalty_programme,inm_050_issues_t_f,dig_400_mobile_app,cov_300_appropiate_changes_to_reassure,hot_topic_verbatim,iag_ht_oe_t_scrubbed,cla_800_did_you_use_wifi_on_board,status,nps_all_t,inm_220_issues_timing_staff,inm_220_issues_timing_other,cla_200_check_in_methodiberias_mobile_app,cla_200_check_in_methodiberias_website,cla_200_check_in_methodiberias_checkin_desk_at_the_airport,cla_200_check_in_methodselfservice_kiosk_at_the_airport,cla_200_check_in_methodother,inm_100_journey_issuean_issue_prior_to_travelling,inm_100_journey_issuean_issue_at_your_departure_airport,inm_100_journey_issuea_disruption_to_your_flight_timing,inm_100_journey_issuean_issue_onboard,inm_100_journey_issuean_issue_with_your_baggage,inm_100_journey_issuean_issue_disembarking_or_at_your_arrival_airport,inm_100_journey_issuean_issue_connecting_tofrom_another_flight,inm_100_issues_other,inm_100_oth_t,inm_200_issues_prior_other,inm_200_oth_t,inm_205_issues_dep_airport_checkin,inm_205_issues_dep_airport_lounge,inm_205_issues_dep_airport_security,inm_205_issues_dep_airport_boarding,inm_205_issues_dep_airport_assistance,inm_205_issues_dep_airport_other,inm_205_oth_t,inm_206_oth_t,inm_207_oth_t,inm_208_oth_t,inm_209_oth_t,inm_220_oth_t,inm_230_issues_onboard_seat_selected,inm_230_issues_onboard_not_together,inm_230_issues_onboard_comfort,inm_230_issues_onboard_meal,inm_230_issues_onboard_left_sth,inm_230_issues_onboard_other,inm_230_oth_t,inm_235_oth_t,inm_236_oth_t,inm_240_issues_baggage_stolen,inm_240_oth_t,inm_250_oth_t,inm_255_oth_t,inm_301_how_managed_issue_representative,inm_301_how_managed_issue_call_centre,inm_301_how_managed_issue_website,inm_301_how_managed_issue_could_not,inm_301_how_managed_issue_email,inm_301_how_managed_issue_decided_not,inm_301_how_managed_issue_other,inm_301_oth_t,inm_305_issues_resolved_t_f,inm_500_issues_verbatim_translated,cla_100_booking_channel_survey,cla_400_lounge_t_f,cla_500_ife_t_f,cla_610_wifi_aware,cla_600_wifi_t_f,cla_600_wifi_other_verbatim_translated,cla_300_connection_from,cla_300_connection_to,cla_300_connection_no,img_430_solved_1st_time_social_net,cov_500_covid_verbatim,cov_500_covid_verbatim_translated,gender,res100_country_code_survey,bnd_011_looked_lowest_cost_1_5,tvl_journey_reason,tvloth_journey_reason_other_verbatim_tranlated,rea_choosing_reason,anom_anonimity,cla_450_fast_track_t_f,cla_900_boarding_how,cla_700_food_drink_provision,cla_120_food_drink_preordered,cla_711_meal_prepurchase,cla_711_meal_prepurchase_other_verbatim_translated,cla_710_food_drink_purchased,cla_710_food_drink_purchased_other,cla_712_get_preferred_food_y_n,cla_550_digital_press,cla_550_digital_press_other_verbatim_translated,usb_100_usb_use,usb_100_usb_use_other_verbatim_translated,cla_950_disembark_how,arr_400_arrival_luggage_collection,insert_date_ci,date_survey_completed,scheduled_arrival_date_local,iag_mod_702_logic,inm_200_issue_helpi_contacted_iberia_on_social_media,loy_100_ways_of_contactby_phone,loy_100_ways_of_contactvia_the_iberia_plus_email,loy_100_ways_of_contactby_whatsapp,loy_100_ways_of_contacton_social_networks_facebook_twitter_etc,loy_100_ways_of_contacti_contacted_iberia_using_other_channels_please_specify,loy_100_ways_of_contacti_did_not_contact_iberia,iag_loy_100_96_oth_t_scrubbed,loy_500_by_whatsapp,dem_700_travelling_withi_was_travelling_on_my_own,dem_700_travelling_withwith_babies_aged_under_2,dem_700_travelling_withwith_toddlers_aged_2_to_5,dem_700_travelling_withwith_children_aged_6_to_12,dem_700_travelling_withwith_teens_aged_13_to_17,dem_700_travelling_withwith_my_spouse_or_partner,dem_700_travelling_withother_adult_party_6_people_or_less,dem_700_travelling_withadult_partygroup_more_than_6_people,dem_700_travelling_withprefer_not_to_say,iag_dem_800_96_oth_t_scrubbed,perm_200_future_contact,iag_sel_000_continuesubmit,interaction_point,sel_100_module_selectionbooking_and_journey_preparation,sel_100_module_selectioncheckin,sel_100_module_selectionlounge_experience,sel_100_module_selectionboarding,sel_100_module_selectioncabin_crew,sel_100_module_selectioncabin_environment,sel_100_module_selectionfood_and_drink_on_board,sel_100_module_selectionin_flight_entertainment_and_wifi,sel_100_module_selectionin_flight_entertainment,sel_100_module_selectionwifi_service,sel_100_module_selectionarrival_experience,sel_100_module_selectionconnections_with_another_flight,sel_100_module_selectioniberia_plus_loyalty_program,iag_inm_101_96_oth_t_scrubbed,mod_102_pre_journeyease_of_booking_process_on_iberia_websitemobile_app,mod_102_pre_journeyclarity_of_information_and_conditions_during_booking_process_on_iberia_websitemobile_app,mod_102_pre_journeyease_of_managing_your_booking_on_iberia_websitemobile_app,mod_102_pre_journeyease_of_contact_with_iberia_by_phone,mod_102_pre_journeyhelpfulness_of_iberia_staff_by_phone,mod_102_pre_journeyease_of_contact_with_iberia_by_mail,mod_102_pre_journeyother_please_specify,iag_mod_102_96_oth_t_scrubbed,iag_mod_201_t_scrubbed,mod_203_checkinwaiting_time_at_airport_checkin_area,mod_203_checkinhelpfulness_of_staff_at_airport_checkin_area,mod_203_checkinsocial_distancing_during_checkin_at_the_airport,mod_203_checkinease_of_online_checkin_process,mod_203_checkinwebsiteapp_reliability_and_performance,mod_203_checkinease_of_use_of_kiosks_at_the_airport,mod_203_checkinother_please_specify,iag_mod_203_96_oth_t_scrubbed,iag_mod_301_t_scrubbed,mod_302_loungehelpfulness_of_staff_at_reception,mod_302_loungehelpfulness_of_staff_inside_the_lounge,mod_302_loungequality_of_the_food,mod_302_loungethe_variety_of_the_food_on_offer,mod_302_loungethe_variety_of_drinks_on_offer,mod_302_loungeseat_availability,mod_302_loungewifi,mod_302_loungeother_please_specify,iag_mod_302_96_oth_t_scrubbed,iag_mod_401_t_scrubbed,mod_403_boardingthe_organisation_of_the_boarding_process,mod_403_boardingspeed_of_boarding,mod_403_boardinghelpfulness_of_staff_at_the_departure_gate,mod_403_boardingannouncements_made_at_the_departure_gate,mod_403_boardingpriority_boarding,mod_403_boardingsocial_distancing_during_boarding,mod_403_boardingbiometric_boarding_capabilities,mod_403_boardingavailability_of_space_for_your_hand_luggagepersonal_belongings_in_the_aircraft,mod_403_boardingother_please_specify,iag_mod_403_96_oth_t_scrubbed,iag_mod_501_t_scrubbed,mod_502_crewhelpfulness_of_cabin_crew,mod_502_crewcrew_availability_during_the_flight,mod_502_crewempowerment_of_cabin_crew_to_resolve_problems,mod_502_crewcabin_crew_managing_other_passengers,mod_502_crewcabin_crew_managing_boarding_disembarking,mod_502_crewgrooming_and_appearance,mod_502_crewflight_information_provided_by_pilots,mod_502_crewannouncements_provided_by_cabin_crew,mod_502_crewother_please_specify,iag_mod_502_96_oth_t_scrubbed,iag_mod_601_t_scrubbed,mod_602_cabincleanliness_of_the_cabin,mod_602_cabincleanliness_of_toilets,mod_602_cabinphysical_condition_of_the_cabin,mod_602_cabinphysical_condition_of_the_toilets,mod_602_cabinamount_of_legroom,mod_602_cabinseat_comfort,mod_602_cabinsocial_distancing,mod_602_cabintemperature_onboard,mod_602_cabinother_please_specify,iag_mod_602_96_oth_t_scrubbed,iag_mod_701_t_scrubbed,mod_707_inflight_fndquality_of_food,mod_707_inflight_fndquality_of_wines,mod_707_inflight_fndselection_of_food,mod_707_inflight_fndselection_of_drinks,mod_707_inflight_fndvisual_appeal_of_food,mod_707_inflight_fndquantity_portion_size_of_food_available,mod_707_inflight_fndtimings_when_food_and_drinks_are_served,mod_707_inflight_fndvalue_for_money_of_food_and_drink_available,mod_707_inflight_fndsustainable_packaging_materials_for_food_and_drinks,mod_707_inflight_fndother_please_specify,iag_mod_707_96_oth_t_scrubbed,iag_mod_803_a_t_scrubbed,iag_mod_803_b_t_scrubbed,iag_mod_803_c_t_scrubbed,mod_806_ife_and_wifiinflight_entertainment_contents,mod_806_ife_and_wifiinflight_entertainment_ease_of_use,mod_806_ife_and_wifichoice_of_movies,mod_806_ife_and_wifiscreen_quality,mod_806_ife_and_wifiease_of_use_of_the_wifi,mod_806_ife_and_wifireliability_of_the_wifi_connection,mod_806_ife_and_wifispeed_of_the_wifi_connection,mod_806_ife_and_wifiwifi_value_for_money,mod_806_ife_and_wifiother_please_specify,iag_mod_806_96_oth_t_scrubbed,mod_807_ifeinflight_entertainment_contents,mod_807_ifeease_of_access,mod_807_ifeother_please_specify,iag_mod_807_96_oth_t_scrubbed,mod_808_wifiease_of_use_of_the_wifi,mod_808_wifireliability_of_the_wifi_connection,mod_808_wifispeed_of_the_wifi_connection,mod_808_wifiwifi_value_for_money,mod_808_wifiother_please_specify,iag_mod_808_96_oth_t_scrubbed,iag_mod_901_t_scrubbed,mod_904_arrivalsorganizationspeed_for_getting_of_the_plane,mod_904_arrivalssocial_distancing_getting_off_the_plane,mod_904_arrivalssignage_to_immigrationconnections,mod_904_arrivalsspeed_of_getting_through_immigration,mod_904_arrivalstime_to_collect_checked_baggage,mod_904_arrivalsease_of_claiming_for_lost_or_damaged_baggage,mod_904_arrivalsother_please_specify,iag_mod_904_96_oth_t_scrubbed,iag_mod_1001_t_scrubbed,mod_1002_needed_for_connectionchange_terminals,mod_1002_needed_for_connectionchange_airports,mod_1002_needed_for_connectionchange_airlines,mod_1002_needed_for_connectioncollect_and_recheck_your_luggage,mod_1002_needed_for_connectionpass_through_immigration,mod_1002_needed_for_connectionpass_through_security,mod_1002_needed_for_connectioncheckin_and_collect_boarding_pass,mod_1002_needed_for_connectioncollect_boarding_pass_only,mod_1002_needed_for_connectionseek_help_or_advice_from_a_customer_service_representative,mod_1002_needed_for_connectionnone_of_the_above,mod_1003_connectiononboard_announcements_regarding_connections,mod_1003_connectionairport_signage_to_locate_connecting_flight,mod_1003_connectiontime_available_for_you_connection,mod_1003_connectionaccessibility_of_staff_to_support_questionsqueries,mod_1003_connectionalerts_sent_to_my_mobile_with_live_updates_on_my_connection,mod_1003_connectionsimplify_document_checks_between_flights,mod_1003_connectionother_please_specify,iag_mod_1003_96_oth_t_scrubbed,iag_mod_1101_t_scrubbed,mod_1102_loyaltychances_to_use_my_avios,mod_1102_loyaltyvolume_of_accrued_avios_with_my_flight,mod_1102_loyaltytier_benefits,mod_1102_loyaltyrecognition_while_travelling_with_iberia,mod_1102_loyaltyhelpfulness_of_the_personalized_contact_centre_service,mod_1102_loyaltyother_please_specify,iag_mod_1102_96_oth_t_scrubbed,alert,img_410_phone,img_420_ibplus_email,group_age,localdeparturedateuk,date_campaign,date_sample_file,date_survey_completion,completiondateuk,device_type,browser_show,ipaddress_show,ovp_100_overall_experience,inm_100_issues_other_verbatim,inm_200_issues_prior_other_verbatim,inm_205_issues_dep_airport_other_verbatim,inm_206_issues_checkin_other_verbatim,inm_207_issues_lounge_other_verbatim,inm_208_issues_security_other_verbatim,inm_209_issues_boarding_other_verbatim,inm_220_issues_timing_other_verbatim,inm_270_flight_delay_length,inm_280_issues_delay_inf_clarity,inm_282_issues_delay_length,inm_284_issues_delay_looked_after,inm_230_issues_onboard_other_verbatim,inm_235_issues_onboard_comfort_other_verbatim,inm_236_issues_meal_other_verbatim,inm_240_issues_baggage_other_verbatim,inm_250_issues_arrival_lounge,inm_250_issues_arrival_immigration_other_verbatim,inm_255_issues_connecting_other_verbatim,inm_300_issues_spoken_t_f,inm_301_how_managed_issue_mobile_app,inm_301_how_managed_issue_other_verbatim,inm_310_issues_apology_t_f,inm_320_issues_empathy_t_f,inm_500_issues_verbatim,vfm_100_value_for_money,loy_100_likelihoodd_fly_again,tvloth_journey_reason_other_verbatim,bnd_021_willing_to_pay_more_1_5,com_200_num_trips_ly_ib,com_210_num_trips_ly_other_airlines,com_101_num_trips_ly_ib,com_201_num_trips_ly_other_airlines,cla_105_booking_agency,cla_110_booking_device,bkg_110_booking_time,bkg_120_booking_staff_service,cla_202_boarding_pass_channel,cla_250_checked_baggage_t_f,pfl_100_checkin_speed,pfl_120_checkin_staff_service,pfl_350_lounge_service,bdg_100_boarding_announcements,bdg_110_boarding_clarity,bdg_111_boarding_signage,bdg_200_boarding_speed,bdg_300_boarding_staff_service,bdg_400_boarding_hand_lugg_space,cbn_500_cabin_mood_music,crw_100_cabin_crew_helpfulness,crw_110_cabin_crew_feel_special,crw_120_cabin_crew_announcements,cbn_101_cabin_cleanliness,cbn_102_cabin_physical_condition,cbn_201_washrooms_cleanliness,cbn_202_washrooms_physical_condition,cbn_300_cabin_seat_comfort,cbn_650_ronda,usb_100_usb_use_other_verbatim,cla_171_special_meal,cla_711_meal_prepurchase_other_verbatim,cla_713_get_preferred_drink_y_n,cla_800_duty_free_t_f,fnd_110_food_quality,fnd_725_wines_quality,fnd_200_food_drink_portion,fnd_310_food_selection,fnd_320_drinks_selection,fnd_401_food_visual_appeal,fnd_600_food_drink_time_to_clear,fnd_900_food_drink_value_for_money,fnd_800_meal_prepurchase,ifl_700_duty_free_range,ife_100_ife_usability,ife_200_ife_content,cla_600_wifi_other_verbatim,wfi_200_wifi_value_for_money,wfi_300_wifi_ease_of_use,wfi_400_wifi_speed,cla_550_digital_press_other_verbatim,ife_300_digital_press,arr_200_arrivals_ease,arr_300_arrivals_immigration_speed,con_200_connection_information,con_300_connection_clarity,con_400_connection_time,cla_130_additional_needs_t_f,dig_100_web_mobile_t_f,dig_200_web_pc,dig_200_web_tablet,dig_200_web_smartphone,dig_300_web,dig_250_mobile_phone_system,img_330_ease_contact_social_net,rea_100_frequent_flyer_prog_reason,lvl_loyalty_program_survey,end_oe_suggestion_verbatim,end_oe_suggestion_verbatim_translated,sze_num_people,nat_100_nationality,nat_100_nationality_other,cbn_100_cabin,cbn_200_cabin_washrooms_clean,cbn_400_cabin_temperature,cbn_600_newspappers,cbn_450_cabin_temp_description,fnd_100_food_drink_quality,fnd_300_food_drink_choice,fnd_400_food_drink_presentation,fnd_500_food_drink_first_choice,bnd_010_looked_lowest_cost,bnd_020_willing_to_pay_more,bnd_030_iberia_cares_its_customers,prize_100_prize_draw_participate_t_f,prize_300_prize_draw_name,prize_300_prize_draw_email,prize_300_prize_draw_phone,flag_misconnection_misc,delay,flag_mishandling_ahl,flag_mishandling_dpr,num_bags_mishandling_ahl,num_bags_mishandling_dpr,issue_operative,issue_non_operative,issue_category,issue_category_calculated,customer_journey_ctry_origin,customer_journey_ctry_destination,ctry_origin_of_surveyed_flight,ctry_destination_of_surveyed_flight,gr_region,region,ticketing_carrier_orig,ticket_num_orig,coupon_num_orig,ticket_price,ctry_route,issue_category_calculated_d15,codeshare,delay_departure,otp15_takeoff,promoter_binary,detractor_binary,flight_date_utc,boardpoint_stn_code_actual,offpoint_stn_code_actual,calc_dep_diff,punctuality,capacity_business,pax_business,capacity_premium_ec,pax_premium_ec,capacity_economy,pax_economy,load_factor_business,load_factor_premium_ec,load_factor_economy,load_factor
0,52990189,10009487200,8319.000,2023-01-04,2023-01-04 13:35:00,2023-01-04 13:35:00,,EN,ECNHU,07F,TRUE,0.000,0,3,,FALSE,,,,,CDT,MAD,1.000,1.000,CR2,IB,2023-01-04,2023-01-04 12:35:00,2023-01-04 13:55:00,2023-01-04 12:55:00,2023-01-04 13:35:00,2023-01-04 14:00:00,2023-01-04 14:00:00,CDT-MAD,CDT-MAD,SH,,2.000,ECONOMY_SH+MH,1.100,0.519,,,,,2023-01-07 15:30:09,00:06:47,,,MAD,YW,Economy,SH,,,,Passive,8,SURVEYS PROCESSED,2,45-54,7.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.000,8.000,,9.000,4.000,,9.000,9.000,9.000,9.000,7.000,,9.000,,,,,,No,,8.000,I didn’t need reassurance.,No necesitaba tranquilidad.,,Completed,El vuelo estuvo bien pero creo que hubiera sid...,,,,Iberia's website,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iberia website,,"No, I did not want to use it",,,,,,No,,I didn’t need reassurance.,No necesitaba tranquilidad.,Female,Philippines,1,Holiday (more than 5 nights),,Best schedule for my needs,"Provide Iberia with my survey responses, but n...",,,,,,,,,,,,,,,,2023-04-13,2023-01-07 15:36:56,2023-01-04,Cabin+OtherHaul,,,,,,,I did not contact Iberia,,,,,,,With teens aged 13 to 17,With my spouse or partner,,,,,,Submit feedback,IAG_IB,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20.000,0,0,0.000,0.000,0,0,No issue,Operative,ES,ES,,,,,,,,,,Operative,,20.000,1,0,0,2023-01-04,CDT,MAD,20.000,OTP15,6.000,0.000,0.000,0.000,44.000,15.000,0.000,,0.341,0.341
1,54805040,10010991371,8327.000,2023-01-29,2023-01-29 16:50:00,2023-01-29 18:30:00,IB SILVER (OW RUBY),ES,ECLSQ,01A,FALSE,0.000,0,1,,FALSE,,,,,LEU,MAD,1.000,1.000,ATX,IB,2023-01-29,2023-01-29 15:50:00,2023-01-29 16:45:00,2023-01-29 15:45:00,2023-01-29 17:30:00,2023-01-29 18:07:00,2023-01-29 17:07:00,LEU-MAD,LEU-MAD,SH,,4.000,BUSINESS_SH+MH,0.627,0.414,,,,,2023-02-01 15:33:18,00:04:41,,,MAD,YW,Business,SH,,,,Promoter,10,SURVEYS PROCESSED,2,35-44,10.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.000,9.000,,10.000,9.000,,9.000,10.000,,8.000,9.000,,10.000,,,,,6.000,No,,1.000,No se realmente que mas podría hacer,No se realmente que mas podría hacer,,Completed,Muy buena iniciativa la línea Seu-Madrid.,,,Iberia's mobile App,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iberia website,"No, I tried but the lounge was not available","No, I did not want to use it",,,,,,No,,No se realmente que mas podría hacer,No se realmente que mas podría hacer,Male,Spain,2,Other (please specify),vacaciones 2 días,The only airline operating this route,"Provide Iberia with my survey responses, but n...",,,Complimentary complete meals,,,,,,,,,,,,,2023-04-28,2023-02-01 15:37:59,2023-01-29,OtherCabin+OtherHaul,,,,,,,I did not contact Iberia,,,I was travelling on my own,,,,,,,,,,,Submit feedback,IAG_IB,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-5.000,0,0,0.000,0.000,0,0,No issue,No issue,ES,ES,,,,,,,,,,No issue,,-5.000,0,1,0,2023-01-29,LEU,MAD,-5.000,,6.000,5.000,0.000,0.000,66.000,38.000,0.833,,0.576,0.833
2,9386758,9206003320,6841.000,2019-07-23,2019-07-23 23:55:00,2019-07-24 00:00:00,IB GOLD (OW ZAPHIRE),ES,ECJBA,,,,0,1,,N,ES,,,,MAD,BUE,1.000,1.000,346,IB,2019-07-23,2019-07-23 21:55:00,2019-07-23 00:16:00,2019-07-23 22:16:00,2019-07-24 00:00:00,2019-07-23 07:54:00,2019-07-24 00:00:00,MAD-EZE,EZE-MAD,LH,,,ECONOMY_LH,0.731,1.593,J4541,13691753,,,2019-07-26 07:08:24,00:00:00,ENRIQUE.ENPI@GMAIL.COM,MAD,EZE,IB,Economy,LH,ENRIQUE,MADRID,IZQUIERDO,Promoter,9,SURVEYS MARITZ,2,45-54,8.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.000,8.000,,8.000,8.000,8.000,8.000,9.000,9.000,9.000,7.000,6.000,8.000,,8.000,,8.000,9.000,No,,,,,Yes,,,,,,,Airline's check-in desk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Telephone directly with airline,Yes,Yes,Yes,Yes,,,,No,,,,M,ES,,Business/work,,Iberia is a brand I trust,Responses linked to customer details,Yes,Direct from terminal via passenger boarding br...,Complimentary complete meals,No,,,,,,,,,,Directly to the terminal - passenger boarding ...,8.000,2023-02-15,2019-07-26 07:08:24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.000,,16-21,2019-07-23,2019-07-24,2019-07-24,2019-07-25,2019-07-25,D,Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:6...,190.210.131.81,9.000,,,,,,,,,,,,,,,,,,,,,,,,,,9.000,10.000,,,,,,,,,8.000,8.000,Check-in desk,Yes,8.000,9.000,8.000,,,,,,,,,,,,,,,8.000,,,,,,No,6.000,,,,,6.000,,,,,8.000,8.000,,2.000,,,,,8.000,8.000,,,,2.000,Yes - website,PC/Laptop,,,8.000,,,,Oro,,,1.000,Spanish,,9.000,8.000,8.000,,,,,,,9.000,5.000,8.000,,,,,,21.000,0,0,0.000,0.000,0,0,No issue,Operative,MAD,BUE,ES,AR,AMERICA SUR,AMERICA SUR,,,,,AR,Operative,,-1419.000,0,1,0,2019-07-23,MAD,EZE,21.000,OTP15,36.000,29.000,23.000,13.000,300.000,282.000,0.806,0.565,0.940,0.940
3,9974098,9252098247,6845.000,2019-09-03,2019-09-03 12:10:00,2019-09-03 19:50:00,,ES,ECMXV,,,,0,2,,N,,,,,BCN,BUE,1.000,1.000,359,IB,2019-09-03,2019-09-03 10:10:00,2019-09-03 12:20:00,2019-09-03 10:20:00,2019-09-03 22:50:00,2019-09-03 19:41:00,2019-09-03 22:41:00,MAD-EZE,EZE-MAD,LH,,5.000,ECONOMY_LH,0.890,0.729,J47P7,,,,2019-09-10 07:25:17,00:00:00,BELTRAMINOJORGE@GMAIL.COM,MAD,EZE,IB,Economy,LH,DORAEMILIA,REDONDOZANETTA,,Detractor,6,SURVEYS MARITZ,2,65+,8.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.000,8.000,,7.000,8.000,,7.000,8.000,8.000,8.000,8.000,,8.000,,6.000,,,,No,,,,,,,,,,,,Airline's check-in desk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Airline website,No,,,,,,,No,,,,,,,Holiday,,Value for money offer on this flight,Responses linked to customer details,No,Direct from terminal via passenger boarding br...,Complimentary complete meals,No,,,,,Yes,,,,,Directly to the terminal - passenger boarding ...,8.000,2023-02-15,2019-09-10 07:25:17,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.000,,2019-09-03,2019-09-06,2019-09-06,2019-09-09,2019-09-09,D,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,190.183.202.133,6.000,,,,,,,,,,,,,,,,,,,,,,,,,,7.000,7.000,,,,,,,,PC/Laptop,6.000,,Check-in desk,Yes,8.000,8.000,,,,,,,,,,7.000,8.000,,,,,0.000,,,,,Yes,No,7.000,8.000,8.000,8.000,8.000,7.000,7.000,,,,,,,,,,,,,,,,,2.000,No,,,,,,,,Not a member,un poquito mas de espacio en los asientos,un poquito mas de espacio en los asientos,2.000,,,8.000,8.000,8.000,,,,,,,,7.000,,,,,,,10.000,0,0,0.000,0.000,0,0,No issue,Operative,BCN,BUE,ES,AR,AMERICA SUR,AMERICA SUR,,,,,AR,No issue,,10.000,0,0,1,2019-09-03,MAD,EZE,10.000,,31.000,25.000,24.000,22.000,293.000,291.000,0.806,0.917,0.993,0.993
4,7661816,9049000125,440.000,2019-02-16,2019-02-16 19:50:00,2019-02-16 20:55:00,IB CLASSIC,ES,ECJDL,,,,0,1,,N,ES,,,,IBZ,BIO,2.000,2.000,319,IB,2019-02-16,2019-02-16 18:50:00,2019-02-16 19:53:00,2019-02-16 18:53:00,2019-02-16 19:55:00,2019-02-16 21:00:00,2019-02-16 20:00:00,MAD-BIO,BIO-MAD,SH,,,ECONOMY_SH+MH,1.111,1.502,ANÓNIMO,ANÓNIMO,,,2019-02-19 06:05:20,00:00:00,ANÓNIMO,MAD,BIO,IB,Economy,SH,ANÓNIMO,ANÓNIMO,ANÓNIMO,Promoter,9,SURVEYS MARITZ,2,26-34,10.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.000,7.000,,6.000,,,7.000,,8.000,8.000,,,8.000,7.000,5.000,,,8.000,No,3.000,,,,,,,,,,Airline's website,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Airline website,No,,,,,Connected from another flight,,,,,,F,ES,,Holiday,,The only airline operating this route,Responses not linked to customer details,No,Bus service to board,Food and drink items to purchase,No,,,,,,,,,,Directly to the terminal - passenger boarding ...,8.000,2023-02-15,2019-02-19 06:05:20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.000,,45-54,2019-02-16,2019-02-17,2019-02-17,2019-02-18,2019-02-18,M,ANÓNIMO,ANÓNIMO,8.000,,,,,,,,,,,,,,,,,,,,,,,,,,8.000,9.000,,,,,,,,PC/Laptop,6.000,,Smartphone/tablet,Yes,7.000,8.000,,,,,,,,,,,,,,,,6.000,,,,,,,,,,,,,,,,,,,,,,,,,8.000,,,8.000,9.000,2.000,Yes - Website and mobile app,PC/Laptop,,,6.000,Android,,,Clásica,,,1.000,,,8.000,,,,,,,,,0.000,8.000,,,,,,,3.000,0,0,0.000,0.000,0,0,No issue,Operative,ES,ES,ES,ES,ESPAÑA,ESPAÑA,,,,,ES,No issue,,3.000,0,1,0,2019-02-16,MAD,BIO,3.000,,6.000,1.000,0.000,0.000,132.000,106.000,0.167,,0.803,0.803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548435,65187574,10041115167,6274.000,2024-03-04,2024-03-04 16:45:00,2024-03-05 07:55:00,IB CLASSIC,EN,ECLUX,40A,True,0.000,0,2,,False,,IND-INC-BUSINESS,WEB,,ORD,MAD,2.000,1.000,330,IB,2024-03-04,2024-03-04 22:45:00,2024-03-04 17:15:00,2024-03-04 23:15:00,2024-03-05 06:55:00,2024-03-05 08:11:00,2024-03-05 07:11:00,ORD-MAD,MAD-ORD,LH,,4.000,ECONOMY_LH,,,LJ0XM,D5Y2800,,758058145223.000,2024-03-08 17:16:33,00:07:16,MESAUV68@OUTLOOK.COM,ORD,MAD,IB,Economy,LH,MARYELLEN,SAUVAGEAU,,Promoter,9,SURVEYS PROCESSED,2,65+,10.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.000,10.000,,10.000,10.000,,10.000,10.000,10.000,10.000,10.000,,10.000,,,,10.000,,No,,,,,"No, I didn't want to use it",Completed,Enfoque profesional. Atentos a los clientes.,,,,Iberia's website,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Via a travel agent,,Yes,Yes,"No, I didn't want to use it",,,Connected to another flight from [DestinationA...,,,,,Female,United States,2,Holiday (more than 5 nights),,Direct flight,Provide Iberia with my survey responses linked...,,,Complimentary complete meals,,,,,,,,,,,,,2024-03-18,2024-03-08 17:23:49,2024-03-05,Cabin+Haul,,,,,,,I did not contact Iberia,,,,,,,,With my spouse or partner,,,,,No,Submit feedback,IAG_IB,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000,30.000,0,0,0.000,0.000,0,0,No issue,Operative,US,ES,US,ES,AMERICA NORTE,AMERICA NORTE,75.000,8058145223.000,1.000,184.020,US,Operative,IB,30.000,1,1,0,,,,,,,,,,,,,,,
548436,65161851,10041065070,3264.000,2024-03-04,2024-03-04 16:10:00,2024-03-04 18:25:00,,IT,ECMXY,10B,False,0.000,0,4,,False,,DIR-ONLINE,MOB,,MAD,MXP,1.000,1.000,320,IB,2024-03-04,2024-03-04 15:10:00,2024-03-04 16:20:00,2024-03-04 15:20:00,2024-03-04 17:25:00,2024-03-04 18:22:00,2024-03-04 17:22:00,MAD-MXP,MAD-MXP,MH,,4.000,ECONOMY_SH+MH,,,MD90L,,307212803.000,751421886146.000,2024-03-07 18:53:49,00:05:18,PAOLOBARACCO1@GMAIL.COM,MAD,MXP,IB,Economy,SH,EMANUELE,COVELLI,,Passive,8,SURVEYS PROCESSED,2,45-54,8.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.000,9.000,,10.000,9.000,,9.000,8.000,8.000,8.000,7.000,5.000,8.000,,,,,,No,,,,,"No, I could not get it to work",Completed,Demasiadas esperas para bajar,,,Iberia's mobile App,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iberia mobile App,,,Yes,"No, I could not get it to work",,,,No,,,,Female,Italy,2,Short break (up to 5 nights),,Best schedule for my needs,Provide Iberia with my survey responses linked...,,,Food and drink items to purchase,,,,,,,,,,,,,2024-03-18,2024-03-07 18:59:07,2024-03-04,Cabin+OtherHaul,,,,,,,I did not contact Iberia,,,,,,,,With my spouse or partner,,,,,No,Submit feedback,IAG_IB,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000,10.000,0,0,0.000,0.000,0,0,No issue,Operative,ES,IT,ES,IT,EUROPA,EUROPA CEE,75.000,1421886146.000,1.000,45.380,IT,No issue,IB,10.000,0,0,0,,,,,,,,,,,,,,,
548437,65187061,10041075268,442.000,2024-03-04,2024-03-04 22:10:00,2024-03-04 23:15:00,,ES,ECIZH,17C,True,0.000,0,1,,False,,IND-NON-OTA,MOB,,CMN,BIO,2.000,2.000,32A,IB,2024-03-04,2024-03-04 21:10:00,2024-03-04 22:10:00,2024-03-04 21:10:00,2024-03-04 22:15:00,2024-03-04 23:01:00,2024-03-04 22:01:00,MAD-BIO,BIO-MAD,MH,,4.000,ECONOMY_SH+MH,,,KR4ES,,324439358.000,752106465336.000,2024-03-08 14:47:40,00:06:46,HASNAA.SADIK5@GMAIL.COM,MAD,BIO,IB,Economy,SH,HASNAA,SADIKESSAKHI,,Promoter,10,SURVEYS PROCESSED,2,16-21,10.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.000,10.000,,10.000,10.000,,10.000,10.000,10.000,10.000,10.000,,10.000,10.000,,,,,No,,,,,,Completed,Muy contenta,,,Iberia's mobile App,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Using another travel website,,,No,,,Connected from another flight into [OriginAirp...,,,,,,Female,Spain,5,Holiday (more than 5 nights),,Value for money offer on this flight,Provide Iberia with my survey responses linked...,No,Directly from the terminal via a passenger boa...,Food and drink items to purchase,,,,,,,,,,,,,2024-03-18,2024-03-08 14:54:26,2024-03-04,Cabin+OtherHaul,,,,,,,I did not contact Iberia,,,I was travelling on my own,,,,,,,,,,Yes,Continue,IAG_IB,Booking and journey preparation,Check-in,,Boarding,,,,,,,,,,,Ease of booking process on Iberia website/mobi...,,,,,,,,Facil cualquiera lo puede hacer,,Helpfulness of staff at airport check-in area,,Ease of online check-in process,Website/app reliability and performance,,,,,,,,,,,,,,No tuve ningun problema ya que Barajas es muy ...,The organisation of the boarding process,Speed of boarding,Helpfulness of staff at the departure gate,Announcements made at the departure gate,,Social distancing during boarding,,Availability of space for your hand luggage/pe...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000,0.000,0,0,0.000,0.000,0,0,No issue,No issue,MA,ES,ES,ES,ESPAÑA,ESPAÑA,75.000,2106465336.000,2.000,36.610,ES,No issue,IB,0.000,0,1,0,,,,,,,,,,,,,,,
548438,65161266,10041066400,3151.000,2024-03-04,2024-03-04 15:50:00,2024-03-04 18:45:00,,ES,ECNTP,24A,True,0.000,0,4,,False,,IND-INC-LEISURE,MOB,,ATH,MAD,1.000,1.000,320,IB,2024-03-04,2024-03-04 13:50:00,2024-03-04 15:56:00,2024-03-04 13:56:00,2024-03-04 17:45:00,2024-03-04 19:06:00,2024-03-04 18:06:00,ATH-MAD,ATH-MAD,MH,,4.000,ECONOMY_SH+MH,,,P6Q8T,,321257005.000,756627912172.000,2024-03-07 16:35:43,00:10:34,CARLOSMENDEZMARTIN@GMAIL.COM,ATH,MAD,IB,Economy,SH,DARIO,MENDEZRECIO,,Detractor,4,SURVEYS PROCESSED,2,45-54,9.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.000,9.000,,9.000,9.000,,6.000,10.000,10.000,8.000,8.000,,8.000,,,,,,No,,,,,"No, I didn't want to use it",Completed,"En el aeropuerto de Atenas, la persona que me ...",,,,Iberia's website,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Via a travel agent,,,Yes,"No, I didn't want to use it",,,,No,,,,Male,Spain,3,Holiday (more than 5 nights),,Recommendation by travel agency/company travel...,Provide Iberia with my survey responses linked...,,Directly from the terminal via a passenger boa...,Food and drink items to purchase,,,,,,,,,,,,,2024-03-18,2024-03-07 16:46:17,2024-03-04,Cabin+OtherHaul,,,,,,,I did not contact Iberia,,,,,,With children aged 6 to 12,,With my spouse or partner,Other adult party (6 people or less),,,,Yes,Continue,IAG_IB,,,,Boarding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,No se me atentio amablemente en el mostrador d...,,Speed of boarding,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000,6.000,0,0,0.000,0.000,0,0,No issue,Operative,GR,ES,GR,ES,EUROPA,EUROPA CEE,75.000,6627912172.000,1.000,160.690,GR,No issue,IB,6.000,0,0,1,,,,,,,,,,,,,,,


## Agregation logic

Given a date it takes it as an "end_date" and computes every interval with previous dates. Then it perfomr the satisfaction, NPS, load factor and otp aggregations for that particular interval.

In [20]:
# Funciones auxiliares
def calculate_nps(promoters, detractors, total_responses):
    """Calcula el Net Promoter Score (NPS)."""
    return ((promoters - detractors) / total_responses) * 100 if total_responses != 0 else 0

def calculate_weighted_nps(group_df):
    """Calcula el NPS ponderado para un grupo de datos."""
    promoters_weight = group_df.loc[group_df['nps_100'] > 8, 'monthly_weight'].sum()
    detractors_weight = group_df.loc[group_df['nps_100'] <= 6, 'monthly_weight'].sum()
    total_weight = group_df['monthly_weight'].sum()
    
    if total_weight > 0:
        return (promoters_weight - detractors_weight) / total_weight * 100
    else:
        return 0

def calculate_satisfaction(df, variable):
    """Calcula la tasa de satisfacción para una variable dada."""
    satisfied_count = df[df[variable] >= 8].shape[0]
    total_count = df[variable].notnull().sum()
    return (satisfied_count / total_count) * 100 if total_count != 0 else 0

def calculate_otp(df, variable='otp15_takeoff'):
    """Calcula el On-Time Performance (OTP) como el porcentaje de valores igual a 1."""
    on_time_count = (df[variable] == 0).sum()
    total_count = df[variable].notnull().sum()
    return (on_time_count / total_count) * 100 if total_count > 0 else 0


def calculate_load_factor(df, pax_column, capacity_column):
    """Calcula el factor de carga para una cabina específica."""
    total_pax = df[pax_column].sum()
    total_capacity = df[capacity_column].sum()
    # Evitar la división por cero
    if total_capacity > 0:
        return (total_pax / total_capacity) * 100
    else:
        return 0

    
def calculate_metrics_summary(df, start_date, end_date, touchpoints):
    # Filtrar por rango de fechas
    df_filtered = df[(df['date_flight_local'] >= pd.to_datetime(start_date)) & (df['date_flight_local'] <= pd.to_datetime(end_date))]
    
    # Mapeo de cabinas a columnas de pax y capacidad
    cabin_mapping = {
        'Economy': ('pax_economy', 'capacity_economy'),
        'Business': ('pax_business', 'capacity_business'),
        'Premium Economy': ('pax_premium_ec', 'capacity_premium_ec')
    }
    
    results_list = []
    
    for (cabin, haul), group_df in df_filtered.groupby(['cabin_in_surveyed_flight', 'haul']):
        result = {
            'start_date': start_date,
            'end_date': end_date,
            'cabin_in_surveyed_flight': cabin,
            'haul': haul,
            'otp15_takeoff': calculate_otp(group_df)
        }
        
        # Calcula el NPS para el grupo
        promoters = (group_df['nps_100'] >= 9).sum()
        detractors = (group_df['nps_100'] <= 6).sum()
        total_responses = group_df['nps_100'].notnull().sum()
        result['NPS'] = calculate_nps(promoters, detractors, total_responses) if total_responses else None
        
        # Calcula el NPS ponderado para el grupo
        result['NPS_weighted'] = calculate_weighted_nps(group_df)
        
        # Satisfacción para cada touchpoint
        for tp in touchpoints:
            result[f'{tp}_satisfaction'] = calculate_satisfaction(group_df, tp)
        
        # Calcula el factor de carga para la cabina
        pax_column, capacity_column = cabin_mapping.get(cabin, (None, None))
        if pax_column and capacity_column:
            result['load_factor'] = calculate_load_factor(group_df, pax_column, capacity_column)
        
        results_list.append(result)
    
    return pd.DataFrame(results_list)

def generate_date_intervals(start_date, end_date):
    """Genera una lista de tuplas con intervalos de fechas desde start_date hasta end_date."""
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    intervals = [(start_date + pd.Timedelta(days=d), end_date) for d in range((end_date - start_date).days + 1)]
    return intervals

def calculate_metrics_for_intervals(df, touchpoints, start_date, end_date):
    """Calcula las métricas para todos los intervalos posibles hasta end_date."""
    intervals = generate_date_intervals(start_date, end_date)
    all_metrics = []

    for interval_start, interval_end in intervals:
        interval_metrics = calculate_metrics_summary(df, interval_start, interval_end, touchpoints)
        print(f"Interval: {interval_start} to {interval_end}, Data points: {len(interval_metrics)}")
        all_metrics.append(interval_metrics)

    
    # Concatenar todos los DataFrames de resultados en uno solo
    results_df = pd.concat(all_metrics, ignore_index=True)
    return results_df

# Ejemplo de uso:
# touchpoints = ['tp1', 'tp2', 'tp3']  # Asegúrate de reemplazar estos con los nombres reales de tus touchpoints
df_result = calculate_metrics_summary(df_historic, '2023-01-01', '2023-01-31', touchpoints)
# print(df_result)

# Definir la fecha de inicio del año y la fecha de fin específica
start_date = '2024-01-01'
end_date = '2024-04-08'



results_intervals_df = calculate_metrics_for_intervals(df_historic, touchpoints, start_date, end_date)




Interval: 2024-01-01 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-02 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-03 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-04 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-05 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-06 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-07 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-08 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-09 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-10 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-11 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-12 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-13 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-14 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-15 00:00:00 to 2

In [21]:
results_intervals_df

Unnamed: 0,start_date,end_date,cabin_in_surveyed_flight,haul,otp15_takeoff,NPS,NPS_weighted,bkg_200_journey_preparation_satisfaction,pfl_100_checkin_satisfaction,pfl_200_security_satisfaction,pfl_300_lounge_satisfaction,pfl_500_boarding_satisfaction,ifl_300_cabin_satisfaction,ifl_200_flight_crew_annoucements_satisfaction,ifl_600_wifi_satisfaction,ifl_500_ife_satisfaction,ifl_400_food_drink_satisfaction,ifl_100_cabin_crew_satisfaction,arr_100_arrivals_satisfaction,con_100_connections_satisfaction,pun_100_punctuality_satisfaction,loy_200_loyalty_programme_satisfaction,inm_400_issues_response_satisfaction,img_310_ease_contact_phone_satisfaction,load_factor
0,2024-01-01,2024-04-08,Business,LH,83.471,39.000,40.574,73.785,81.774,82.710,75.671,77.478,75.557,81.900,48.767,71.854,68.803,81.467,80.439,71.147,84.219,72.015,13.647,60.757,90.360
1,2024-01-01,2024-04-08,Business,SH,89.289,48.575,47.232,74.530,82.343,80.877,73.256,78.349,76.335,81.212,51.624,45.745,75.261,88.911,81.651,74.341,82.286,71.352,17.597,53.559,80.510
2,2024-01-01,2024-04-08,Economy,LH,83.529,27.565,28.852,69.436,75.944,82.965,69.366,76.643,68.981,80.614,46.554,76.373,59.857,75.973,78.934,69.770,80.330,65.172,12.318,54.599,88.559
3,2024-01-01,2024-04-08,Economy,SH,88.670,38.700,38.445,71.945,79.003,79.792,74.854,75.769,73.002,78.799,43.836,44.170,54.331,81.334,78.835,69.777,79.162,65.992,12.525,53.581,87.435
4,2024-01-01,2024-04-08,Premium Economy,LH,83.810,32.380,34.630,71.584,78.958,83.849,81.714,79.264,71.982,79.852,47.324,75.077,56.324,74.111,79.973,70.355,82.510,73.443,15.302,52.907,87.724
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,2024-03-26,2024-04-08,Business,LH,71.429,23.810,0.000,75.000,85.714,80.952,84.211,66.667,71.429,85.714,66.667,70.000,66.667,71.429,80.952,66.667,61.905,71.429,0.000,83.333,93.671
426,2024-03-26,2024-04-08,Business,SH,82.143,57.143,0.000,79.167,77.778,85.714,77.273,75.000,75.000,74.074,55.556,100.000,76.923,82.143,78.571,72.727,82.143,71.429,33.333,66.667,0.000
427,2024-03-26,2024-04-08,Economy,LH,72.277,30.693,0.000,70.115,74.490,86.316,0.000,78.571,71.134,77.083,40.351,79.070,70.103,75.258,72.449,71.154,77.228,65.517,21.053,57.143,96.766
428,2024-03-26,2024-04-08,Economy,SH,82.427,43.515,0.000,76.382,83.913,81.140,75.000,77.826,76.496,76.000,51.923,45.000,54.074,82.609,81.702,73.333,76.151,65.333,21.429,70.833,0.000


In [22]:
import pandas as pd
from datetime import datetime, timedelta

# Assume df_historic and touchpoints are defined elsewhere

# Convert start_date and end_date strings to datetime objects for manipulation
start_date = datetime.strptime('2024-01-01', '%Y-%m-%d')
original_end_date = datetime.strptime('2024-04-08', '%Y-%m-%d')

# Initialize an empty DataFrame to store the results from each interval
all_intervals_results = pd.DataFrame()

# Loop over the range from (original_end_date - 15 days) to original_end_date
for offset in range(0, 16):  # Including the 15th day
    # Calculate the new end_date for this iteration
    end_date = original_end_date - timedelta(days=offset)
    
    # Convert end_date back to string format if your function expects a string
    end_date_str = end_date.strftime('%Y-%m-%d')
    
    # Call your function with the current interval's end_date
    interval_results = calculate_metrics_for_intervals(df_historic, touchpoints, start_date.strftime('%Y-%m-%d'), end_date_str)
    
    # Assuming interval_results is a DataFrame, you may want to add a column to indicate the end_date for this interval's results
    interval_results['interval_end_date'] = end_date_str
    
    # Append the results for this interval to the all_intervals_results DataFrame
    all_intervals_results = pd.concat([all_intervals_results, interval_results])

# Reset the index of the final DataFrame if necessary
all_intervals_results.reset_index(drop=True, inplace=True)

# Now, all_intervals_results contains the metrics calculated for each interval


Interval: 2024-01-01 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-02 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-03 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-04 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-05 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-06 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-07 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-08 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-09 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-10 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-11 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-12 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-13 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-14 00:00:00 to 2024-04-08 00:00:00, Data points: 5
Interval: 2024-01-15 00:00:00 to 2

In [23]:
all_intervals_results

Unnamed: 0,start_date,end_date,cabin_in_surveyed_flight,haul,otp15_takeoff,NPS,NPS_weighted,bkg_200_journey_preparation_satisfaction,pfl_100_checkin_satisfaction,pfl_200_security_satisfaction,pfl_300_lounge_satisfaction,pfl_500_boarding_satisfaction,ifl_300_cabin_satisfaction,ifl_200_flight_crew_annoucements_satisfaction,ifl_600_wifi_satisfaction,ifl_500_ife_satisfaction,ifl_400_food_drink_satisfaction,ifl_100_cabin_crew_satisfaction,arr_100_arrivals_satisfaction,con_100_connections_satisfaction,pun_100_punctuality_satisfaction,loy_200_loyalty_programme_satisfaction,inm_400_issues_response_satisfaction,img_310_ease_contact_phone_satisfaction,load_factor,interval_end_date
0,2024-01-01,2024-04-08,Business,LH,83.471,39.000,40.574,73.785,81.774,82.710,75.671,77.478,75.557,81.900,48.767,71.854,68.803,81.467,80.439,71.147,84.219,72.015,13.647,60.757,90.360,2024-04-08
1,2024-01-01,2024-04-08,Business,SH,89.289,48.575,47.232,74.530,82.343,80.877,73.256,78.349,76.335,81.212,51.624,45.745,75.261,88.911,81.651,74.341,82.286,71.352,17.597,53.559,80.510,2024-04-08
2,2024-01-01,2024-04-08,Economy,LH,83.529,27.565,28.852,69.436,75.944,82.965,69.366,76.643,68.981,80.614,46.554,76.373,59.857,75.973,78.934,69.770,80.330,65.172,12.318,54.599,88.559,2024-04-08
3,2024-01-01,2024-04-08,Economy,SH,88.670,38.700,38.445,71.945,79.003,79.792,74.854,75.769,73.002,78.799,43.836,44.170,54.331,81.334,78.835,69.777,79.162,65.992,12.525,53.581,87.435,2024-04-08
4,2024-01-01,2024-04-08,Premium Economy,LH,83.810,32.380,34.630,71.584,78.958,83.849,81.714,79.264,71.982,79.852,47.324,75.077,56.324,74.111,79.973,70.355,82.510,73.443,15.302,52.907,87.724,2024-04-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6860,2024-03-24,2024-03-24,Business,LH,100.000,48.387,0.000,72.414,86.207,96.429,79.167,89.655,80.645,96.667,57.895,78.571,74.194,90.000,90.323,80.952,93.548,75.000,50.000,100.000,100.000,2024-03-24
6861,2024-03-24,2024-03-24,Business,SH,96.429,57.143,0.000,60.870,77.778,74.074,64.706,80.000,70.370,77.778,50.000,0.000,74.074,89.286,92.308,62.500,89.286,66.667,0.000,66.667,0.000,2024-03-24
6862,2024-03-24,2024-03-24,Economy,LH,84.694,26.531,0.000,73.563,79.688,85.714,33.333,82.723,67.010,80.729,42.553,68.553,56.186,76.289,79.365,74.157,83.163,70.492,18.182,71.429,97.223,2024-03-24
6863,2024-03-24,2024-03-24,Economy,SH,95.465,48.687,0.000,74.277,82.178,79.404,62.500,78.818,72.861,81.390,41.176,40.625,56.471,83.756,83.333,78.049,84.010,69.767,8.696,57.895,0.000,2024-03-24


In [24]:
all_intervals_results.to_csv('intervals.csv')

# 2. Prediction with Darts model

In [25]:
!pip install darts==0.27.1 optuna==3.5.0 shap==0.44.0

[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
!pip install lightgbm==4.1.0

[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [27]:
import darts
from darts import TimeSeries
from darts.utils.timeseries_generation import (
    gaussian_timeseries,
    linear_timeseries,
    sine_timeseries,
)

from darts.metrics import mape, smape, mae
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries

from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import RandomForestRegressor

import lightgbm

from darts.models import LightGBMModel

from darts.models import LightGBMModel, RandomForest, LinearRegressionModel
from darts.utils.statistics import check_seasonality, plot_acf, plot_residuals_analysis

from darts.explainability.shap_explainer import ShapExplainer
import pickle
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from darts.models import LinearRegressionModel, LightGBMModel, RandomForest
from calendar import month_name as mn
import os

import shap


In [28]:
import pandas as pd

In [29]:
day_predict_df=pd.read_csv('intervals.csv')

In [30]:
day_predict_df

Unnamed: 0.1,Unnamed: 0,start_date,end_date,cabin_in_surveyed_flight,haul,otp15_takeoff,NPS,NPS_weighted,bkg_200_journey_preparation_satisfaction,pfl_100_checkin_satisfaction,pfl_200_security_satisfaction,pfl_300_lounge_satisfaction,pfl_500_boarding_satisfaction,ifl_300_cabin_satisfaction,ifl_200_flight_crew_annoucements_satisfaction,ifl_600_wifi_satisfaction,ifl_500_ife_satisfaction,ifl_400_food_drink_satisfaction,ifl_100_cabin_crew_satisfaction,arr_100_arrivals_satisfaction,con_100_connections_satisfaction,pun_100_punctuality_satisfaction,loy_200_loyalty_programme_satisfaction,inm_400_issues_response_satisfaction,img_310_ease_contact_phone_satisfaction,load_factor,interval_end_date
0,0,2024-01-01,2024-04-08,Business,LH,83.471,39.000,40.574,73.785,81.774,82.710,75.671,77.478,75.557,81.900,48.767,71.854,68.803,81.467,80.439,71.147,84.219,72.015,13.647,60.757,90.360,2024-04-08
1,1,2024-01-01,2024-04-08,Business,SH,89.289,48.575,47.232,74.530,82.343,80.877,73.256,78.349,76.335,81.212,51.624,45.745,75.261,88.911,81.651,74.341,82.286,71.352,17.597,53.559,80.510,2024-04-08
2,2,2024-01-01,2024-04-08,Economy,LH,83.529,27.565,28.852,69.436,75.944,82.965,69.366,76.643,68.981,80.614,46.554,76.373,59.857,75.973,78.934,69.770,80.330,65.172,12.318,54.599,88.559,2024-04-08
3,3,2024-01-01,2024-04-08,Economy,SH,88.670,38.700,38.445,71.945,79.003,79.792,74.854,75.769,73.002,78.799,43.836,44.170,54.331,81.334,78.835,69.777,79.162,65.992,12.525,53.581,87.435,2024-04-08
4,4,2024-01-01,2024-04-08,Premium Economy,LH,83.810,32.380,34.630,71.584,78.958,83.849,81.714,79.264,71.982,79.852,47.324,75.077,56.324,74.111,79.973,70.355,82.510,73.443,15.302,52.907,87.724,2024-04-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6860,6860,2024-03-24,2024-03-24,Business,LH,100.000,48.387,0.000,72.414,86.207,96.429,79.167,89.655,80.645,96.667,57.895,78.571,74.194,90.000,90.323,80.952,93.548,75.000,50.000,100.000,100.000,2024-03-24
6861,6861,2024-03-24,2024-03-24,Business,SH,96.429,57.143,0.000,60.870,77.778,74.074,64.706,80.000,70.370,77.778,50.000,0.000,74.074,89.286,92.308,62.500,89.286,66.667,0.000,66.667,0.000,2024-03-24
6862,6862,2024-03-24,2024-03-24,Economy,LH,84.694,26.531,0.000,73.563,79.688,85.714,33.333,82.723,67.010,80.729,42.553,68.553,56.186,76.289,79.365,74.157,83.163,70.492,18.182,71.429,97.223,2024-03-24
6863,6863,2024-03-24,2024-03-24,Economy,SH,95.465,48.687,0.000,74.277,82.178,79.404,62.500,78.818,72.861,81.390,41.176,40.625,56.471,83.756,83.333,78.049,84.010,69.767,8.696,57.895,0.000,2024-03-24


In [31]:
def process_dataframe(df):
    df.drop(columns=['pun_100_punctuality_satisfaction', 'inm_400_issues_response_satisfaction'], inplace=True)
    # Agrupar y procesar los datos
    grouped_dfs = {}
    features = {}
    for group_name, group_data in df.groupby(['cabin_in_surveyed_flight', 'haul']):
        cabin_value, haul_value = group_name
        group_df = group_data.copy()
        group_df_name = f'{cabin_value}_{haul_value}_df'
        
        # Identificar las columnas de características
        satisfaction_cols = [col for col in df.columns if col.endswith('_satisfaction')]
        otp_cols = ['otp15_takeoff']
        features_cols = satisfaction_cols + ['load_factor'] + otp_cols
        cols_to_keep = ['start_date','end_date','cabin_in_surveyed_flight', 'haul'] + features_cols + ['NPS_weighted']

        # Filtrar las columnas en el grupo y actualizar el diccionario de características
        grouped_df = group_df[cols_to_keep]
        features[group_df_name] = features_cols
        grouped_dfs[group_df_name] = grouped_df

    # Reconstruir el DataFrame original
    df = pd.concat(grouped_dfs.values())
    df.reset_index(drop=True, inplace=True)

    return df, grouped_dfs, features

# Aplicar la función a cada DataFrame y almacenar los resultados en las variables correspondientes
day_predict_df, day_predict_df_grouped_dfs, features_cols = process_dataframe(day_predict_df)

In [32]:
day_predict_df_grouped_dfs.keys()

dict_keys(['Business_LH_df', 'Business_SH_df', 'Economy_LH_df', 'Economy_SH_df', 'Premium Economy_LH_df'])

In [33]:
os.getcwd()

'/root/NPS forecasting model/nps_aggregated_model/Pipeline target_explainability_model'

In [34]:
satisfaction_cols = [col for col in day_predict_df.columns if col.endswith('_satisfaction')]
otp_cols = ['otp15_takeoff']
features_cols = satisfaction_cols + ['load_factor'] + otp_cols

In [35]:
import pandas as pd
import numpy as np
import pickle
import os
import pandas as pd
from darts.timeseries import TimeSeries
import os
import pickle


def compute_shap_and_prediction(row, key, features_cols):
    """
    Computes SHAP values and the predicted NPS for a given row.
    
    Parameters:
    - row_df: The DataFrame row for which to compute SHAP values and prediction.
    - key: The key identifying the specific model and scaler to use.
    - features_cols: List of column names representing features used by the model.
    
    Returns:
    - A tuple containing SHAP values as a dictionary and the predicted NPS.
    """
    # Logic to prepare the row for SHAP value computation and prediction
    aux_nps_ts = TimeSeries.from_series(pd.Series([0]))
    aux_row = pd.DataFrame(0, index=[0], columns=row.columns)
    row_df = pd.concat([aux_row, row]).reset_index(drop=True)
    
    # Load the pre-trained model and scaler
    best_tuned_model_dataframe_path = os.path.join('targets_model', f"best_tuned_dataframe_{key}.pkl")
    with open(best_tuned_model_dataframe_path, 'rb') as dataframe_file:
        best_tuned_model = pickle.load(dataframe_file)
    
    future_scaler_path = os.path.join('targets_model', f"future_scaler_{key}.pkl")
    with open(future_scaler_path, 'rb') as scaler_file:
        future_scaler = pickle.load(scaler_file)
    
    future_covariates_ts = TimeSeries.from_dataframe(row_df[features_cols])[-1:]
    future_covariates_ts_scaled = future_scaler.transform(future_covariates_ts)
    
    model_file_path = os.path.join('targets_model', f"best_tuned_mae_model_{key}_{best_tuned_model['model_name']}.pkl")
    with open(model_file_path, 'rb') as model_file:
        model = pickle.load(model_file)
    
    # Compute SHAP values and prediction
    shap_explain = ShapExplainer(model=model)
    shap_explained = shap_explain.explain(aux_nps_ts, foreground_future_covariates=future_covariates_ts_scaled)
    shap_explanation = shap_explained.get_shap_explanation_object(horizon=1)

    shap_values = shap_explanation[0].values
    base_value = shap_explanation[0].base_values
    pred_value = base_value + shap_values.sum()
    feature_names=[]
    for feat in shap_explanation.feature_names:
        name = [f for f in features_cols if f in feat]
        feature_names.append(name[0])
    
    
    # Convert SHAP values to a dictionary and adjust the logic based on your ShapExplainer
    shap_values_dict = {f"{feature}_nps": value for feature, value in zip(feature_names, shap_values)}
    shap_values_dict["out_prob_base"] = base_value,
    shap_values_dict["out_prob_nps"] = pred_value,
    
    return shap_values_dict


# Initialize a dictionary to store the augmented DataFrames
augmented_dfs = {}

for key in day_predict_df_grouped_dfs.keys():
    # Initialize a list to collect augmented rows
    augmented_rows = []

    for index in range(len(day_predict_df_grouped_dfs[key])):
        # Access the row by its index using .iloc
        row_df = day_predict_df_grouped_dfs[key].iloc[[index]]

        # Compute SHAP values and predicted NPS here...
        # Assuming `compute_shap_and_prediction` is a function you'd implement
        # This function should return SHAP values as a dict and the predicted NPS
        shap_values = compute_shap_and_prediction(row_df, key, features_cols)

        # For each feature, add its SHAP value to the row
        for feature_name, shap_value in shap_values.items():
            row_df[f'{feature_name}'] = shap_value

        # Add base value and predicted NPS columns
        # row_df['Base Value'] = shap_values['base_value']  # Adjust based on how you obtain the base value
        # row_df['Predicted NPS'] = predicted_nps
        print(key)
        print(index)
        print(row_df[['NPS_weighted', 'out_prob_nps']])

        # Append the augmented row to the list
        augmented_rows.append(row_df)
        

    # Concatenate all augmented rows to form the complete augmented DataFrame
    augmented_dfs[key] = pd.concat(augmented_rows).reset_index(drop=True)

# `augmented_dfs` now contains the augmented DataFrames with SHAP values and predictions
augmented_dfs

Business_LH_df
0
   NPS_weighted  out_prob_nps
0        40.574        39.816
Business_LH_df
1
   NPS_weighted  out_prob_nps
5        40.399        40.233
Business_LH_df
2
    NPS_weighted  out_prob_nps
10        40.175        40.233
Business_LH_df
3
    NPS_weighted  out_prob_nps
15        40.939        40.233
Business_LH_df
4
    NPS_weighted  out_prob_nps
20        40.684        40.233
Business_LH_df
5
    NPS_weighted  out_prob_nps
25        40.619        40.233
Business_LH_df
6
    NPS_weighted  out_prob_nps
30        40.715        40.233
Business_LH_df
7
    NPS_weighted  out_prob_nps
35        40.737        40.233
Business_LH_df
8
    NPS_weighted  out_prob_nps
40        40.918        40.233
Business_LH_df
9
    NPS_weighted  out_prob_nps
45        41.122        40.233
Business_LH_df
10
    NPS_weighted  out_prob_nps
50        41.537        40.568
Business_LH_df
11
    NPS_weighted  out_prob_nps
55        41.477        40.568
Business_LH_df
12
    NPS_weighted  out_prob_nps
60   

{'Business_LH_df':       start_date    end_date cabin_in_surveyed_flight haul  \
 0     2024-01-01  2024-04-08                 Business   LH   
 1     2024-01-02  2024-04-08                 Business   LH   
 2     2024-01-03  2024-04-08                 Business   LH   
 3     2024-01-04  2024-04-08                 Business   LH   
 4     2024-01-05  2024-04-08                 Business   LH   
 ...          ...         ...                      ...  ...   
 1368  2024-03-20  2024-03-24                 Business   LH   
 1369  2024-03-21  2024-03-24                 Business   LH   
 1370  2024-03-22  2024-03-24                 Business   LH   
 1371  2024-03-23  2024-03-24                 Business   LH   
 1372  2024-03-24  2024-03-24                 Business   LH   
 
       bkg_200_journey_preparation_satisfaction  pfl_100_checkin_satisfaction  \
 0                                       73.785                        81.774   
 1                                       73.716               

In [36]:
augmented_dfs['Business_LH_df'][['NPS_weighted','out_prob_nps']]

Unnamed: 0,NPS_weighted,out_prob_nps
0,40.574,39.816
1,40.399,40.233
2,40.175,40.233
3,40.939,40.233
4,40.684,40.233
...,...,...
1368,0.000,41.832
1369,0.000,37.642
1370,0.000,38.341
1371,0.000,48.370


In [37]:
    # Reconstruir el DataFrame original
df = pd.concat(augmented_dfs.values())
df.reset_index(drop=True, inplace=True)

In [38]:
df

Unnamed: 0,start_date,end_date,cabin_in_surveyed_flight,haul,bkg_200_journey_preparation_satisfaction,pfl_100_checkin_satisfaction,pfl_200_security_satisfaction,pfl_300_lounge_satisfaction,pfl_500_boarding_satisfaction,ifl_300_cabin_satisfaction,ifl_200_flight_crew_annoucements_satisfaction,ifl_600_wifi_satisfaction,ifl_500_ife_satisfaction,ifl_400_food_drink_satisfaction,ifl_100_cabin_crew_satisfaction,arr_100_arrivals_satisfaction,con_100_connections_satisfaction,loy_200_loyalty_programme_satisfaction,img_310_ease_contact_phone_satisfaction,load_factor,otp15_takeoff,NPS_weighted,bkg_200_journey_preparation_satisfaction_nps,pfl_100_checkin_satisfaction_nps,pfl_200_security_satisfaction_nps,pfl_300_lounge_satisfaction_nps,pfl_500_boarding_satisfaction_nps,ifl_300_cabin_satisfaction_nps,ifl_200_flight_crew_annoucements_satisfaction_nps,ifl_600_wifi_satisfaction_nps,ifl_500_ife_satisfaction_nps,ifl_400_food_drink_satisfaction_nps,ifl_100_cabin_crew_satisfaction_nps,arr_100_arrivals_satisfaction_nps,con_100_connections_satisfaction_nps,loy_200_loyalty_programme_satisfaction_nps,img_310_ease_contact_phone_satisfaction_nps,load_factor_nps,otp15_takeoff_nps,out_prob_base,out_prob_nps
0,2024-01-01,2024-04-08,Business,LH,73.785,81.774,82.710,75.671,77.478,75.557,81.900,48.767,71.854,68.803,81.467,80.439,71.147,72.015,60.757,90.360,83.471,40.574,-0.572,-0.178,-0.119,-0.129,-1.066,-1.082,0.009,0.715,-0.009,-0.091,-1.179,1.507,0.448,-1.416,-0.561,-0.123,0.094,43.569,39.816
1,2024-01-02,2024-04-08,Business,LH,73.716,81.729,82.753,75.687,77.494,75.471,81.904,48.729,71.898,68.790,81.461,80.506,71.171,72.155,60.832,90.402,83.439,40.399,-0.401,-0.088,-0.119,-0.129,-1.066,-1.514,0.009,0.715,-0.009,-0.091,-1.017,1.712,0.435,-1.184,-0.561,-0.123,0.094,43.569,40.233
2,2024-01-03,2024-04-08,Business,LH,73.634,81.669,82.659,75.663,77.319,75.444,81.844,48.593,71.776,68.740,81.440,80.397,71.012,72.244,60.839,90.427,83.466,40.175,-0.401,-0.088,-0.119,-0.129,-1.066,-1.514,0.009,0.715,-0.009,-0.091,-1.017,1.712,0.435,-1.184,-0.561,-0.123,0.094,43.569,40.233
3,2024-01-04,2024-04-08,Business,LH,73.819,81.727,82.683,75.824,77.543,75.656,81.906,48.635,71.929,69.018,81.617,80.502,71.031,72.168,61.111,90.366,83.685,40.939,-0.521,-0.178,-0.119,-0.129,-1.066,-1.082,0.009,0.715,-0.009,-0.091,-1.046,1.507,0.448,-1.184,-0.561,-0.123,0.094,43.569,40.233
4,2024-01-05,2024-04-08,Business,LH,73.826,81.745,82.546,75.897,77.581,75.582,81.799,48.260,71.815,68.970,81.758,80.426,70.727,72.092,60.838,90.304,83.814,40.684,-0.521,-0.178,-0.119,-0.129,-1.066,-1.082,0.009,0.715,-0.009,-0.091,-1.046,1.507,0.448,-1.184,-0.561,-0.123,0.094,43.569,40.233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6860,2024-03-20,2024-03-24,Premium Economy,LH,59.494,79.121,77.778,93.333,80.000,61.957,75.862,42.105,67.532,43.956,66.304,75.281,75.676,69.388,63.636,97.887,87.097,0.000,-0.287,-1.402,-0.261,0.410,0.076,-4.940,-0.260,0.081,-1.095,-3.782,-3.609,-0.810,0.106,-0.090,1.453,-3.929,0.153,32.982,14.796
6861,2024-03-21,2024-03-24,Premium Economy,LH,64.912,75.385,76.923,100.000,78.125,62.121,74.603,41.860,63.793,43.077,66.667,73.438,66.667,66.667,57.143,97.447,86.567,0.000,-0.239,-1.622,-0.238,0.388,0.069,-4.967,-0.279,0.188,-1.542,-4.065,-2.623,-1.525,-0.424,-0.271,0.930,-4.002,0.038,32.982,12.799
6862,2024-03-22,2024-03-24,Premium Economy,LH,65.854,78.723,80.851,100.000,78.261,68.750,74.468,42.424,69.048,41.667,65.957,76.087,70.588,76.000,66.667,97.076,85.417,0.000,-0.190,-1.356,0.328,0.379,0.088,-4.616,-0.249,0.170,-0.876,-4.815,-3.455,-0.353,-0.248,-0.049,1.512,-4.178,-0.336,32.982,14.737
6863,2024-03-23,2024-03-24,Premium Economy,LH,68.000,82.759,89.655,100.000,82.143,70.000,75.862,45.000,76.923,50.000,68.966,82.143,90.000,80.000,100.000,95.495,86.667,0.000,-0.172,-1.310,0.709,0.279,-0.006,-4.756,-0.413,-0.094,-0.654,-0.168,-2.483,0.748,1.282,0.190,2.081,-3.896,0.049,32.982,24.368


In [39]:
df.to_csv('example_aggregated_for_20240408.csv')