In [38]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import streamlit as st
from pycaret.regression import *

In [44]:
### preprocessing 

def split_category(value):
    vals = []
    if '|' in value:
        vals = value.split('|')
    else:
        vals.append(value)
    return vals

def split_columns_with_multiple_values(data):
    data['Airline'] = data['Airline'].apply(split_category)
    data['Places Covered'] = data['Places Covered'].apply(split_category)
    data['Sightseeing Places Covered'] = data['Sightseeing Places Covered'].apply(split_category)
    data['Hotel Details'] = data['Hotel Details'].apply(split_category)
    return data

def one_hot_encoding_on_columns(data):
    dummy_type = pd.get_dummies(data['Package Type'], prefix='type')
    data.drop(columns=['Package Type'], inplace=True)
    data = pd.concat([data, dummy_type], axis=1)

    dummy_city = pd.get_dummies(data['Start City'], prefix='sc')
    data.drop(columns=['Start City'], inplace=True)
    data = pd.concat([data, dummy_city], axis=1)
    return data

def show_category(series):
    values = {}
    for val in series:
        for each in val:
            if each in values:
                values[each] += 1
            else:
                values[each] = 1
    return values

def make_feature_col(series, all_keys):
    feature_dict = {}
    for key in all_keys:
        feature_dict[key] = []

    for items in series:
        for key in all_keys:
            if key not in items:
                feature_dict[key].append(0)
            else:
                feature_dict[key].append(1)

    return pd.DataFrame(feature_dict)

def structuring_columns(data):
    change = lambda pc: [each+'_ae' for each in pc]
    A_all_keys = show_category(data['Airline']).keys()
    A_all_keys = change(A_all_keys)
    airline = make_feature_col(data['Airline'], A_all_keys)
    data = pd.concat([data, airline], axis=1)
    
    change = lambda pc: [each+'_pc' for each in pc]
    PC_all_keys = show_category(data['Places Covered']).keys()
    PC_all_keys = change(PC_all_keys)
    p_covered = make_feature_col(data['Places Covered'], PC_all_keys)
    data = pd.concat([data, p_covered], axis=1)

    change = lambda pc: [each+'_hd' for each in pc]
    HD_all_keys = show_category(data['Hotel Details']).keys()
    HD_all_keys = change(PC_all_keys)
    h_details = make_feature_col(data['Hotel Details'], HD_all_keys)
    data = pd.concat([data, h_details], axis=1)

    return data

def remove_useless_columns(data):
    data.drop('Uniq Id', inplace=True, axis=1)
    data.drop('Package Name', inplace=True, axis=1)
    data.drop('Destination', inplace=True, axis=1)
    data.drop('Cancellation Rules', inplace=True, axis=1)
    return data

def switch_hotel_rating_to_numerical(data):
    data['Hotel Details'] = data['Hotel Details'].str.replace(":One",':1.0')
    data['Hotel Details'] = data['Hotel Details'].str.replace(":Two",':2.0')
    data['Hotel Details'] = data['Hotel Details'].str.replace(":Three",':3.0')
    data['Hotel Details'] = data['Hotel Details'].str.replace(":Four",':4.0')
    data['Hotel Details'] = data['Hotel Details'].str.replace(":Five",':5.0')
    return data

def preprocess_data(data):
    data = remove_useless_columns(data)
    data = switch_hotel_rating_to_numerical(data)
    print('HOTEL AAAAAAAAAAAAAAAAAAAA')
    display(data['Hotel Details'])
    data = split_columns_with_multiple_values(data)
    data = one_hot_encoding_on_columns(data)
    data = structuring_columns(data)
    return(data)

In [45]:
train = pd.read_csv('dataset\Train.csv')
validation = pd.read_csv('dataset\Test.csv')

final_train_data = preprocess_data(train)
final_validation_data = preprocess_data(validation)

HOTEL AAAAAAAAAAAAAAAAAAAA


0                                            Not Available
1        The Orchard Retreat & Spa:4.6|WelcomHotel Pine...
2             The Ananta:4.4|juSTa Lake Nahargarh Palace:4
3        Elixir Hills Suites Resort & Spa-MMT Holidays ...
4        Dusit Thani Pattaya - MMT Special:4.5|Amari Wa...
                               ...                        
20995    Summit Thistle Villa Resort & Spa:Four|Snow Va...
20996    The Orchard Retreat & Spa:4.6|The Khyber Himal...
20997         Mount Lavinia Hotel-MMT HOLIDAYS SPECIAL:4.0
20998    Ramada by Wyndham Darjeeling Gandhi Road:4.4|T...
20999    Iceberg Hill Hotel-MMT Holidays Special:3.8|Sa...
Name: Hotel Details, Length: 21000, dtype: object

HOTEL AAAAAAAAAAAAAAAAAAAA


0                            Novotel Goa Resort & Spa:4.1
1        The Windflower Jungle Resorts & Spa Bandipur:4.4
2                                           Not Available
3                                           Not Available
4       Casino Hotel - Cgh Earth-MMT Holidays Special:...
                              ...                        
8995    Iceberg Hill Hotel-MMT Holidays Special:4|Elep...
8996    California Group of Houseboats:Three|The China...
8997    Little Mastiff Dharamshala - Unit Of Pong View...
8998    Souvenir Peppermint:3.9|Lariya Resort:4.3|Gorb...
8999                                 Hotel Sai Balaji:3.5
Name: Hotel Details, Length: 9000, dtype: object

In [41]:
### Displaying processed data

print('================TRAINING DATA==================')
display(final_train_data)
print(final_train_data.shape)

print('================VALIDATION DATA==================')
display(final_validation_data)
print(final_validation_data.shape)



Unnamed: 0,Itinerary,Places Covered,Travel Date,Hotel Details,Airline,Flight Stops,Meals,Sightseeing Places Covered,Per Person Price,type_Budget,type_Deluxe,type_Luxury,type_Premium,type_Standard,sc_Mumbai,sc_New Delhi,Not Available_ae,IndiGo_ae,Spicejet_ae,Go Air_ae,Air India_ae,Emirates_ae,Vistara_ae,Thai Airways_ae,AirAsia Indonesia_ae,Srilankan Airlines_ae,AirAsia_ae,Air India Express_ae,Malindo Air_ae,Malaysia Airlines_ae,Qantas Airways_ae,Air New Zealand_ae,Singapore Airlines_ae,Air Seychelles_ae,Etihad Airways_ae,AirAsia X_ae,Scoot_ae,Air Mauritius_ae,Oman Air_ae,Thai AirAsia_ae,Jetstar Asia_ae,Saudia_ae,Thai Vietjet Air_ae,EgyptAir_ae,Garuda Indonesia_ae,Silkair_ae,Bangkok Airways_ae,Kenya Airways_ae,Aeroflot_ae,Philippine Airlines_ae,Jetstar Airways_ae,Swiss_ae,Gulf Air_ae,Lufthansa_ae,Air Baltic_ae,Uzbekistan Airways_ae,Kuwait Airways_ae,China Southern_ae,Finnair_ae,Qatar Airways_ae,Airconnect_ae,China Eastern_ae,Norwegian_ae,British Airways_ae,Virgin Atlantic_ae,Turkish Airlines_ae,Brussels Airlines_ae,Cathay Pacific_ae,Air Austral_ae,South African Airways_ae,Ethiopian Airlines_ae,Sichuan Airlines_ae,New Delhi_pc,Shimla_pc,Manali_pc,Chandigarh_pc,Srinagar_pc,Pahalgam_pc,Udaipur_pc,Chittorgarh_pc,Munnar_pc,Kumarakom_pc,Allepey_pc,Kovalam and Poovar_pc,Pattaya_pc,Bangkok_pc,Cochin_pc,Jaipur_pc,Kasol_pc,Gulmarg_pc,Kanyakumari_pc,Guwahati_pc,Mount Abu_pc,Nainital_pc,Kausani_pc,Corbett_pc,Kodaikanal_pc,Dharamshala_pc,Ooty_pc,Mysore_pc,Agra_pc,Dubai_pc,Goa_pc,Gangtok_pc,Pelling_pc,Darjeeling_pc,Dalhousie_pc,Amritsar_pc,Coorg_pc,Bandipur_pc,Thekkady_pc,Lachung_pc,Bangalore_pc,Kandy_pc,Bentota_pc,Colombo_pc,Parwanoo_pc,Nameri_pc,Wayanad_pc,Kalimpong_pc,Bali_pc,Singapore_pc,Kovalam_pc,Negombo_pc,Nuwara Eliya_pc,Madurai_pc,Rameshwaram_pc,Leh_pc,Kullu_pc,Jodhpur_pc,Jaisalmer_pc,Port Blair_pc,Havelock_pc,Shillong_pc,Namchi_pc,Pondicherry_pc,Mahabalipuram_pc,Katra_pc,Mussoorie_pc,Abu Dhabi_pc,Rishikesh_pc,Kuala Lumpur_pc,Aurangabad_pc,Shirdi_pc,Kasauli_pc,Queenstown_pc,Kumbalgarh_pc,Ranikhet_pc,Niel Island_pc,Tirupati_pc,Kaziranga_pc,Ranthambhore_pc,Ajmer_pc,Mathura_pc,Haridwar_pc,Bikaner_pc,Auckland_pc,Rotorua_pc,Christchurch_pc,Sharjah_pc,Mauritius_pc,Praslin Island_pc,Mahe Island_pc,Poovar_pc,Varkala_pc,Pushkar_pc,Langkawi_pc,Yala_pc,Thimpu_pc,Paro_pc,Chail_pc,Tanjore_pc,Kumbakonam_pc,Tiruchirapally_pc,Patnitop_pc,Phuket_pc,Krabi town_pc,Siliguri_pc,Almora_pc,Punakha_pc,Pali_pc,Mumbai_pc,Alibagh_pc,Ganpatipule_pc,Lonavala And Khandala_pc,Sigiriya_pc,Galle_pc,Genting_pc,Bharatpur_pc,Cherrapunjee_pc,Palampur_pc,Ras Al Khaimah_pc,Penang_pc,Paris_pc,Bintan Island_pc,Nubra Valley_pc,Chintpurni_pc,Kangra_pc,Kabini_pc,Batam_pc,Lachen_pc,Dambulla_pc,Kuta_pc,Seminyak_pc,Nasik_pc,Saputara_pc,Jammu_pc,Ahmedabad_pc,Jamnagar_pc,Dwarka_pc,Somnath_pc,Anuradhapura_pc,Lucknow_pc,Varanasi_pc,Jakarta_pc,Johor Bahru_pc,Trivandrum_pc,Ranakpur_pc,Trincomalee_pc,Sonmarg_pc,Chennai_pc,Kanchipuram_pc,Nanded_pc,Ahmednagar_pc,Gokarna_pc,Melbourne_pc,Gold Coast_pc,Cairns_pc,Sydney_pc,Nawalgarh_pc,Bundi_pc,Prague_pc,Phuentsholing_pc,Moscow_pc,Saint Petersburg_pc,Hassan_pc,Hampi_pc,Franz Josef_pc,Kota Kinabalu_pc,Chikmangalur_pc,Kishangarh_pc,Mandawa_pc,Singapore Cruise_pc,Hong Kong_pc,Vadodara_pc,Joshimath_pc,Copenhagen_pc,Daman_pc,Helsinki_pc,Gwalior_pc,Orchha_pc,Khajuraho_pc,Alwar_pc,Oslo_pc
0,1N New Delhi . 2N Shimla . 2N Manali . 1N Chan...,"[New Delhi, Shimla, Manali, Chandigarh]",30-07-2021,Not Available,[Not Available],2,3,[Not Available],11509.0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1N Srinagar . 2N Pahalgam . 1N Srinagar,"[Srinagar, Pahalgam, Srinagar]",08-12-2021,The Orchard Retreat & Spa:4.6|WelcomHotel Pine...,"[IndiGo, IndiGo]",0,5,"[ Dal Lake , Avantipura Ruins , Mughal Garde...",22485.5,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2N Udaipur . 1N Chittorgarh,"[Udaipur, Chittorgarh]",26-04-2021,The Ananta:4.4|juSTa Lake Nahargarh Palace:4,[IndiGo],0,4,"[ Lake Pichola , Jag Mandir Palace , Saheliy...",12421.5,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2N Munnar . 1N Kumarakom . 1N Allepey . 2N Kov...,"[Munnar, Kumarakom, Allepey, Kovalam and Poovar]",27-08-2021,Elixir Hills Suites Resort & Spa-MMT Holidays ...,[IndiGo],0,5,"[ Mattupetty Dam , Echo Point , Tata Tea Mus...",35967.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4N Pattaya . 3N Bangkok,"[Pattaya, Bangkok]",12-12-2021,Dusit Thani Pattaya - MMT Special:4.5|Amari Wa...,"[Spicejet, Go Air]",0,5,"[ Coral Island Tour with Indian Lunch, Join Sp...",25584.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20995,2N Shimla . 3N Manali . 1N New Delhi,"[Shimla, Manali, New Delhi]",15-04-2021,Summit Thistle Villa Resort & Spa:Four|Snow Va...,[Not Available],1,3,"[ Mall road , Indira Bunglow , Himalayan Zoo...",16168.5,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
20996,1N Srinagar . 1N Gulmarg . 2N Pahalgam . 3N Sr...,"[Srinagar, Gulmarg, Pahalgam, Srinagar]",30-08-2021,The Orchard Retreat & Spa:4.6|The Khyber Himal...,"[Vistara, Vistara]",0,4,"[ Dal Lake , Gondola Point , Avantipura Ruin...",33770.5,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
20997,5N Colombo,[Colombo],27-02-2021,Mount Lavinia Hotel-MMT HOLIDAYS SPECIAL:4.0,"[IndiGo, IndiGo, IndiGo, IndiGo]",1,3,"[ Sigiriya Excursion , Nuwara Eliya Excursion ]",29964.0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
20998,2N Darjeeling . 2N Pelling . 2N Gangtok,"[Darjeeling, Pelling, Gangtok]",13-01-2022,Ramada by Wyndham Darjeeling Gandhi Road:4.4|T...,[IndiGo],1,3,"[ Tiger Hill , Himalayan Mountaineering Insti...",28145.5,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(21000, 254)


Unnamed: 0,Itinerary,Places Covered,Travel Date,Hotel Details,Airline,Flight Stops,Meals,Sightseeing Places Covered,type_Budget,type_Deluxe,type_Luxury,type_Premium,type_Standard,sc_Mumbai,sc_New Delhi,IndiGo_ae,Not Available_ae,Emirates_ae,Spicejet_ae,Air India_ae,Lufthansa_ae,Srilankan Airlines_ae,Vistara_ae,Go Air_ae,AirAsia X_ae,Malindo Air_ae,Bangkok Airways_ae,AirAsia Indonesia_ae,AirAsia_ae,Malaysia Airlines_ae,Jetstar Asia_ae,Oman Air_ae,Air India Express_ae,Singapore Airlines_ae,Silkair_ae,Thai Airways_ae,Thai AirAsia_ae,Cathay Pacific_ae,Qantas Airways_ae,Air Mauritius_ae,Scoot_ae,EgyptAir_ae,Air New Zealand_ae,Air Austral_ae,Saudia_ae,Aeroflot_ae,Air Seychelles_ae,Etihad Airways_ae,Swiss_ae,Air Baltic_ae,Turkish Airlines_ae,Thai Vietjet Air_ae,Qatar Airways_ae,Ethiopian Airlines_ae,Garuda Indonesia_ae,Goa_pc,Bandipur_pc,Munnar_pc,Cochin_pc,Thekkady_pc,Allepey_pc,Mauritius_pc,Kaziranga_pc,Guwahati_pc,Gangtok_pc,Lachen_pc,Lachung_pc,Paris_pc,Coorg_pc,Ooty_pc,Sharjah_pc,Kuala Lumpur_pc,Johor Bahru_pc,Amritsar_pc,Mysore_pc,Dharamshala_pc,Dalhousie_pc,Darjeeling_pc,Shimla_pc,Manali_pc,Chandigarh_pc,Bangalore_pc,New Delhi_pc,Agra_pc,Jaipur_pc,Kovalam and Poovar_pc,Bali_pc,Phuket_pc,Krabi town_pc,Pattaya_pc,Bangkok_pc,Colombo_pc,Kandy_pc,Madurai_pc,Kodaikanal_pc,Rameshwaram_pc,Kanyakumari_pc,Kovalam_pc,Udaipur_pc,Mount Abu_pc,Port Blair_pc,Havelock_pc,Jodhpur_pc,Abu Dhabi_pc,Dubai_pc,Singapore_pc,Jaisalmer_pc,Kabini_pc,Pelling_pc,Chittorgarh_pc,Shillong_pc,Haridwar_pc,Kasauli_pc,Srinagar_pc,Gulmarg_pc,Pahalgam_pc,Chail_pc,Nainital_pc,Kausani_pc,Ranikhet_pc,Corbett_pc,Kumarakom_pc,Tirupati_pc,Kalimpong_pc,Bentota_pc,Galle_pc,Bikaner_pc,Katra_pc,Leh_pc,Pondicherry_pc,Anuradhapura_pc,Sigiriya_pc,Nuwara Eliya_pc,Yala_pc,Hassan_pc,Hampi_pc,Nasik_pc,Saputara_pc,Cherrapunjee_pc,Mathura_pc,Thimpu_pc,Paro_pc,Kullu_pc,Bintan Island_pc,Negombo_pc,Pushkar_pc,Shirdi_pc,Trivandrum_pc,Ajmer_pc,Langkawi_pc,Mussoorie_pc,Parwanoo_pc,Varkala_pc,Genting_pc,Niel Island_pc,Chikmangalur_pc,Rishikesh_pc,Wayanad_pc,Ranthambhore_pc,Phuentsholing_pc,Punakha_pc,Kanchipuram_pc,Palampur_pc,Auckland_pc,Queenstown_pc,Patnitop_pc,Kuta_pc,Seminyak_pc,Mahabalipuram_pc,Mandawa_pc,Kumbakonam_pc,Tanjore_pc,Tiruchirapally_pc,Nubra Valley_pc,Aurangabad_pc,Ahmednagar_pc,Lonavala And Khandala_pc,Bharatpur_pc,Christchurch_pc,Trincomalee_pc,Kasol_pc,Batam_pc,Almora_pc,Poovar_pc,Namchi_pc,Gwalior_pc,Orchha_pc,Khajuraho_pc,Jammu_pc,Ranakpur_pc,Kumbalgarh_pc,Nanded_pc,Ahmedabad_pc,Sonmarg_pc,Kota Kinabalu_pc,Siliguri_pc,Kishangarh_pc,Chintpurni_pc,Kangra_pc,Praslin Island_pc,Mahe Island_pc,Chennai_pc,Hong Kong_pc,Mumbai_pc,Gokarna_pc,Lucknow_pc,Varanasi_pc,Prague_pc,Jamnagar_pc,Dwarka_pc,Somnath_pc,Nameri_pc,Jakarta_pc,Dambulla_pc,Ganpatipule_pc,Moscow_pc,Saint Petersburg_pc,Alwar_pc,Bundi_pc,Copenhagen_pc,Vadodara_pc,Pali_pc,Joshimath_pc,Nawalgarh_pc,Daman_pc,Ras Al Khaimah_pc,Franz Josef_pc,Rotorua_pc,Penang_pc,Melbourne_pc,Gold Coast_pc,Cairns_pc,Sydney_pc,30-01-2021_pc,Alibagh_pc
0,5N Goa,[Goa],13-07-2021,Novotel Goa Resort & Spa:4.1,[IndiGo],0,4,[Not Available],0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2N Bandipur,[Bandipur],20-03-2021,The Windflower Jungle Resorts & Spa Bandipur:4.4,[Not Available],0,4,[ Bandipur National Park ],0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3N Munnar,[Munnar],09-02-2022,Not Available,[Not Available],2,3,"[ Old Munnar Town , Eravikulam national park ...",0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3N Munnar,[Munnar],16-05-2021,Not Available,[Not Available],1,3,"[ Tata Tea Museum , Eravikulam national park ...",0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1N Cochin . 2N Munnar . 1N Thekkady . 1N Allepey,"[Cochin, Munnar, Thekkady, Allepey]",13-10-2021,Casino Hotel - Cgh Earth-MMT Holidays Special:...,[IndiGo],0,4,"[ Fort Cochin , Dutch Palace , Jewish Synago...",0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,2N Munnar . 1N Thekkady . 1N Allepey,"[Munnar, Thekkady, Allepey]",04-06-2021,Iceberg Hill Hotel-MMT Holidays Special:4|Elep...,[Vistara],2,3,"[ Valara Waterfalls , Tea Plantation at Devik...",0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8996,1N Srinagar . 1N Pahalgam . 1N Srinagar . 1N G...,"[Srinagar, Pahalgam, Srinagar, Gulmarg]",07-03-2021,California Group of Houseboats:Three|The China...,[IndiGo],1,3,"[ Dal Lake , Pahalgam , Avantipura Ruins , ...",0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8997,3N Dharamshala . 2N Dalhousie,"[Dharamshala, Dalhousie]",21-08-2021,Little Mastiff Dharamshala - Unit Of Pong View...,[Vistara],0,4,"[ Dalai Lama temple , Bhagsu falls , Church ...",0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8998,2N Jaipur . 1N Jodhpur . 2N Jaisalmer . 2N Uda...,"[Jaipur, Jodhpur, Jaisalmer, Udaipur]",19-03-2021,Souvenir Peppermint:3.9|Lariya Resort:4.3|Gorb...,"[IndiGo, IndiGo]",1,3,"[ City Palace , Hawa Mahal , Jantar Mantar ,...",0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


(9000, 235)


In [42]:
### Saving processed data to csv

final_train_data.to_csv('dataset\Train_prepared.csv') 
final_validation_data.to_csv('dataset\Test_prepared.csv')