In [807]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_log_error
import pandas_profiling as pp
from sklearn.linear_model import Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV
import xgboost
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [808]:
pd.set_option('display.max_columns', 1000)

In [809]:
def onehotencode(data, col_name, prefix):
    """Append one-hot indicator columns for ``col_name`` to ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the categorical column.
    col_name : str
        Name of the column to encode. The column itself is kept.
    prefix : str
        Prefix passed to ``pd.get_dummies``; indicator columns are named
        ``<prefix>_<value>``.

    Returns
    -------
    pandas.DataFrame
        A new frame: the original columns followed by the indicator columns.
        ``data`` is not modified.
    """
    indicator_columns = pd.get_dummies(data[col_name], prefix=prefix)
    combined = pd.concat([data, indicator_columns], axis=1)
    return combined

In [810]:
def encode_total_stops(data):
    """Add an integer ``Encoded_stops`` column derived from ``Total_Stops``.

    The textual stop count is mapped to a number ('non-stop' -> 0,
    '1 stop' -> 1, ... '4 stops' -> 4). Any value that is not in the mapping,
    including missing values, is encoded as 0.

    The mapping is applied to the whole column at once, so the function works
    for any row index. The previous row loop used ``data.loc[i, ...]`` with
    ``i`` in ``range(len(data))`` and therefore required a default 0..n-1 index.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Total_Stops`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with ``Encoded_stops`` (int64) added.
    """
    stops_by_label = {
        'non-stop': 0,
        '1 stop': 1,
        '2 stops': 2,
        '3 stops': 3,
        # Kept from the original rule: an empty string is encoded like '3 stops'.
        # NOTE(review): presumably meant as a fallback for blank cells -- confirm.
        '': 3,
        '4 stops': 4,
    }
    encoded = data['Total_Stops'].map(stops_by_label)
    # Labels outside the mapping (and NaN) come back as NaN; they default to 0.
    data['Encoded_stops'] = encoded.fillna(0).astype('int64')
    return data

In [811]:
def extract_journey_data(data, reference_date='01/03/2019'):
    """Derive calendar features from the ``Date_of_Journey`` column.

    The date strings (format ``%d/%m/%Y``) are parsed once and these columns
    are added, in this order:

    * ``Journey_Day``          - day of the month
    * ``Journey_Month``        - month number
    * ``Journey_day_of_week``  - weekday name, e.g. 'Sunday'
    * ``Journey_over_weekend`` - 1 for Saturday/Sunday, otherwise 0
    * ``Date_Lapse``           - days elapsed since ``reference_date`` (float)

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Date_of_Journey`` column. It is modified in place.
    reference_date : str, optional
        Origin for ``Date_Lapse`` in ``%d/%m/%Y`` format. Defaults to
        '01/03/2019', the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with the new columns added.
    """
    date_format = '%d/%m/%Y'
    journey_dates = pd.to_datetime(data['Date_of_Journey'], format=date_format)
    origin = pd.to_datetime(reference_date, format=date_format)

    data['Journey_Day'] = journey_dates.dt.day
    data['Journey_Month'] = journey_dates.dt.month

    # `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` replaces it.
    # The fallback keeps pandas versions that predate `day_name()` working.
    if hasattr(journey_dates.dt, 'day_name'):
        data['Journey_day_of_week'] = journey_dates.dt.day_name()
    else:
        data['Journey_day_of_week'] = journey_dates.dt.weekday_name

    # dayofweek is 0 (Monday) .. 6 (Sunday); integer division by 5 yields 1 only for 5 and 6.
    data['Journey_over_weekend'] = (journey_dates.dt.dayofweek // 5).astype('int')
    data['Date_Lapse'] = (journey_dates - origin) / np.timedelta64(1, 'D')
    return data

In [812]:
def _duration_to_minutes(duration, minutes_only_hours=4):
    """Convert one duration string ('7h 25m', '4h' or '5m') to total minutes."""
    tokens = duration.split(' ')
    if len(tokens) == 1:
        if 'h' in tokens[0]:
            # Hours only, e.g. '4h'.
            return int(tokens[0].replace('h', '')) * 60
        # Minutes only, e.g. '5m': `minutes_only_hours` is added on top.
        return minutes_only_hours * 60 + int(tokens[0].replace('m', ''))
    return int(tokens[0].replace('h', '')) * 60 + int(tokens[1].replace('m', ''))


def encode_duration(data, minutes_only_hours=4):
    """Add ``Duration_Type``: the flight duration as a fraction of a day.

    ``Duration`` strings such as '7h 25m', '4h' or '5m' are converted to
    minutes and divided by 1440 (the number of minutes in a day).

    The conversion is applied to the whole column, so the function works for
    any row index. The previous row loop used ``data.loc[i, ...]`` with ``i``
    in ``range(len(data))``, which required a default 0..n-1 index and wrote
    floats into an integer-initialised column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a string ``Duration`` column. It is modified in place.
    minutes_only_hours : int, optional
        Hours added to a value that contains minutes only (e.g. '5m').
        Defaults to 4, the constant that used to be hard-coded.
        NOTE(review): presumably an imputation for records whose hour part is
        missing -- confirm with the data owner.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``Duration_Type`` (float64) added.

    Raises
    ------
    ValueError
        If a duration string does not parse as 'Nh', 'Nm' or 'Nh Nm'.
    AttributeError
        If a value is not a string (for example a missing value).
    """
    total_minutes = data['Duration'].map(
        lambda duration: _duration_to_minutes(duration, minutes_only_hours)
    )
    data['Duration_Type'] = total_minutes.astype('float64') / 1440
    return data

In [813]:
def flight_hours(data):
    """Add ``Fly_Hours``: the departure time as a fraction of a day.

    ``Dep_Time`` strings of the form 'HH:MM' are split on the first ':' and
    converted to ``(hours * 60 + minutes) / 1440``.

    The computation is vectorized, so it works for any row index. The
    previous row loop used ``.loc[i, ...]`` with ``i`` in ``range(len(data))``,
    which required a default 0..n-1 index, and it shadowed the builtin ``min``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a string ``Dep_Time`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``Fly_Hours`` (float64) added.
    """
    clock = data['Dep_Time'].str.split(":", n=1, expand=True).astype('int64')
    clock.columns = ['Hours', 'Mins']
    data['Fly_Hours'] = (clock['Hours'] * 60 + clock['Mins']) / 1440
    return data

In [814]:
def arrival_hours(data):
    """Add ``Arrival_Hours``: the arrival time as a fraction of a day.

    ``Arrival_Time`` values look like '13:15' or '01:10 22 Mar'. Only the
    text before the first space (the 'HH:MM' clock part) is used, converted
    to ``(hours * 60 + minutes) / 1440``.

    The computation is vectorized, so it works for any row index. The
    previous row loop used ``data.loc[i, ...]`` with ``i`` in
    ``range(len(data))``, which required a default 0..n-1 index, and it
    shadowed the builtin ``min``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a string ``Arrival_Time`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``Arrival_Hours`` (float64) added.
    """
    clock_text = data['Arrival_Time'].str.split(" ").str[0]
    clock_parts = clock_text.str.split(":", expand=True)
    hours = clock_parts[0].astype('int64')
    minutes = clock_parts[1].astype('int64')
    data['Arrival_Hours'] = (hours * 60 + minutes) / 1440
    return data

In [815]:
def log_transform(data):
    """Return the base-10 logarithm of ``data``, computed with ``np.log10``."""
    log10_values = np.log10(data)
    return log10_values

# Data cleaning and Modeling starts here

In [816]:
data = pd.read_excel('C:\\Users\\LENOVO\\Desktop\\Flight_Ticket_Participant_Datasets\\Data_Train.xlsx')

In [817]:
test = pd.read_excel('C:\\Users\\LENOVO\\Desktop\\Flight_Ticket_Participant_Datasets\\Test_set.xlsx')

In [818]:
# Work on independent copies. A plain assignment only aliases the raw frames,
# and the feature helpers add columns to their argument in place, so without
# .copy() the frames loaded above would be modified as well.
data_copy = data.copy()
test_copy = test.copy()

In [819]:
data_copy.shape

(10683, 11)

In [820]:
#data_copy.drop_duplicates(keep='first', inplace=True)

In [821]:
data_copy.shape

(10683, 11)

In [822]:
# Drop anomalies: remove the rows whose Airline is 'Trujet' and the rows whose
# Additional_Info is 'Red-eye flight'.
# NOTE(review): presumably these categories are too rare to model -- confirm.
# Price-quantile trimming (1st / 99th percentile) is left disabled below.
#data_copy = data_copy[data_copy['Price'] > data["Price"].quantile(0.01)]
#data_copy = data_copy[data_copy['Price'] < data["Price"].quantile(0.99)]
data_copy = data_copy[data_copy['Airline'] != 'Trujet']
data_copy = data_copy[data_copy['Additional_Info'] != 'Red-eye flight']

In [823]:
#data_copy.loc[data_copy['Airline'] == 'Jet Airways Business','Airline'] = 'Jet Airways'
#data_copy.loc[data_copy['Airline'] == 'Multiple carriers Premium economy','Airline'] = 'Multiple carriers'
#data_copy.loc[data_copy['Airline'] == 'Vistara Premium economy','Airline'] = 'Vistara'

In [824]:
#data_copy.loc[data_copy['Destination'] == 'New Delhi','Destination'] = 'Delhi'

In [825]:
# Rebuild a default 0..n-1 row index after the row filtering above.
# reset_index() also keeps the previous row labels in a new 'index' column.
data_copy.reset_index(inplace=True)

In [826]:
data_copy = data_copy[data_copy['Airline'] != 'Trujet']

In [827]:
data_copy = onehotencode(data_copy,'Airline',"Airline_")

In [828]:
data_copy.columns

Index(['index', 'Airline', 'Date_of_Journey', 'Source', 'Destination', 'Route',
       'Dep_Time', 'Arrival_Time', 'Duration', 'Total_Stops',
       'Additional_Info', 'Price', 'Airline__Air Asia', 'Airline__Air India',
       'Airline__GoAir', 'Airline__IndiGo', 'Airline__Jet Airways',
       'Airline__Jet Airways Business', 'Airline__Multiple carriers',
       'Airline__Multiple carriers Premium economy', 'Airline__SpiceJet',
       'Airline__Vistara', 'Airline__Vistara Premium economy'],
      dtype='object')

In [829]:
data_copy = onehotencode(data_copy,'Source',"Source_")

In [830]:
data_copy = onehotencode(data_copy,'Destination',"Destination_")

In [831]:
# Normalise the Additional_Info labels before one-hot encoding: unify the
# 'No info' / 'No Info' spellings, merge the layover variants into 'Layover',
# and rename the business-class and airport-change labels.
additional_info_aliases = {
    'No info': 'No Info',
    '1 Long layover': 'Layover',
    '1 Short layover': 'Layover',
    '2 Long layover': 'Layover',
    'Business class': 'Business_Class',
    'Change airports': 'Airport_Changed',
}
for raw_label, clean_label in additional_info_aliases.items():
    data_copy.loc[data_copy['Additional_Info'] == raw_label, 'Additional_Info'] = clean_label

data_copy = onehotencode(data_copy, 'Additional_Info', "Additional_Info_")

In [832]:
data_copy = encode_total_stops(data_copy)

In [833]:
data_copy = extract_journey_data(data_copy)

In [834]:
data_copy = onehotencode(data_copy,'Journey_day_of_week',"Dayis_")

In [835]:
data_copy.head(2)

Unnamed: 0,index,Airline,Date_of_Journey,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Price,Airline__Air Asia,Airline__Air India,Airline__GoAir,Airline__IndiGo,Airline__Jet Airways,Airline__Jet Airways Business,Airline__Multiple carriers,Airline__Multiple carriers Premium economy,Airline__SpiceJet,Airline__Vistara,Airline__Vistara Premium economy,Source__Banglore,Source__Chennai,Source__Delhi,Source__Kolkata,Source__Mumbai,Destination__Banglore,Destination__Cochin,Destination__Delhi,Destination__Hyderabad,Destination__Kolkata,Destination__New Delhi,Additional_Info__Airport_Changed,Additional_Info__Business_Class,Additional_Info__In-flight meal not included,Additional_Info__Layover,Additional_Info__No Info,Additional_Info__No check-in baggage included,Encoded_stops,Journey_Day,Journey_Month,Journey_day_of_week,Journey_over_weekend,Date_Lapse,Dayis__Friday,Dayis__Monday,Dayis__Saturday,Dayis__Sunday,Dayis__Thursday,Dayis__Tuesday,Dayis__Wednesday
0,0,IndiGo,24/03/2019,Banglore,New Delhi,BLR → DEL,22:20,01:10 22 Mar,2h 50m,non-stop,No Info,3897,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,24,3,Sunday,1,23.0,0,0,0,1,0,0,0
1,1,Air India,1/05/2019,Kolkata,Banglore,CCU → IXR → BBI → BLR,05:50,13:15,7h 25m,2 stops,No Info,7662,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,2,1,5,Wednesday,0,61.0,0,0,0,0,0,0,1


In [836]:
#data_copy['Month_Start'] = 0
#data_copy.loc[data_copy['Journey_Day'] == 1,'Month_Start'] = 1
#data_copy.loc[data_copy['Journey_Day'] == 3,'Month_Start'] = 1
#data_copy.loc[data_copy['Journey_Day'] == 6,'Month_Start'] = 1
#data_copy.loc[data_copy['Journey_Day'] == 9,'Month_Start'] = 1

In [837]:
#data_copy['is_event'] = 0
#data_copy['is_event'] = np.where((((data_copy['Journey_Month'] == 3) & (data_copy['Journey_Day'] == 24)) |
#                                 ((data_copy['Journey_Month'] == 4) & (data_copy['Journey_Day'] == 21)) |
#                                 ((data_copy['Journey_Month'] == 5) & (data_copy['Journey_Day'] == 18))),1,0)

In [838]:
#data_copy['is_event'].value_counts()

In [839]:
data_copy = encode_duration(data_copy)

In [840]:
data_copy = flight_hours(data_copy)

In [841]:
data_copy = arrival_hours(data_copy)

In [842]:
data_copy = onehotencode(data_copy,'Encoded_stops',"No_of_stops_")

In [843]:
#data_copy = onehotencode(data_copy,'Fly_Hours',"Fly_Hours_")

In [844]:
data_final = data_copy

In [845]:
# NOTE(review): this looks like a no-op. encode_total_stops never leaves
# Encoded_stops null, the No_of_stops_ dummy columns have already been built
# from it, and the column itself is dropped in the next cell.
data_final.loc[data_final['Encoded_stops'].isnull(),'Encoded_stops'] = 0.3

In [846]:
data_final.drop(['Airline','Date_of_Journey','Source','Destination','Route','Dep_Time','Arrival_Time','Duration',
                'Total_Stops','Additional_Info','Journey_day_of_week','Encoded_stops'],axis=1,inplace=True)
#                'Total_Stops','Additional_Info','Fly_Hours'],axis=1,inplace=True)

In [847]:
data_final.drop(['index'],axis=1,inplace=True)

In [848]:
train_Feature = data_copy[[x for x in data_final.columns if (x != 'Price' and x != 'index')]]
train_Target = data_copy['Price']

In [849]:
X_train, X_test, y_train, y_test = train_test_split(train_Feature, train_Target, test_size=0.30, random_state=101)

In [850]:
null_columns=X_train.columns[X_train.isnull().any()]
X_train[null_columns].isnull().sum()

Series([], dtype: float64)

In [851]:
X_train.head()

Unnamed: 0,Airline__Air Asia,Airline__Air India,Airline__GoAir,Airline__IndiGo,Airline__Jet Airways,Airline__Jet Airways Business,Airline__Multiple carriers,Airline__Multiple carriers Premium economy,Airline__SpiceJet,Airline__Vistara,Airline__Vistara Premium economy,Source__Banglore,Source__Chennai,Source__Delhi,Source__Kolkata,Source__Mumbai,Destination__Banglore,Destination__Cochin,Destination__Delhi,Destination__Hyderabad,Destination__Kolkata,Destination__New Delhi,Additional_Info__Airport_Changed,Additional_Info__Business_Class,Additional_Info__In-flight meal not included,Additional_Info__Layover,Additional_Info__No Info,Additional_Info__No check-in baggage included,Journey_Day,Journey_Month,Journey_over_weekend,Date_Lapse,Dayis__Friday,Dayis__Monday,Dayis__Saturday,Dayis__Sunday,Dayis__Thursday,Dayis__Tuesday,Dayis__Wednesday,Duration_Type,Fly_Hours,Arrival_Hours,No_of_stops__0,No_of_stops__1,No_of_stops__2,No_of_stops__3,No_of_stops__4
9541,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,27,4,1,57.0,0,0,1,0,0,0,0,0.131944,0.611111,0.743056,1,0,0,0,0
3798,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,21,3,0,20.0,0,0,0,0,1,0,0,1.083333,0.71875,0.802083,0,0,1,0,0
7863,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,27,6,0,118.0,0,0,0,0,1,0,0,0.121528,0.465278,0.586806,1,0,0,0,0
3750,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,15,6,1,106.0,0,0,1,0,0,0,0,0.104167,0.829861,0.934028,1,0,0,0,0
6726,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,5,0,61.0,0,0,0,0,0,0,1,0.0625,0.381944,0.444444,1,0,0,0,0


In [852]:
scaler = StandardScaler()

In [853]:
# Fit the scaler on the training split only. Keep the original row labels so
# the scaled frame stays label-aligned with y_train (previously the index was
# silently reset to 0..n-1 while y_train kept the shuffled labels).
X_train = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)

In [854]:
# Apply the already-fitted scaler to the hold-out split. Keep the original row
# labels so the scaled frame stays label-aligned with y_test.
X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

# Final Prediction on Test set starts here

In [855]:
#test_copy.loc[test_copy['Airline'] == 'Jet Airways Business','Airline'] = 'Jet Airways'
#test_copy.loc[test_copy['Airline'] == 'Multiple carriers Premium economy','Airline'] = 'Multiple carriers'
#test_copy.loc[test_copy['Airline'] == 'Vistara Premium economy','Airline'] = 'Vistara'

In [856]:
#test_copy.loc[test_copy['Destination'] == 'New Delhi','Destination'] = 'Delhi'

In [857]:
test_copy = onehotencode(test_copy,'Airline',"Airline_")

In [858]:
test_copy = onehotencode(test_copy,'Source',"Source_")

In [859]:
test_copy = onehotencode(test_copy,'Destination',"Destination_")

In [860]:
# Normalise the Additional_Info labels before one-hot encoding, using the same
# rules that are applied to the training data.
test_copy.loc[test_copy['Additional_Info'] == 'No info','Additional_Info'] = 'No Info'
test_copy.loc[test_copy['Additional_Info'] == '1 Long layover','Additional_Info'] = 'Layover'
test_copy.loc[test_copy['Additional_Info'] == '1 Short layover','Additional_Info'] = 'Layover'
# Fixed copy-paste slip: this line repeated '1 Long layover', so '2 Long layover'
# (which the training cell merges into 'Layover') was never handled here and
# would have produced an extra dummy column unknown to the model.
test_copy.loc[test_copy['Additional_Info'] == '2 Long layover','Additional_Info'] = 'Layover'
test_copy.loc[test_copy['Additional_Info'] == 'Business class','Additional_Info'] = 'Business_Class'
test_copy.loc[test_copy['Additional_Info'] == 'Change airports','Additional_Info'] = 'Airport_Changed'

test_copy = onehotencode(test_copy,'Additional_Info',"Additional_Info_")

In [861]:
test_copy = encode_total_stops(test_copy)

In [862]:
test_copy = extract_journey_data(test_copy)

In [863]:
#test_copy['Month_Start'] = 0
#test_copy.loc[test_copy['Journey_Day'] == 1,'Month_Start'] = 1
#test_copy.loc[test_copy['Journey_Day'] == 3,'Month_Start'] = 1
#test_copy.loc[test_copy['Journey_Day'] == 6,'Month_Start'] = 1
#test_copy.loc[test_copy['Journey_Day'] == 9,'Month_Start'] = 1

In [864]:
#test_copy['is_event'] = 0
#test_copy['is_event'] = np.where((((test_copy['Journey_Month'] == 3) & (test_copy['Journey_Day'] == 24)) |
#                                 ((test_copy['Journey_Month'] == 4) & (test_copy['Journey_Day'] == 21)) |
#                                 ((test_copy['Journey_Month'] == 5) & (test_copy['Journey_Day'] == 18))),1,0)

In [865]:
test_copy = encode_duration(test_copy)

In [866]:
test_copy = flight_hours(test_copy)

In [867]:
test_copy = arrival_hours(test_copy)

In [868]:
test_copy = onehotencode(test_copy,'Journey_day_of_week',"Dayis_")

In [869]:
test_copy = onehotencode(test_copy,'Encoded_stops',"No_of_stops_")

In [870]:
#test_copy = onehotencode(test_copy,'Fly_Hours',"Fly_Hours_")

In [871]:
test_final = test_copy

In [872]:
data_copy.shape

(10681, 48)

In [873]:
[X for X in data_copy.columns if X not in test_final.columns]

['Price']

In [874]:
test_copy.head()

Unnamed: 0,Airline,Date_of_Journey,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Airline__Air Asia,Airline__Air India,Airline__GoAir,Airline__IndiGo,Airline__Jet Airways,Airline__Jet Airways Business,Airline__Multiple carriers,Airline__Multiple carriers Premium economy,Airline__SpiceJet,Airline__Vistara,Airline__Vistara Premium economy,Source__Banglore,Source__Chennai,Source__Delhi,Source__Kolkata,Source__Mumbai,Destination__Banglore,Destination__Cochin,Destination__Delhi,Destination__Hyderabad,Destination__Kolkata,Destination__New Delhi,Additional_Info__Airport_Changed,Additional_Info__Business_Class,Additional_Info__In-flight meal not included,Additional_Info__Layover,Additional_Info__No Info,Additional_Info__No check-in baggage included,Encoded_stops,Journey_Day,Journey_Month,Journey_day_of_week,Journey_over_weekend,Date_Lapse,Duration_Type,Fly_Hours,Arrival_Hours,Dayis__Friday,Dayis__Monday,Dayis__Saturday,Dayis__Sunday,Dayis__Thursday,Dayis__Tuesday,Dayis__Wednesday,No_of_stops__0,No_of_stops__1,No_of_stops__2,No_of_stops__3,No_of_stops__4
0,Jet Airways,6/06/2019,Delhi,Cochin,DEL → BOM → COK,17:30,04:25 07 Jun,10h 55m,1 stop,No Info,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,6,6,Thursday,0,97.0,0.454861,0.729167,0.184028,0,0,0,0,1,0,0,0,1,0,0,0
1,IndiGo,12/05/2019,Kolkata,Banglore,CCU → MAA → BLR,06:20,10:20,4h,1 stop,No Info,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,12,5,Sunday,1,72.0,0.166667,0.263889,0.430556,0,0,0,1,0,0,0,0,1,0,0,0
2,Jet Airways,21/05/2019,Delhi,Cochin,DEL → BOM → COK,19:15,19:00 22 May,23h 45m,1 stop,In-flight meal not included,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,21,5,Tuesday,0,81.0,0.989583,0.802083,0.791667,0,0,0,0,0,1,0,0,1,0,0,0
3,Multiple carriers,21/05/2019,Delhi,Cochin,DEL → BOM → COK,08:00,21:00,13h,1 stop,No Info,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,21,5,Tuesday,0,81.0,0.541667,0.333333,0.875,0,0,0,0,0,1,0,0,1,0,0,0
4,Air Asia,24/06/2019,Banglore,Delhi,BLR → DEL,23:55,02:45 25 Jun,2h 50m,non-stop,No Info,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,24,6,Monday,0,115.0,0.118056,0.996528,0.114583,0,1,0,0,0,0,0,1,0,0,0,0


In [876]:

#data_copy['Encoded_stops'].unique()

In [877]:
test_final.drop(['Airline','Date_of_Journey','Source','Destination','Route','Dep_Time','Arrival_Time','Duration',
                'Total_Stops','Additional_Info','Journey_day_of_week','Encoded_stops'],axis=1,inplace=True)
#                'Total_Stops','Additional_Info','Fly_Hours'],axis=1,inplace=True)

In [878]:
# Put the test columns in exactly the order the scaler and model were fitted on.
# The test pipeline adds the Dayis_ weekday dummies AFTER Duration_Type /
# Fly_Hours / Arrival_Hours, whereas the training pipeline adds them BEFORE, so
# the raw column order differs. A positional transform would then feed, for
# example, a weekday flag where Duration_Type is expected.
# Dummy columns missing from the test set are filled with 0; columns that were
# not seen in training are dropped.
test_final = test_final.reindex(columns=train_Feature.columns, fill_value=0)
test_final = pd.DataFrame(scaler.transform(test_final), columns=test_final.columns)

In [879]:
test_final.shape

(2671, 47)

In [880]:
train_Feature.shape

(10681, 47)

In [881]:
y_train_log =  y_train.apply(log_transform)

In [882]:
y_test_log =  y_test.apply(log_transform)

## Random Forest model (fixed hyperparameters; no randomized search is run in this section)

In [883]:
# max_features=None considers every feature at each split. That is what 'auto'
# meant for regressors; 'auto' was deprecated in scikit-learn 1.1 and removed
# in 1.3, while None is accepted by old and new versions alike.
rf = RandomForestRegressor(bootstrap=True, max_depth=100,
                           max_features=None, min_samples_leaf=1,
                           min_samples_split=2, n_estimators=1400, random_state=101)

In [884]:
rf.fit(X_train, y_train_log)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=100,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=1400, n_jobs=1,
           oob_score=False, random_state=101, verbose=0, warm_start=False)

In [885]:
rf.feature_importances_

array([  1.77187130e-03,   7.62668416e-03,   1.67453538e-03,
         8.97295005e-03,   6.12047071e-02,   1.40820308e-03,
         1.73259471e-02,   1.27005044e-03,   5.78060189e-03,
         6.46708548e-03,   3.84714944e-05,   1.06897730e-03,
         9.39805035e-04,   2.14968679e-03,   3.51418169e-03,
         2.48753909e-03,   3.51201528e-03,   2.14642345e-03,
         1.11208374e-03,   2.70498942e-03,   8.68932015e-04,
         2.41817953e-03,   8.60698098e-06,   1.79222135e-04,
         4.39716701e-02,   6.05427775e-05,   6.00976546e-03,
         6.96031206e-05,   9.00136275e-03,   6.84092922e-03,
         3.52576330e-03,   9.26994122e-02,   3.34692537e-03,
         1.43824688e-03,   1.19469365e-03,   3.00896963e-03,
         1.45447521e-03,   6.39840349e-04,   1.40210297e-03,
         6.10078211e-01,   2.58229777e-02,   2.79289940e-02,
         4.86594414e-04,   2.26514495e-02,   1.61524259e-03,
         9.32065324e-05,   7.27096171e-06])

In [886]:
np.sqrt(mean_squared_log_error(y_test,((10**rf.predict(X_test)))))

0.12184552523097021

In [887]:
np.sqrt(mean_squared_log_error(y_train,10**rf.predict(X_train)))

0.052987078986571907

In [888]:
X_train.columns[rf.feature_importances_ >  np.mean(rf.feature_importances_)]

Index(['Airline__Jet Airways', 'Additional_Info__In-flight meal not included',
       'Date_Lapse', 'Duration_Type', 'Fly_Hours', 'Arrival_Hours',
       'No_of_stops__1'],
      dtype='object')

In [889]:
test_pred = 10**rf.predict(test_final)

In [890]:
test_pred = pd.DataFrame(test_pred,columns=['Price'])

In [891]:
test_pred.to_excel("C:/Users/LENOVO/Desktop/RF_v2.xlsx",index=False)