In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import deque
import random
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Reinforcement Learning for Dynamic Flight Pricing

This notebook implements a reinforcement learning (RL) approach to dynamic flight pricing. The simulated environment incorporates external demand drivers such as fuel costs, holidays, and weather; competitor pricing is a planned factor that the code below does not model yet.

## Data Preprocessing

In [None]:
class DataProcessor:
    """Clean, enrich and encode the raw tables consumed by the pricing environment.

    The processor owns three scalers (state features, prices, demand) and one
    integer encoder per categorical column. ``fit`` learns the encoders and
    scalers; ``transform`` returns a single enriched flight table that carries
    calendar, fuel, climate, holiday and demand-derived columns.
    """

    # Position in this tuple is the numeric code used for the season feature.
    SEASONS = ('Winter', 'Spring', 'Summer', 'Fall')
    # Number of observations averaged into the ``Demand_MA7`` column.
    DEMAND_WINDOW = 7
    # Width of each row returned by ``_extract_state_features``.
    N_STATE_FEATURES = 13

    def __init__(self):
        self.state_scaler = StandardScaler()
        self.price_scaler = MinMaxScaler(feature_range=(0.1, 0.9))
        self.demand_scaler = StandardScaler()
        self.route_encoder = {}
        self.airline_encoder = {}
        self.aircraft_encoder = {}
        self.weather_encoder = {}
        self.holiday_encoder = {}
        # Upper clip bound used by ``scale_prices``; None means "no upper bound".
        self.historical_price_max = None

    def fit(self, historical_data, fuel_prices, climate_data, holiday_data):
        """Learn the categorical encoders and fit the scalers.

        Args:
            historical_data: flight table with at least ``Date``, ``Route``,
                ``Airline``, ``AircraftType``, ``Demand``, ``Price`` and
                ``Capacity`` columns.
            fuel_prices: table with ``Date`` and ``FuelPrice``.
            climate_data: table with ``Date``, ``Location``, ``Temperature``
                and ``WeatherCondition``.
            holiday_data: table with ``Date``, ``HolidayName`` and ``Location``.

        The input frames are not modified.
        """
        # Encoders must exist before state features are extracted, because the
        # extraction looks every category up in them.
        self.route_encoder = self._build_encoder(historical_data['Route'])
        self.airline_encoder = self._build_encoder(historical_data['Airline'])
        self.aircraft_encoder = self._build_encoder(historical_data['AircraftType'])
        self.weather_encoder = self._build_encoder(climate_data['WeatherCondition'])
        self.holiday_encoder = self._build_encoder(holiday_data['HolidayName'])

        # State features need the derived columns, so run the full transform
        # instead of reading them from the raw frame.
        prepared = self.transform(historical_data, fuel_prices, climate_data, holiday_data)
        self.state_scaler.fit(self._extract_state_features(prepared, prepared['Date'].min()))
        self.demand_scaler.fit(historical_data[['Demand']])

        # Fit the price scaler here as well so scale_prices / inverse_scale_prices
        # are usable straight after fit().
        self.historical_price_max = prepared['Price'].max()
        self.price_scaler.fit(prepared['Price'].to_numpy(dtype=float).reshape(-1, 1))

    def transform(self, historical_data, fuel_prices, climate_data, holiday_data):
        """Return an enriched copy of ``historical_data``.

        Steps: parse dates, add calendar columns (``DayOfWeek``, ``Month``,
        ``IsWeekend``, ``Season``), fill missing values, drop price/demand
        outliers, merge fuel, climate and holiday tables on date (and flight
        origin), then add ``Demand_MA7`` and ``PriceElasticity`` when the input
        does not already provide them.

        The four input frames are copied first and never modified.
        """
        historical_data = historical_data.copy()
        fuel_prices = fuel_prices.copy()
        climate_data = climate_data.copy()
        holiday_data = holiday_data.copy()
        for frame in (historical_data, fuel_prices, climate_data, holiday_data):
            frame['Date'] = pd.to_datetime(frame['Date'])

        historical_data['DayOfWeek'] = historical_data['Date'].dt.dayofweek
        historical_data['Month'] = historical_data['Date'].dt.month
        historical_data['IsWeekend'] = (historical_data['DayOfWeek'] >= 5).astype(int)
        historical_data['Season'] = historical_data['Month'].map(self._get_season)

        fill_values = {
            'Demand': historical_data['Demand'].mean(),
            'Price': historical_data['Price'].mean(),
        }
        capacity_modes = historical_data['Capacity'].mode()
        if not capacity_modes.empty:
            fill_values['Capacity'] = capacity_modes.iloc[0]
        # A NaN fill value (empty or all-missing column) would be a no-op; skip it.
        fill_values = {column: value for column, value in fill_values.items() if pd.notna(value)}
        if fill_values:
            historical_data = historical_data.fillna(fill_values)

        # numeric_only keeps the Date column out of the mean.
        fuel_prices = fuel_prices.fillna(fuel_prices.mean(numeric_only=True))
        # ``fillna(method='ffill')`` is deprecated; ``ffill`` is the supported spelling.
        climate_data = climate_data.ffill()
        # Only the text columns get the 'None' placeholder so Date stays datetime.
        text_columns = [column for column in holiday_data.columns if column != 'Date']
        if text_columns:
            holiday_data[text_columns] = holiday_data[text_columns].fillna('None')

        historical_data = self._remove_outliers(historical_data, ['Price', 'Demand'])
        historical_data = self._merge_data(historical_data, fuel_prices, climate_data, holiday_data)
        return self._add_demand_features(historical_data)

    @staticmethod
    def _build_encoder(values):
        """Map each distinct value of a column to its order of first appearance."""
        return {value: code for code, value in enumerate(values.unique())}

    def _get_season(self, month):
        """Return the season name for a month number (anything outside 1-8 and 12 is 'Fall')."""
        if month in (12, 1, 2):
            return 'Winter'
        if month in (3, 4, 5):
            return 'Spring'
        if month in (6, 7, 8):
            return 'Summer'
        return 'Fall'

    def _remove_outliers(self, df, columns, n_std=3):
        """Keep rows whose value lies within ``n_std`` standard deviations of the mean.

        Columns are filtered one after another, so later columns use statistics
        of the already-filtered frame. A column whose standard deviation is NaN
        (fewer than two rows) or zero has no spread to measure and is skipped
        instead of wiping out the frame.
        """
        for column in columns:
            mean = df[column].mean()
            std = df[column].std()
            if pd.isna(std) or std == 0:
                continue
            df = df[df[column].between(mean - n_std * std, mean + n_std * std)]
        return df

    def _extract_state_features(self, historical_data, current_date):
        """Build one numeric feature row per flight record.

        Returns a float array of shape ``(len(historical_data), 13)``. Unknown
        categories are encoded as -1 and missing numeric values as 0.
        ``current_date`` is accepted for interface compatibility and not used.
        """
        season_codes = {name: code for code, name in enumerate(self.SEASONS)}
        features = []
        for _, row in historical_data.iterrows():
            features.append([
                row['DayOfWeek'], row['Month'], row['IsWeekend'],
                # Season is a string column; encode it so the array stays numeric.
                season_codes.get(row['Season'], -1),
                self.route_encoder.get(row['Route'], -1),
                self.airline_encoder.get(row['Airline'], -1),
                self.aircraft_encoder.get(row['AircraftType'], -1),
                row.get('FuelPrice', np.nan),
                row.get('Temperature', np.nan),
                self.weather_encoder.get(row.get('WeatherCondition', 'Unknown'), -1),
                row.get('IsHoliday', 0),
                row.get('Demand_MA7', np.nan),
                row.get('PriceElasticity', np.nan)
            ])
        # The reshape keeps a 2-D result even when there are no rows.
        features = np.array(features, dtype=float).reshape(-1, self.N_STATE_FEATURES)
        return np.nan_to_num(features, nan=0)

    def _merge_data(self, historical_data, fuel_prices, climate_data, holiday_data):
        """Left-join fuel, climate and holiday information onto the flight table.

        Fuel is matched on ``Date``; climate and holidays on ``Date`` plus the
        flight origin (the part of ``Route`` before the first '-'). Adds
        ``Origin`` and ``IsHoliday``; the helper columns ``Location`` and
        ``HolidayName`` are dropped. Lookup tables are de-duplicated on their
        join keys so a repeated entry cannot duplicate flight rows.
        """
        location_keys = ['Date', 'Location']
        merged = pd.merge(historical_data, fuel_prices.drop_duplicates('Date'), on='Date', how='left')
        merged['Origin'] = merged['Route'].str.split('-').str[0]
        merged = pd.merge(merged, climate_data.drop_duplicates(location_keys),
                          left_on=['Date', 'Origin'], right_on=location_keys, how='left')
        merged = merged.drop(columns='Location')
        merged = pd.merge(merged, holiday_data.drop_duplicates(location_keys),
                          left_on=['Date', 'Origin'], right_on=location_keys, how='left')
        # A matched holiday row leaves a non-null name behind.
        merged['IsHoliday'] = merged['HolidayName'].notna().astype(int)
        return merged.drop(columns=['Location', 'HolidayName'])

    def _add_demand_features(self, frame):
        """Add ``Demand_MA7`` and ``PriceElasticity`` unless the frame already has them.

        ``Demand_MA7`` is the mean demand over the last ``DEMAND_WINDOW``
        observations of the same route and airline, in date order.
        ``PriceElasticity`` is one estimate per route/airline pair (see
        ``_estimate_elasticity``). Row order is preserved.
        """
        # A unique index is required for the aligned assignment below.
        frame = frame.reset_index(drop=True)
        if 'Demand_MA7' not in frame.columns:
            if frame.empty:
                frame['Demand_MA7'] = pd.Series(dtype=float)
            else:
                chronological = frame.sort_values('Date', kind='mergesort')
                frame['Demand_MA7'] = chronological.groupby(['Route', 'Airline'], sort=False)['Demand'].transform(
                    lambda demand: demand.rolling(self.DEMAND_WINDOW, min_periods=1).mean()
                )
        if 'PriceElasticity' not in frame.columns:
            estimates = {
                key: self._estimate_elasticity(group)
                for key, group in frame.groupby(['Route', 'Airline'], sort=False)
            }
            frame['PriceElasticity'] = pd.Series(
                [estimates.get(key, 0.0) for key in zip(frame['Route'], frame['Airline'])],
                index=frame.index, dtype=float
            )
        return frame

    @staticmethod
    def _estimate_elasticity(group):
        """Estimate price elasticity as the log-log regression slope of demand on price.

        Only rows with positive price and demand are used. Returns 0.0 when
        fewer than two such rows exist or when the price never varies.
        """
        price = group['Price'].to_numpy(dtype=float)
        demand = group['Demand'].to_numpy(dtype=float)
        usable = (price > 0) & (demand > 0)
        if usable.sum() < 2:
            return 0.0
        log_price = np.log(price[usable])
        log_demand = np.log(demand[usable])
        spread = log_price.var()
        if spread == 0:
            return 0.0
        covariance = ((log_price - log_price.mean()) * (log_demand - log_demand.mean())).mean()
        return float(covariance / spread)

    def scale_prices(self, prices):
        """Clip prices to ``[0, historical_price_max]`` and map them through the price scaler.

        Accepts any array-like. When ``historical_price_max`` has not been set
        yet only the lower bound is applied.
        """
        prices = np.asarray(prices, dtype=float)
        prices = np.clip(prices, 0, self.historical_price_max)
        return self.price_scaler.transform(prices.reshape(-1, 1)).flatten()

    def inverse_scale_prices(self, scaled_prices):
        """Map scaled prices back to currency units; accepts any array-like."""
        scaled_prices = np.asarray(scaled_prices, dtype=float)
        return self.price_scaler.inverse_transform(scaled_prices.reshape(-1, 1)).flatten()

## Environment Implementation

In [None]:
class ImprovedAirlinePricingEnv:
    """Simulator in which an agent prices every flight inside a rolling booking window.

    Each step the agent supplies one scaled price per (route, airline,
    days-ahead) combination. The environment converts the prices to currency,
    sells seats according to a price-elastic demand model, advances the clock
    by one day and returns ``(next_state, reward, done, info)`` where the reward
    is the revenue collected in that step.

    State layout: for every route, airline and day in the window, 11 numeric
    values (days ahead, day of week, month, weekend flag, fuel price,
    temperature, holiday flag, remaining capacity, current price, demand moving
    average, price elasticity) followed by one-hot encodings of route, airline,
    aircraft type, weather and season.
    """

    # Elasticity used when no history exists for a flight, and to fill a
    # missing ``PriceElasticity`` column.
    DEFAULT_PRICE_ELASTICITY = -0.5
    # Values reported in the state when a flight date has no data.
    DEFAULT_TEMPERATURE = 25
    DEFAULT_WEATHER = 'Sunny'
    # Demand uplift applied to a flight that departs on a holiday at its origin.
    HOLIDAY_DEMAND_MULTIPLIER = 1.5
    # Rolling window used when ``Demand_MA7`` has to be derived here.
    DEMAND_WINDOW = 7

    _SEASON_ENCODER = {'Winter': 0, 'Spring': 1, 'Summer': 2, 'Fall': 3}
    # Columns whose presence shows a frame already went through the processor's merges.
    _MERGED_COLUMNS = frozenset({'FuelPrice', 'Temperature', 'WeatherCondition', 'IsHoliday'})
    _ROW_FEATURES = ('FuelPrice', 'Temperature', 'WeatherCondition', 'IsHoliday',
                     'Demand_MA7', 'PriceElasticity')

    def __init__(self, historical_data, fuel_prices, climate_data, holiday_data, data_processor,
                 max_days_ahead=90, simulation_length_days=365, seats_capacity=150,
                 seasonal_indices=None):
        """Prepare the historical data and start the first episode.

        Args:
            historical_data, fuel_prices, climate_data, holiday_data: raw tables
                handed to ``data_processor.transform`` (copies are passed, so
                the caller's frames are left untouched).
            data_processor: object providing ``transform``, ``_get_season``,
                the categorical encoders, ``state_scaler``, ``price_scaler``
                and ``inverse_scale_prices``.
            max_days_ahead: last day of the booking window (inclusive).
            simulation_length_days: number of steps before ``done`` is True.
            seats_capacity: seats available on every flight.
            seasonal_indices: optional ``{(route, airline): {season: multiplier}}``
                applied to base demand; missing entries mean a multiplier of 1.0.
        """
        self.data_processor = data_processor
        # Kept so frames passed to reset() can be prepared the same way.
        self._fuel_prices = fuel_prices
        self._climate_data = climate_data
        self._holiday_data = holiday_data
        # id(frame) -> (frame, value); the frame reference keeps the id valid.
        self._prepared_cache = {}
        self._lookup_cache = {}

        self.historical_data = self._prepare_frame(historical_data)
        self.routes = self.historical_data['Route'].unique()
        self.airlines = self.historical_data['Airline'].unique()
        self.aircraft_types = self.historical_data['AircraftType'].unique()
        self.current_date = self.historical_data['Date'].min()
        self.max_days_ahead = max_days_ahead
        self.simulation_length_days = simulation_length_days
        self.seats_capacity = seats_capacity
        # Previously read by _get_demand without ever being defined.
        self.seasonal_indices = dict(seasonal_indices) if seasonal_indices else {}
        self.prices = {}
        self.seats_sold = {}

        historical_prices = self.historical_data['Price']
        self.data_processor.historical_price_max = historical_prices.max()
        self.data_processor.price_scaler.fit(historical_prices.values.reshape(-1, 1))

        # Loop-invariant statistics and per-flight indexes, computed once.
        self._mean_price = historical_prices.mean()
        self._mean_demand = self.historical_data['Demand'].mean()
        self._default_fuel_price = self.historical_data['FuelPrice'].mean()
        self._default_demand_ma7 = self.historical_data['Demand_MA7'].mean()
        self._aircraft_by_flight, self._history_by_flight = self._index_history(self.historical_data)
        self.reset()

    def reset(self, historical_data=None):
        """Start a new episode and return its initial state.

        Args:
            historical_data: optional frame to run the episode on (for example
                held-out data). A raw frame is prepared with the processor
                first; ``None`` uses the environment's own data.
        """
        if historical_data is None:
            episode_data = self.historical_data
        else:
            episode_data = self._episode_frame(historical_data)
        # Remembered so step() builds the next state from the same frame.
        self._episode_data = episode_data
        self.current_date = pd.to_datetime(episode_data['Date']).min()
        self.current_step = 0
        self.prices = {}
        self.seats_sold = {}
        return self._get_state(episode_data)

    def _prepare_frame(self, frame):
        """Return ``frame`` with merged and demand-derived columns present."""
        if not self._MERGED_COLUMNS.issubset(frame.columns):
            frame = self.data_processor.transform(
                frame.copy(), self._fuel_prices.copy(),
                self._climate_data.copy(), self._holiday_data.copy()
            )
        return self._ensure_demand_features(frame)

    def _episode_frame(self, frame):
        """Prepare a caller-supplied frame once and reuse the result on later resets."""
        cached = self._prepared_cache.get(id(frame))
        if cached is not None and cached[0] is frame:
            return cached[1]
        prepared = self._prepare_frame(frame)
        self._prepared_cache[id(frame)] = (frame, prepared)
        return prepared

    def _ensure_demand_features(self, frame):
        """Fill in ``Demand_MA7`` / ``PriceElasticity`` when the processed frame lacks them.

        ``Demand_MA7`` becomes the rolling mean of demand per route and airline
        in date order; ``PriceElasticity`` becomes ``DEFAULT_PRICE_ELASTICITY``.
        Frames that already carry both columns are returned unchanged.
        """
        missing = [name for name in ('Demand_MA7', 'PriceElasticity') if name not in frame.columns]
        if not missing:
            return frame
        # New object with a unique index, so the caller's frame is untouched.
        frame = frame.reset_index(drop=True)
        if 'Demand_MA7' in missing:
            chronological = frame.sort_values('Date', kind='mergesort')
            frame['Demand_MA7'] = chronological.groupby(['Route', 'Airline'], sort=False)['Demand'].transform(
                lambda demand: demand.rolling(self.DEMAND_WINDOW, min_periods=1).mean()
            )
        if 'PriceElasticity' in missing:
            frame['PriceElasticity'] = self.DEFAULT_PRICE_ELASTICITY
        return frame

    def _index_history(self, frame):
        """Index the historical data by (route, airline).

        Returns two dicts: the most common aircraft type per flight, and the
        per-flight arrays (dates, demand average, elasticity, holiday flag)
        used by the demand model.
        """
        aircraft, history = {}, {}
        for key, group in frame.groupby(['Route', 'Airline'], sort=False):
            modes = group['AircraftType'].mode()
            aircraft[key] = modes.iloc[0] if not modes.empty else None
            history[key] = {
                'dates': pd.to_datetime(group['Date']).to_numpy(),
                'demand_ma7': group['Demand_MA7'].to_numpy(dtype=float),
                'elasticity': group['PriceElasticity'].to_numpy(dtype=float),
                'is_holiday': group['IsHoliday'].to_numpy(),
            }
        return aircraft, history

    def _row_lookup(self, frame):
        """Return ``{(route, airline, date): feature dict}`` for ``frame`` (first row wins)."""
        cached = self._lookup_cache.get(id(frame))
        if cached is not None and cached[0] is frame:
            return cached[1]
        wanted = [name for name in self._ROW_FEATURES if name in frame.columns]
        lookup = {}
        records = frame[wanted].to_dict('records')
        for route, airline, date, record in zip(frame['Route'], frame['Airline'],
                                                pd.to_datetime(frame['Date']), records):
            lookup.setdefault((route, airline, date), record)
        self._lookup_cache[id(frame)] = (frame, lookup)
        return lookup

    @staticmethod
    def _value_or(value, default):
        """Return ``default`` when ``value`` is None or NaN, otherwise ``value``."""
        return default if value is None or pd.isna(value) else value

    def _get_state(self, historical_data):
        """Build the flat observation vector for the current date.

        Flights without a data row fall back to default fuel price, temperature,
        weather and demand values; seats sold and current price always come
        from the environment's own bookkeeping.
        """
        rows = self._row_lookup(historical_data)
        processor = self.data_processor
        horizon = [self.current_date + pd.Timedelta(days=offset)
                   for offset in range(self.max_days_ahead + 1)]

        state = []
        for route in self.routes:
            route_encoded = self._one_hot_encode(route, processor.route_encoder)
            for airline in self.airlines:
                airline_encoded = self._one_hot_encode(airline, processor.airline_encoder)
                aircraft_encoded = self._one_hot_encode(
                    self._aircraft_by_flight.get((route, airline)), processor.aircraft_encoder
                )
                for days_ahead, flight_date in enumerate(horizon):
                    key = (route, airline, flight_date)
                    row = rows.get(key, {})

                    # Calendar features follow from the date itself.
                    day_of_week = flight_date.weekday()
                    month = flight_date.month
                    is_weekend = 1 if day_of_week >= 5 else 0
                    season = processor._get_season(month)

                    fuel_price = self._value_or(row.get('FuelPrice'), self._default_fuel_price)
                    temperature = self._value_or(row.get('Temperature'), self.DEFAULT_TEMPERATURE)
                    weather_condition = self._value_or(row.get('WeatherCondition'), self.DEFAULT_WEATHER)
                    is_holiday = int(self._value_or(row.get('IsHoliday'), 0))
                    demand_ma7 = self._value_or(row.get('Demand_MA7'), self._default_demand_ma7)
                    price_elasticity = self._value_or(row.get('PriceElasticity'), 0)

                    remaining_capacity = self.seats_capacity - self.seats_sold.get(key, 0)
                    current_price = self.prices.get(key, 0)

                    state.extend([days_ahead, day_of_week, month, is_weekend, fuel_price, temperature,
                                  is_holiday, remaining_capacity, current_price, demand_ma7, price_elasticity])
                    state.extend(route_encoded)
                    state.extend(airline_encoded)
                    state.extend(aircraft_encoded)
                    state.extend(self._one_hot_encode(weather_condition, processor.weather_encoder))
                    state.extend(self._one_hot_encode(season, self._SEASON_ENCODER))

        state = np.nan_to_num(np.array(state, dtype=np.float32), nan=0.0)
        # NOTE(review): the processor fits its state scaler on per-record feature
        # rows, which cannot match this flattened vector. Apply the scaler only
        # when its fitted width agrees; otherwise the raw features are returned.
        scaler = processor.state_scaler
        if getattr(scaler, 'n_features_in_', None) == state.size:
            state = scaler.transform(state.reshape(1, -1)).flatten()
        return state.astype(np.float32)

    def _one_hot_encode(self, value, encoder):
        """One-hot encode ``value``.

        ``encoder`` is either a ``{value: index}`` dict or an ordered sequence
        of categories. Unknown values produce an all-zero vector.
        """
        if not isinstance(encoder, dict):
            encoder = {category: position for position, category in enumerate(encoder)}
        encoding = [0] * len(encoder)
        index = encoder.get(value)
        if index is not None and 0 <= index < len(encoding):
            encoding[index] = 1
        return encoding

    def _get_demand(self, route, airline, flight_date, price):
        """Return the integer number of seats demanded for one flight at ``price``.

        Base demand and elasticity come from the historical record of the same
        route and airline that is closest in time to ``flight_date``; flights
        with no history use the overall mean demand and the default elasticity.
        The holiday uplift applies only when that record is for the exact
        flight date and is flagged as a holiday.
        """
        flight_date = pd.Timestamp(flight_date)
        base_demand = self._mean_demand
        price_elasticity = self.DEFAULT_PRICE_ELASTICITY
        is_holiday = False

        history = self._history_by_flight.get((route, airline))
        if history is not None and len(history['dates']):
            gaps = np.abs(history['dates'] - flight_date.to_datetime64())
            nearest = int(np.argmin(gaps))
            base_demand = history['demand_ma7'][nearest]
            price_elasticity = history['elasticity'][nearest]
            is_holiday = bool(gaps[nearest] == np.timedelta64(0, 'ns')
                              and history['is_holiday'][nearest] == 1)

        season = self.data_processor._get_season(flight_date.month)
        base_demand = base_demand * self.seasonal_indices.get((route, airline), {}).get(season, 1.0)
        if is_holiday:
            base_demand = base_demand * self.HOLIDAY_DEMAND_MULTIPLIER

        mean_price = self._mean_price
        if mean_price and np.isfinite(mean_price):
            demand = base_demand * (1 + price_elasticity * ((price - mean_price) / mean_price))
        else:
            # No usable reference price: demand cannot respond to price.
            demand = base_demand
        if not np.isfinite(demand):
            return 0
        return max(0, int(demand))

    def step(self, action):
        """Apply one pricing decision and advance the simulation by a day.

        Args:
            action: array-like of scaled prices, one per (route, airline,
                days-ahead) combination in that nesting order. Values are
                clipped to the price scaler's feature range before decoding.

        Returns:
            ``(next_state, reward, done, info)``; reward is the revenue of this step.

        Raises:
            ValueError: if ``action`` does not have one entry per flight.
        """
        action = np.asarray(action, dtype=float).reshape(-1)
        expected = len(self.routes) * len(self.airlines) * (self.max_days_ahead + 1)
        if action.size != expected:
            raise ValueError(f"Expected {expected} prices in the action, got {action.size}")

        low, high = self.data_processor.price_scaler.feature_range
        action = np.clip(np.nan_to_num(action, nan=low, posinf=high, neginf=low), low, high)
        decoded_prices = self.data_processor.inverse_scale_prices(action)

        horizon = [self.current_date + pd.Timedelta(days=offset)
                   for offset in range(self.max_days_ahead + 1)]
        total_revenue = 0.0
        action_idx = 0
        for route in self.routes:
            for airline in self.airlines:
                for flight_date in horizon:
                    key = (route, airline, flight_date)
                    price = float(decoded_prices[action_idx])
                    action_idx += 1
                    self.prices[key] = price

                    seats_already_sold = self.seats_sold.get(key, 0)
                    remaining_capacity = max(0, self.seats_capacity - seats_already_sold)
                    demand = self._get_demand(route, airline, flight_date, price)
                    actual_demand = min(demand, remaining_capacity)

                    self.seats_sold[key] = seats_already_sold + actual_demand
                    total_revenue += actual_demand * price

        reward = total_revenue
        self.current_date += pd.Timedelta(days=1)
        self.current_step += 1
        done = self.current_step >= self.simulation_length_days
        # Use the episode's frame so a validation episode keeps seeing its own data.
        next_state = self._get_state(self._episode_data)
        info = {}
        return next_state, reward, done, info

## RL Agent Implementation

In [None]:
class ImprovedDQNAgent:
    """Epsilon-greedy agent with an online network, a target network and a replay buffer.

    The network maps a state vector to ``action_size`` outputs. ``act`` returns
    those outputs directly as the action vector, and ``replay`` regresses every
    output towards the bootstrapped return of the sampled transition.

    NOTE(review): the same output vector is used both as the action (one value
    per flight) and as the values maximised in the bootstrap target. That is
    not a standard DQN formulation; confirm the intended algorithm before
    relying on the learned policy.
    """

    def __init__(self, state_size, action_size):
        """Create the networks and set the hyper-parameters.

        Args:
            state_size: length of the flat state vector.
            action_size: number of values in an action (network output width).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=10000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.batch_size = 64
        self.model = self._build_model()
        self.target_model = self._build_model()
        # Both networks start from the same weights; otherwise the first
        # targets come from an unrelated random initialisation.
        self.target_model.set_weights(self.model.get_weights())
        self.update_target_counter = 0
        self.target_update_frequency = 10

    def _build_model(self):
        """Build and compile the 256-128-64 fully connected network."""
        model = tf.keras.Sequential([
            # An explicit Input layer replaces the deprecated ``input_dim`` argument.
            tf.keras.Input(shape=(self.state_size,)),
            tf.keras.layers.Dense(256),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dropout(0.2),

            tf.keras.layers.Dense(128),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dropout(0.2),

            tf.keras.layers.Dense(64),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),

            tf.keras.layers.Dense(self.action_size, activation='linear')
        ])

        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        # A loss object avoids depending on a string alias.
        model.compile(loss=tf.keras.losses.Huber(), optimizer=optimizer)
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer (oldest entries are evicted)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, training=True):
        """Return an action vector of length ``action_size``.

        While training, with probability ``epsilon`` the action is drawn
        uniformly from [0, 1); otherwise it is the online network's output.
        """
        if training and np.random.rand() <= self.epsilon:
            return np.random.rand(self.action_size)
        state = np.reshape(state, [1, self.state_size])
        act_values = self.model.predict(state, verbose=0)
        return act_values[0]

    def replay(self, batch_size):
        """Train the online network on a random minibatch and decay epsilon.

        Every output of a sample is regressed towards ``reward`` for terminal
        transitions and ``reward + gamma * max(target_model(next_state))``
        otherwise.

        Returns:
            The training loss, or ``None`` when the buffer holds fewer than
            ``batch_size`` transitions (nothing is trained in that case).
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return None

        minibatch = random.sample(self.memory, batch_size)
        states = np.array([np.reshape(sample[0], self.state_size) for sample in minibatch],
                          dtype=np.float32)
        next_states = np.array([np.reshape(sample[3], self.state_size) for sample in minibatch],
                               dtype=np.float32)
        rewards = np.array([sample[2] for sample in minibatch], dtype=np.float32)
        dones = np.array([bool(sample[4]) for sample in minibatch])

        # One batched forward pass instead of two predict() calls per sample.
        next_values = self.target_model.predict(next_states, verbose=0)
        returns = np.where(dones, rewards, rewards + self.gamma * np.max(next_values, axis=1))
        targets = np.repeat(returns[:, None], self.action_size, axis=1).astype(np.float32)

        history = self.model.fit(states, targets, epochs=1, verbose=0, batch_size=batch_size)
        loss = history.history['loss'][0]
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return loss

    def load(self, name):
        """Load weights into the online network and mirror them into the target network."""
        self.model.load_weights(name)
        self.target_model.set_weights(self.model.get_weights())

    def save(self, name):
        """Save the online network's weights to ``name``."""
        self.model.save_weights(name)

## Training and Evaluation

In [None]:
def train_model(env, agent, n_episodes, validation_data=None, patience=20,
                checkpoint_path='best_model.weights.h5', restore_best_weights=True):
    """Train ``agent`` on ``env`` with optional validation-based early stopping.

    Args:
        env: environment exposing ``reset``, ``step`` and ``simulation_length_days``.
        agent: agent exposing ``act``, ``remember``, ``replay``, ``save``,
            ``load``, ``memory``, ``batch_size``, ``epsilon``, ``model``,
            ``target_model``, ``update_target_counter`` and
            ``target_update_frequency``.
        n_episodes: maximum number of training episodes.
        validation_data: optional data passed to ``evaluate_model`` after each
            episode; enables checkpointing and early stopping.
        patience: number of consecutive episodes without a validation
            improvement after which training stops.
        checkpoint_path: where the best weights are saved. The default ends in
            ``.weights.h5``, which Keras 3 requires for ``save_weights``.
        restore_best_weights: when True and a checkpoint was written, reload it
            before returning so the returned agent is the best one seen.

    Returns:
        The trained agent (the same object that was passed in).
    """
    best_reward = float('-inf')
    patience_counter = 0
    checkpoint_saved = False

    for episode in range(n_episodes):
        state = env.reset()
        total_reward = 0
        losses = []

        for _ in range(env.simulation_length_days):
            action = agent.act(state)
            next_state, reward, done, _info = env.step(action)

            agent.remember(state, action, reward, next_state, done)

            if len(agent.memory) > agent.batch_size:
                loss = agent.replay(agent.batch_size)
                if loss is not None:
                    losses.append(loss)

            state = next_state
            total_reward += reward

            # Refresh the target network every ``target_update_frequency`` steps.
            agent.update_target_counter += 1
            if agent.update_target_counter >= agent.target_update_frequency:
                agent.target_model.set_weights(agent.model.get_weights())
                agent.update_target_counter = 0

            if done:
                break

        # Log before the early-stopping check so the final episode is reported too.
        avg_loss = np.mean(losses) if losses else 0
        print(f"Episode: {episode + 1}/{n_episodes}")
        print(f"Total Reward: {total_reward}")
        print(f"Average Loss: {avg_loss}")
        print(f"Epsilon: {agent.epsilon:.3f}")
        print("----------------------------------------")

        if validation_data is not None:
            val_reward = evaluate_model(env, agent, validation_data)
            if val_reward > best_reward:
                best_reward = val_reward
                agent.save(checkpoint_path)
                checkpoint_saved = True
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print("Early stopping triggered!")
                break

    # Without this the checkpoint is written but the last (possibly worse)
    # weights are what the caller receives.
    if restore_best_weights and checkpoint_saved:
        agent.load(checkpoint_path)

    return agent

def evaluate_model(env, agent, validation_data):
    """Run one greedy (non-exploring) episode on ``validation_data`` and return its total reward.

    The environment is reset with ``validation_data`` and stepped for at most
    ``env.simulation_length_days`` steps, stopping early once it reports ``done``.
    """
    observation = env.reset(validation_data)
    cumulative_reward = 0
    steps_left = env.simulation_length_days

    while steps_left > 0:
        steps_left -= 1
        chosen_action = agent.act(observation, training=False)
        observation, step_reward, finished, _ = env.step(chosen_action)
        cumulative_reward += step_reward
        if finished:
            break

    return cumulative_reward

## Data Loading and Initialization

In [None]:
# Seed every random source so a fresh "Restart & Run All" reproduces the run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# --- Synthetic data -------------------------------------------------------
num_days = 730
dates = pd.to_datetime('2023-01-01') + pd.to_timedelta(np.arange(num_days), unit='D')

routes = ['Bangalore-Delhi', 'Delhi-Mumbai', 'Mumbai-Bangalore', 'Bangalore-Mumbai', 'Delhi-Bangalore', 'Mumbai-Delhi']
airlines = ['Vistara', 'IndiGo', 'Air India']
aircraft_types = ['A320', 'B737', 'A321']
n_flight_rows = num_days * len(routes) * len(airlines)

# One row per (date, route, airline).
historical_data = pd.DataFrame({
    'Date': np.repeat(dates, len(routes) * len(airlines)),
    'Route': np.tile(np.repeat(routes, len(airlines)), num_days),
    'Airline': np.tile(airlines, num_days * len(routes)),
    'AircraftType': np.random.choice(aircraft_types, n_flight_rows),
    'Demand': np.random.randint(50, 150, n_flight_rows),
    'Price': np.random.randint(50, 200, n_flight_rows),
    # NOTE(review): the environment sells against its own seats_capacity (150)
    # and does not read this column; confirm which capacity is intended.
    'Capacity': 180,
})

fuel_prices = pd.DataFrame({
    'Date': dates,
    'FuelPrice': 60 + 10 * np.sin(np.linspace(0, 10, num_days)) + np.random.normal(0, 2, num_days)
})

climate_data = pd.DataFrame({
    'Date': np.repeat(dates, 3),
    'Location': np.tile(['Bangalore', 'Delhi', 'Mumbai'], num_days),
    'Temperature': np.random.randint(20, 35, num_days * 3),
    'WeatherCondition': np.random.choice(['Sunny', 'Cloudy', 'Rainy'], num_days * 3)
})

holiday_data = pd.DataFrame({
    'Date': pd.to_datetime(['2023-01-26', '2023-08-15', '2023-12-25', '2024-01-26', '2024-08-15', '2024-12-25']),
    'HolidayName': ['Republic Day', 'Independence Day', 'Christmas', 'Republic Day', 'Independence Day', 'Christmas'],
    'Location': ['Delhi', 'Delhi', 'Mumbai', 'Delhi', 'Delhi', 'Mumbai']
})

# --- Train / test split ---------------------------------------------------
# The simulator walks consecutive calendar days, so split chronologically:
# a shuffled split would leave gaps in both periods and mix future days
# into the training data.
train_dates, test_dates = train_test_split(dates, test_size=0.2, shuffle=False)

# .copy() makes each split an independent frame rather than a view-like slice
# of the full table, so later column assignments cannot raise
# SettingWithCopyWarning or touch the source tables.
train_historical = historical_data[historical_data['Date'].isin(train_dates)].copy()
test_historical = historical_data[historical_data['Date'].isin(test_dates)].copy()

train_fuel_prices = fuel_prices[fuel_prices['Date'].isin(train_dates)].copy()
test_fuel_prices = fuel_prices[fuel_prices['Date'].isin(test_dates)].copy()

train_climate_data = climate_data[climate_data['Date'].isin(train_dates)].copy()
test_climate_data = climate_data[climate_data['Date'].isin(test_dates)].copy()

train_holiday_data = holiday_data[holiday_data['Date'].isin(train_dates)].copy()
test_holiday_data = holiday_data[holiday_data['Date'].isin(test_dates)].copy()

# --- Processor, environment and agent -------------------------------------
data_processor = DataProcessor()
data_processor.fit(train_historical, train_fuel_prices, train_climate_data, train_holiday_data)

env = ImprovedAirlinePricingEnv(train_historical, train_fuel_prices, train_climate_data, train_holiday_data, data_processor)
state_size = len(env.reset())
action_size = len(env.routes) * len(env.airlines) * (env.max_days_ahead + 1)
agent = ImprovedDQNAgent(state_size, action_size)

# The environment reads processed columns (fuel price, weather, holiday flag, ...)
# from the frame an episode runs on, so the held-out period is prepared with
# its own auxiliary tables before it is used for validation.
validation_historical = data_processor.transform(
    test_historical, test_fuel_prices, test_climate_data, test_holiday_data
)

# --- Training and final evaluation ----------------------------------------
n_episodes = 100
trained_agent = train_model(env, agent, n_episodes, validation_data=validation_historical)

final_evaluation_reward = evaluate_model(env, trained_agent, validation_historical)
print(f"Final Evaluation Reward on Test Data: {final_evaluation_reward}")