# Day 5: Linear Regression for Passenger Satisfaction

## Problem Description

Build a linear regression model to predict overall satisfaction scores based on input features using the Airline Passenger Satisfaction dataset.

Dataset source: https://www.kaggle.com/code/ahmedabbas757/airline-passenger-satisfaction-eda/input

In [None]:
import sys
import subprocess

# Map each import name to the distribution name that pip expects. They differ
# for scikit-learn: it is imported as "sklearn" but published on PyPI as
# "scikit-learn", so passing the import name to pip would not install it.
PIP_PACKAGE_NAMES = {
    "pandas": "pandas",
    "seaborn": "seaborn",
    "matplotlib": "matplotlib",
    "sklearn": "scikit-learn",
}

# Try importing each dependency; install it with the running interpreter's pip
# only when the import fails.
for module, package in PIP_PACKAGE_NAMES.items():
    try:
        __import__(module)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

In [1]:

# Import necessary libraries
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Read the airline passenger satisfaction CSV into a DataFrame.
# Note: despite its name, `url` holds a relative file path, not a web address.
url = 'mathematical_datasets/airline_passenger_satisfaction.csv'
data = pd.read_csv(filepath_or_buffer=url)

# Display the column labels so later cells can reference them exactly.
data.columns

Index(['ID', 'Gender', 'Age', 'Customer Type', 'Type of Travel', 'Class',
       'Flight Distance', 'Departure Delay', 'Arrival Delay',
       'Departure and Arrival Time Convenience', 'Ease of Online Booking',
       'Check-in Service', 'Online Boarding', 'Gate Location',
       'On-board Service', 'Seat Comfort', 'Leg Room Service', 'Cleanliness',
       'Food and Drink', 'In-flight Service', 'In-flight Wifi Service',
       'In-flight Entertainment', 'Baggage Handling', 'Satisfaction'],
      dtype='object')

In [None]:
# Prepare features and target.
# Column labels must match data.columns exactly, including spaces and hyphens
# ('Flight Distance', 'In-flight Service'); the squashed spellings
# 'FlightDistance' / 'InflightService' are not in the index and raise KeyError.
X = data[['Flight Distance', 'In-flight Service']]
y = data['Satisfaction']

# LinearRegression and mean_squared_error require a numeric target. If
# 'Satisfaction' is stored as text labels, replace each label with an integer
# code (categories are sorted, codes start at 0). A numeric column is left
# untouched.
# NOTE(review): presumably 'Satisfaction' holds a small set of text labels in
# this dataset -- confirm, and consider a classifier if it is binary.
if not pd.api.types.is_numeric_dtype(y):
    y = y.astype('category').cat.codes.rename('Satisfaction')

# Train-test split: hold out 20% of the rows, with a fixed seed for
# reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit linear regression model on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and evaluate with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error:", mse)

# Visualize results: scatter plus fitted line of the (numeric) target against
# flight distance, using the full dataset.
sns.regplot(x=data['Flight Distance'], y=y)
plt.title('Regression: Satisfaction vs Flight Distance')
plt.show()
            