# Linear Regression on Taxi Out Time

In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Load the dataset

In [2]:
# Read the raw flight records from the CSV file in the working directory.
flight_data = pd.read_csv(filepath_or_buffer='M1_final.csv')


## Dropping rows with missing values in the 'Wind' column

In [3]:
# Keep only the rows that have a value in 'Wind'; the raw frame is left untouched.
flight_data_cleaned = flight_data.loc[flight_data['Wind'].notna()]


## Calculating the correlation matrix

In [4]:
# Pairwise correlation between the numeric columns of the cleaned frame.
# numeric_only=True is passed explicitly: recent pandas versions no longer
# skip non-numeric columns by default and raise on them instead, which would
# break a fresh "Restart & Run All".
# NOTE(review): assumes the CSV contains at least one non-numeric column
# (e.g. text categories) -- confirm against the data.
correlation_matrix = flight_data_cleaned.corr(numeric_only=True)


  correlation_matrix = flight_data_cleaned.corr()


## Getting the correlation of all features with 'TAXI_OUT'

In [5]:
# Pull the TAXI_OUT column out of the correlation matrix and order it from
# the most positive correlation down to the most negative.
taxi_out_corr = (
    correlation_matrix
    .loc[:, 'TAXI_OUT']
    .sort_values(ascending=False)
)


## Selecting features whose absolute correlation with 'TAXI_OUT' is greater than 0.06

In [6]:
# Keep the column names whose absolute correlation with TAXI_OUT is above 0.06
# (so both positive and negative relationships qualify), then remove the
# target itself from the resulting index.
selected_features = (
    taxi_out_corr
    .loc[taxi_out_corr.abs().gt(0.06)]
    .index
    .drop('TAXI_OUT')
)


## Selecting the data corresponding to the selected features and target

In [7]:
# Feature matrix: all cleaned rows, restricted to the selected feature columns.
X_selected = flight_data_cleaned.loc[:, selected_features]


In [8]:
# Target vector: the TAXI_OUT column of the cleaned frame.
y_selected = flight_data_cleaned.loc[:, 'TAXI_OUT']


## Splitting the data into training and test sets

In [11]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
(
    X_train_selected,
    X_test_selected,
    y_train_selected,
    y_test_selected,
) = train_test_split(
    X_selected,
    y_selected,
    random_state=42,
    test_size=0.2,
)



## Training a linear regression model with the selected features

In [12]:
# Create an unfitted linear regression estimator with scikit-learn's default settings.
lr_model_selected = LinearRegression()


In [14]:
# Fit the regressor on the training split only. Left as a bare expression so
# the notebook displays the fitted estimator as the cell output.
lr_model_selected.fit(X=X_train_selected, y=y_train_selected)


## Making predictions on the test data

In [15]:
# Predict TAXI_OUT for the held-out test rows with the fitted model.
y_pred_selected = lr_model_selected.predict(X=X_test_selected)


## Evaluating the model's performance

In [16]:
# Mean absolute error between the true and predicted test targets.
mae_selected = mean_absolute_error(
    y_true=y_test_selected,
    y_pred=y_pred_selected,
)


In [17]:
# Mean squared error between the true and predicted test targets.
mse_selected = mean_squared_error(
    y_true=y_test_selected,
    y_pred=y_pred_selected,
)


In [18]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2_selected = r2_score(
    y_true=y_test_selected,
    y_pred=y_pred_selected,
)


## Printing the performance metrics

In [19]:
# Report the mean absolute error measured on the test split.
print(f'MAE: {mae_selected}')


MAE: 5.280524473495101


In [20]:
# Report the mean squared error measured on the test split.
print(f'MSE: {mse_selected}')


MSE: 43.54529009367293


In [21]:
# Report the R^2 score measured on the test split.
print(f'R2 Score: {r2_selected}')


R2 Score: 0.05761636146631355
