# Read in data

In [1]:
from datetime import timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

airports = [
    "KATL",
    "KCLT",
    "KDEN",
    "KDFW",
    "KJFK",
    "KMEM",
    "KMIA",
    "KORD",
    "KPHX",
    "KSEA",
]

In [2]:
DATA_DIRECTORY = Path("code execution development data/old")

## LAMP

In [3]:
def read_lamp(airport):
    lamp = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_lamp.csv.bz2",
        parse_dates=["timestamp", "forecast_timestamp"],
        dtype={"temperature": "int16", "wind_direction":"int16", "wind_gust":"int16", "cloud_ceiling":"float16", "visibility":"int16"}
    )
    return lamp

## Config

In [4]:
def read_config(airport):
    config = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_config.csv.bz2",
        parse_dates=["timestamp", "start_time"]
    )
    config.departure_runways = config.departure_runways.astype(str)
    return config

## Runways

In [5]:
def read_runways(airport):
    runways = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_runways.csv.bz2",
        parse_dates=["timestamp", "departure_runway_actual_time"]
    )
    return runways

## TFM

In [6]:
def read_tfm(airport):
    tfm = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_tfm.csv.bz2",
        parse_dates=["timestamp", "arrival_runway_estimated_time"]
    )
    return tfm

## TBFM

In [7]:
def read_tbfm(airport):
    tbfm = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_tbfm.csv.bz2",
        parse_dates=["timestamp", "scheduled_runway_estimated_time"]
    )
    return tbfm

## ETD

In [8]:
def read_etd(airport):
    etd = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_etd.csv.bz2",
        parse_dates=["departure_runway_estimated_time", "timestamp"]
    )
    return etd

## MFS

In [9]:
def read_mfs(airport):
    etd = pd.read_csv(
        DATA_DIRECTORY / airport / f"{airport}_mfs.csv.bz2"
    )
    return etd

# Initial Model, Linear Regression

In [10]:
submission_format = pd.read_csv(
    "code execution development data/test_labels.csv", parse_dates=["timestamp"]
)

## Insert Data

In [11]:
def filter_lamp(current_time):
    # weather data
    valid_lamp = lamp.loc[(lamp.timestamp <= current_time) 
                    & (lamp.timestamp > valid_time) 
                    & (lamp.forecast_timestamp <= current_time) 
                    & (lamp.forecast_timestamp > valid_time)]
    return valid_lamp.iloc[-1]

def filter_config(current_time):
    # runway config data
    valid_config = config.loc[(config.timestamp <= current_time) 
                    & (config.timestamp > valid_time) 
                    & (config.start_time <= current_time) 
                    & (config.start_time > valid_time)]
    try:
        items = valid_config.iloc[-1].departure_runways.split()
        return len(items)
    except:
        return 2

def filter_runways(current_time):
    # traffic (schedule) data
    valid_runways = runways.loc[(runways.timestamp <= current_time) 
                & (runways.timestamp > valid_time) 
                & (runways.departure_runway_actual_time <= current_time) 
                & (runways.departure_runway_actual_time > crunch_time)]
    runway_traffic = valid_runways['gufi'].nunique()
    return runway_traffic

def filter_tbfm(current_time):
    # traffic (schedule) data
    valid_tbfm = tbfm.loc[(tbfm.timestamp <= current_time) 
                    & (tbfm.timestamp > valid_time)]
    return valid_tbfm['gufi'].nunique()

def filter_tfm(current_time):
    # traffic (estimate) data
    valid_tfm = tfm.loc[(tfm.timestamp <= current_time) 
                    & (tfm.timestamp > valid_time)]
    return valid_tfm['gufi'].nunique()
    
def filter_etd(current_time):    
    #etd data
    valid_etd = etd.loc[(etd.timestamp <= current_time) 
                    & (etd.timestamp > valid_time) 
                    & (etd.gufi == df.loc[i].gufi)]
    try:
        return (valid_etd.iloc[-1].departure_runway_estimated_time - current_time).total_seconds()
    except:
        return 3600

In [12]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
full_frame = pd.DataFrame()
for a in airports:
    airport = a
    df = submission_format[submission_format.airport == a]
    times = df.timestamp.unique()

    etd = read_etd(airport)
    config = read_config(airport)
    runways = read_runways(airport)
    tbfm = read_tbfm(airport)
    tfm = read_tfm(airport)
    lamp = read_lamp(airport)

    for t in times:
        indices = df[df.timestamp == t].index
        current_time = pd.to_datetime(t)
        valid_time = current_time - pd.Timedelta(30, unit='hours')
        crunch_time = current_time - pd.Timedelta(1, unit='hours')
        
        # insert etd data
        for i in indices:
            df.loc[i, 'etd'] = filter_etd(current_time)
            
        # insert traffic data
        df.loc[indices, 'config'] = filter_config(current_time)
        df.loc[indices, 'runways'] = filter_runways(current_time)
        df.loc[indices, 'traffic_tbfm'] = filter_tbfm(current_time)
        df.loc[indices, 'traffic_tfm'] = filter_tfm(current_time)

        # insert weather data
        current_forecast = filter_lamp(current_time)
        df.loc[indices, 'precip'] = current_forecast.precip
        df.loc[indices, 'lightning_prob'] = current_forecast.lightning_prob
        df.loc[indices, 'cloud'] = filter_lamp(current_time).cloud
        df.loc[indices, 'visibility'] = current_forecast.visibility
        df.loc[indices, 'cloud_ceiling'] = current_forecast.cloud_ceiling
        df.loc[indices, 'wind_gust'] = current_forecast.wind_gust
        df.loc[indices, 'wind_speed'] = current_forecast.wind_speed
        df.loc[indices, 'wind_direction'] = current_forecast.wind_direction
        df.loc[indices, 'temperature'] = current_forecast.temperature
        
    full_frame = pd.concat([full_frame, df])
        
# insert metadata
metadata = pd.concat([read_mfs('KATL'),
                      read_mfs('KCLT'),
                      read_mfs('KDEN'),
                      read_mfs('KDFW'),
                      read_mfs('KJFK'),
                      read_mfs('KMEM'),
                      read_mfs('KMIA'),
                      read_mfs('KORD'),
                      read_mfs('KPHX'),
                      read_mfs('KSEA')])

full_frame = full_frame.merge(metadata, how='left', on='gufi')

full_frame.head(5)

Unnamed: 0,gufi,timestamp,airport,minutes_until_pushback,etd,config,runways,traffic_tbfm,traffic_tfm,precip,...,cloud_ceiling,wind_gust,wind_speed,wind_direction,temperature,aircraft_engine_class,aircraft_type,major_carrier,flight_type,isdeparture
0,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 11:15:00,KATL,86,6300.0,2.0,1.0,833.0,1710.0,False,...,8.0,0.0,2.0,13.0,54.0,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,True
1,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 11:15:00,KATL,86,6300.0,2.0,1.0,833.0,1710.0,False,...,8.0,0.0,2.0,13.0,54.0,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
2,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 12:00:00,KATL,41,3600.0,2.0,4.0,877.0,1752.0,False,...,8.0,0.0,5.0,14.0,53.0,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,True
3,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 12:00:00,KATL,41,3600.0,2.0,4.0,877.0,1752.0,False,...,8.0,0.0,5.0,14.0,53.0,JET,A319,AAL,SCHEDULED_AIR_TRANSPORT,False
4,AAL153.ATL.CLT.201114.1137.0016.TFM,2020-11-15 10:30:00,KATL,62,4980.0,2.0,0.0,808.0,1677.0,False,...,8.0,0.0,5.0,11.0,53.0,JET,A320,AAL,SCHEDULED_AIR_TRANSPORT,True


# Data Prep

## Distribution

In [14]:
full_frame.select_dtypes(include='number').describe()

Unnamed: 0,minutes_until_pushback,etd,config,runways,traffic_tbfm,traffic_tfm,visibility,cloud_ceiling,wind_gust,wind_speed,wind_direction,temperature
count,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0,2356.0
mean,44.109932,3453.388795,1.972411,28.724958,854.047114,1477.46562,6.750849,7.18039,6.518251,10.754669,18.072581,56.983871
std,28.722366,1638.195583,0.578895,23.64035,286.623268,477.352844,0.773979,1.421451,11.394941,6.077706,7.727816,15.545719
min,1.0,-10920.0,1.0,0.0,229.0,389.0,1.0,2.0,0.0,1.0,1.0,22.0
25%,21.0,2280.0,2.0,10.0,626.0,1053.0,7.0,7.0,0.0,5.0,12.0,46.0
50%,43.0,3540.0,2.0,22.0,882.0,1607.0,7.0,8.0,0.0,9.0,16.0,58.0
75%,63.0,4740.0,2.0,44.0,1155.0,1959.0,7.0,8.0,19.0,16.0,24.0,75.0
max,271.0,9840.0,4.0,85.0,1253.0,2108.0,7.0,8.0,31.0,23.0,35.0,80.0


In [15]:
full_frame.select_dtypes(exclude='number').describe()

Unnamed: 0,gufi,timestamp,airport,precip,lightning_prob,cloud,aircraft_engine_class,aircraft_type,major_carrier,flight_type,isdeparture
count,2356,2356,2356,2356,2356,2356,2356,2356,1902,2182,2356
unique,1228,17,10,2,3,5,2,37,3,2,2
top,DAL8903.MIA.ATL.201114.0057.0044.TFM,2020-11-15 00:00:00,KATL,False,N,FW,JET,A321,AAL,SCHEDULED_AIR_TRANSPORT,True
freq,8,485,416,1991,2338,681,2320,322,958,2170,2046
first,,2020-11-15 00:00:00,,,,,,,,,
last,,2020-11-15 12:00:00,,,,,,,,,


## Missing Values

In [16]:
full_frame.isnull().sum()

gufi                        0
timestamp                   0
airport                     0
minutes_until_pushback      0
etd                         0
config                      0
runways                     0
traffic_tbfm                0
traffic_tfm                 0
precip                      0
lightning_prob              0
cloud                       0
visibility                  0
cloud_ceiling               0
wind_gust                   0
wind_speed                  0
wind_direction              0
temperature                 0
aircraft_engine_class       0
aircraft_type               0
major_carrier             454
flight_type               174
isdeparture                 0
dtype: int64

In [17]:
# impute missing values with mode
full_frame.major_carrier.fillna(full_frame.major_carrier.mode()[0], inplace=True)
full_frame.flight_type.fillna(full_frame.flight_type.mode()[0], inplace=True)

## Outliers

In [18]:
from scipy import stats
z_scores = stats.zscore(full_frame.select_dtypes(include='number').describe())
z_scores

Unnamed: 0,minutes_until_pushback,etd,config,runways,traffic_tbfm,traffic_tfm,visibility,cloud_ceiling,wind_gust,wind_speed,wind_direction,temperature
count,2.631246,0.043772,2.645749,2.644459,2.218603,1.352411,2.645737,2.645737,2.645522,2.645661,2.645547,2.644721
mean,-0.406501,0.243871,-0.377917,-0.380062,-0.160222,0.071602,-0.375991,-0.376371,-0.382034,-0.377132,-0.375589,-0.369322
std,-0.426719,-0.087114,-0.379707,-0.38667,-1.05892,-1.386455,-0.383678,-0.38378,-0.37575,-0.38316,-0.388957,-0.423648
min,-0.463146,-2.376999,-0.379166,-0.417393,-1.150185,-1.515264,-0.383388,-0.383036,-0.390434,-0.389705,-0.397651,-0.415187
25%,-0.436866,0.029914,-0.377882,-0.404397,-0.521408,-0.547223,-0.37567,-0.376603,-0.390434,-0.384549,-0.383437,-0.383722
50%,-0.407959,0.259664,-0.377882,-0.388801,-0.11595,0.260449,-0.37567,-0.375316,-0.390434,-0.379394,-0.378268,-0.36799
75%,-0.38168,0.478474,-0.377882,-0.36021,0.316434,0.773627,-0.37567,-0.375316,-0.36595,-0.370371,-0.36793,-0.345703
max,-0.108375,1.408418,-0.375313,-0.306927,0.471648,0.990853,-0.37567,-0.375316,-0.350487,-0.361349,-0.353715,-0.339148


In [19]:
z_scores[z_scores.abs() >= 3].count()

minutes_until_pushback    0
etd                       0
config                    0
runways                   0
traffic_tbfm              0
traffic_tfm               0
visibility                0
cloud_ceiling             0
wind_gust                 0
wind_speed                0
wind_direction            0
temperature               0
dtype: int64

## Feature Engineering

In [20]:
full_frame.nunique()

gufi                      1228
timestamp                   17
airport                     10
minutes_until_pushback     133
etd                        142
config                       4
runways                     46
traffic_tbfm               114
traffic_tfm                118
precip                       2
lightning_prob               3
cloud                        5
visibility                   4
cloud_ceiling                6
wind_gust                   13
wind_speed                  23
wind_direction              29
temperature                 39
aircraft_engine_class        2
aircraft_type               37
major_carrier                3
flight_type                  2
isdeparture                  2
dtype: int64

In [21]:
# binary encoding
full_frame.replace(False, 0, inplace=True)
full_frame.replace(True, 0, inplace=True)

In [22]:
# nominal encoding
full_frame = pd.get_dummies(full_frame, columns=['airport',
                                 'cloud', 
                                 'lightning_prob',
                                 'aircraft_engine_class',
                                 'aircraft_type',
                                 'major_carrier',
                                 'flight_type'], 
                    drop_first=True)
full_frame.head(5)

Unnamed: 0,gufi,timestamp,minutes_until_pushback,etd,config,runways,traffic_tbfm,traffic_tfm,precip,visibility,...,aircraft_type_E170,aircraft_type_E55P,aircraft_type_E75L,aircraft_type_E75S,aircraft_type_F900,aircraft_type_MD11,aircraft_type_PC12,major_carrier_DAL,major_carrier_UAL,flight_type_SCHEDULED_AIR_TRANSPORT
0,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 11:15:00,86,6300.0,2.0,1.0,833.0,1710.0,0,7.0,...,0,0,0,0,0,0,0,0,0,1
1,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 11:15:00,86,6300.0,2.0,1.0,833.0,1710.0,0,7.0,...,0,0,0,0,0,0,0,0,0,1
2,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 12:00:00,41,3600.0,2.0,4.0,877.0,1752.0,0,7.0,...,0,0,0,0,0,0,0,0,0,1
3,AAL1227.ATL.MIA.201114.1242.0052.TFM,2020-11-15 12:00:00,41,3600.0,2.0,4.0,877.0,1752.0,0,7.0,...,0,0,0,0,0,0,0,0,0,1
4,AAL153.ATL.CLT.201114.1137.0016.TFM,2020-11-15 10:30:00,62,4980.0,2.0,0.0,808.0,1677.0,0,7.0,...,0,0,0,0,0,0,0,0,0,1


# Linear Regression

## Model 1: All Features

In [23]:
import statsmodels.api as sm

Y = full_frame['minutes_until_pushback']
X = full_frame.drop(columns=['gufi', 'timestamp', 'minutes_until_pushback'])
X = sm.add_constant(X)

model = sm.OLS(Y,X).fit()
model.summary()

0,1,2,3
Dep. Variable:,minutes_until_pushback,R-squared:,0.586
Model:,OLS,Adj. R-squared:,0.574
Method:,Least Squares,F-statistic:,49.85
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,0.0
Time:,11:27:46,Log-Likelihood:,-10215.0
No. Observations:,2356,AIC:,20560.0
Df Residuals:,2290,BIC:,20940.0
Df Model:,65,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,19.6966,54.536,0.361,0.718,-87.248,126.641
etd,0.0129,0.000,51.517,0.000,0.012,0.013
config,7.2457,3.174,2.283,0.023,1.022,13.470
runways,0.1243,0.027,4.560,0.000,0.071,0.178
traffic_tbfm,-0.0228,0.064,-0.357,0.721,-0.148,0.103
traffic_tfm,0.0088,0.068,0.129,0.897,-0.125,0.143
precip,5.171e-13,4.91e-12,0.105,0.916,-9.11e-12,1.01e-11
visibility,2.9908,0.884,3.384,0.001,1.258,4.724
cloud_ceiling,-7.4223,1.336,-5.556,0.000,-10.042,-4.802

0,1,2,3
Omnibus:,2136.252,Durbin-Watson:,0.895
Prob(Omnibus):,0.0,Jarque-Bera (JB):,96984.86
Skew:,4.187,Prob(JB):,0.0
Kurtosis:,33.296,Cond. No.,1.07e+16


In [24]:
# calculate MAE
from sklearn.metrics import mean_absolute_error as mae
ypred = model.predict()
MAE = mae(Y, ypred)
MAE

10.27396558092066

In [25]:
# check for multicollinearity
from statsmodels.stats.outliers_influence import variance_inflation_factor

# create VIF dataframe
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
  
# calculate VIF for each feature
vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                          for i in range(len(X.columns))]
  
vif_data

Unnamed: 0,feature,VIF
0,const,0.000000
1,etd,1.135065
2,config,22.629130
3,runways,2.784137
4,traffic_tbfm,2252.609645
...,...,...
64,aircraft_type_MD11,2.896605
65,aircraft_type_PC12,inf
66,major_carrier_DAL,2.741051
67,major_carrier_UAL,2.963232


## Model 2: Reduced Features

In [26]:
# reduce features
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector

# re-define independent variables
X = full_frame.drop(columns=['gufi', 'timestamp', 'minutes_until_pushback'])

# linear regression estimator to be used in feature selection
reg = LinearRegression().fit(X, Y)

In [27]:
# reduce features using SequentialFeatureSelector
sfs = SequentialFeatureSelector(reg, n_features_to_select=8)
sfs.fit(X, Y)

In [28]:
# return selected features
sfs.get_feature_names_out()

array(['etd', 'wind_direction', 'airport_KCLT', 'airport_KORD',
       'aircraft_type_B752', 'aircraft_type_B763', 'aircraft_type_E145',
       'aircraft_type_E170'], dtype=object)

In [29]:
# re-fit model
X = full_frame[['etd', 'wind_direction', 'airport_KCLT', 'airport_KORD',
       'aircraft_type_B752', 'aircraft_type_B763', 'aircraft_type_E145',
       'aircraft_type_E170']]
X = sm.add_constant(X)

model2 = sm.OLS(Y,X).fit()
model2.summary()

0,1,2,3
Dep. Variable:,minutes_until_pushback,R-squared:,0.539
Model:,OLS,Adj. R-squared:,0.537
Method:,Least Squares,F-statistic:,342.7
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,0.0
Time:,11:27:53,Log-Likelihood:,-10342.0
No. Observations:,2356,AIC:,20700.0
Df Residuals:,2347,BIC:,20750.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,4.2990,1.383,3.109,0.002,1.587,7.011
etd,0.0128,0.000,51.673,0.000,0.012,0.013
wind_direction,-0.1569,0.054,-2.926,0.003,-0.262,-0.052
airport_KCLT,-7.3886,1.492,-4.953,0.000,-10.314,-4.464
airport_KORD,-9.4155,1.445,-6.514,0.000,-12.250,-6.581
aircraft_type_B752,10.1641,2.351,4.323,0.000,5.554,14.774
aircraft_type_B763,9.3781,2.778,3.376,0.001,3.931,14.825
aircraft_type_E145,-7.2123,2.561,-2.816,0.005,-12.234,-2.190
aircraft_type_E170,-7.8682,2.133,-3.690,0.000,-12.050,-3.686

0,1,2,3
Omnibus:,2288.85,Durbin-Watson:,0.864
Prob(Omnibus):,0.0,Jarque-Bera (JB):,127089.597
Skew:,4.612,Prob(JB):,0.0
Kurtosis:,37.778,Cond. No.,26600.0


In [30]:
# re-calculate MAE
from sklearn.metrics import mean_absolute_error as mae
ypred = model.predict()
MAE = mae(Y, ypred)
MAE

10.27396558092066

In [31]:
# re-check for multicollinearity
from statsmodels.stats.outliers_influence import variance_inflation_factor

# create VIF dataframe
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
  
# calculate VIF for each feature
vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                          for i in range(len(X.columns))]
  
vif_data

Unnamed: 0,feature,VIF
0,const,11.800006
1,etd,1.008809
2,wind_direction,1.059428
3,airport_KCLT,1.05678
4,airport_KORD,1.028708
5,aircraft_type_B752,1.02395
6,aircraft_type_B763,1.008269
7,aircraft_type_E145,1.020636
8,aircraft_type_E170,1.009005


## Model 3: PCR

In [39]:
# perform PCA
import numpy as np
from sklearn.preprocessing import StandardScaler 
from sklearn.decomposition import PCA

data = full_frame.drop(columns=['gufi', 'timestamp', 'minutes_until_pushback'])
scale = StandardScaler()
X = scale.fit_transform(data)


X

array([[ 1.73801929,  0.04766837, -1.17303015, ..., -0.57212232,
        -0.42677568,  0.07155036],
       [ 1.73801929,  0.04766837, -1.17303015, ..., -0.57212232,
        -0.42677568,  0.07155036],
       [ 0.08951454,  0.04766837, -1.04610154, ..., -0.57212232,
        -0.42677568,  0.07155036],
       ...,
       [-0.4233536 , -1.68012681, -0.1152917 , ..., -0.57212232,
         2.34315132,  0.07155036],
       [ 1.1885177 , -1.68012681, -1.17303015, ..., -0.57212232,
         2.34315132,  0.07155036],
       [ 1.1885177 , -1.68012681, -1.17303015, ..., -0.57212232,
         2.34315132,  0.07155036]])

In [40]:
pca=PCA()
X_red = pca.fit_transform(X)

In [52]:
np.cumsum(np.round(pca.explained_variance_ratio_, decimals = 4)*100)[0:10]

array([ 7.6 , 14.38, 19.45, 23.72, 27.09, 30.26, 33.05, 35.8 , 38.26,
       40.44])

In [53]:
X = sm.add_constant(X)
model = sm.OLS(Y,X[:,0:10]).fit()
model.summary()

0,1,2,3
Dep. Variable:,minutes_until_pushback,R-squared:,0.525
Model:,OLS,Adj. R-squared:,0.523
Method:,Least Squares,F-statistic:,324.1
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,0.0
Time:,11:41:20,Log-Likelihood:,-10377.0
No. Observations:,2356,AIC:,20770.0
Df Residuals:,2347,BIC:,20820.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,44.1099,0.409,107.959,0.000,43.309,44.911
x1,20.3426,0.419,48.581,0.000,19.522,21.164
x2,1.8093,0.450,4.016,0.000,0.926,2.693
x3,1.7165,0.580,2.958,0.003,0.579,2.854
x4,-2.9135,2.475,-1.177,0.239,-7.768,1.941
x5,0.4817,2.315,0.208,0.835,-4.058,5.021
x6,4.028e-15,1.48e-16,27.152,0.000,3.74e-15,4.32e-15
x7,1.9496,0.615,3.171,0.002,0.744,3.155
x8,-1.2588,0.705,-1.786,0.074,-2.641,0.123

0,1,2,3
Omnibus:,2149.921,Durbin-Watson:,0.854
Prob(Omnibus):,0.0,Jarque-Bera (JB):,94505.158
Skew:,4.246,Prob(JB):,0.0
Kurtosis:,32.842,Cond. No.,2.63e+17


In [54]:
# re-calculate MAE
from sklearn.metrics import mean_absolute_error as mae
ypred = model.predict()
MAE = mae(Y, ypred)
MAE

11.289810632306754