In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [28]:
# Read the raw CSV and, in the same step, discard the 'TAIL_NUM' and 'Wind'
# columns, which are not used in this analysis. Re-running this cell always
# starts again from the file on disk.
data = (
    pd.read_csv('M1_final.csv')
    .drop(columns=['TAIL_NUM', 'Wind'])
)
data

Unnamed: 0,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,OP_UNIQUE_CARRIER,DEST,DEP_DELAY,CRS_ELAPSED_TIME,DISTANCE,CRS_DEP_M,DEP_TIME_M,...,Temperature,Dew Point,Humidity,Wind Speed,Wind Gust,Pressure,Condition,sch_dep,sch_arr,TAXI_OUT
0,11,1,5,B6,CHS,-1,124,636,324,323,...,48,34,58,25,38,29.86,Fair / Windy,9,17,14
1,11,1,5,B6,LAX,-7,371,2475,340,333,...,48,34,58,25,38,29.86,Fair / Windy,9,17,15
2,11,1,5,B6,FLL,40,181,1069,301,341,...,48,34,58,25,38,29.86,Fair / Windy,9,17,22
3,11,1,5,B6,MCO,-2,168,944,345,343,...,48,34,58,25,38,29.86,Fair / Windy,9,17,12
4,11,1,5,DL,ATL,-4,139,760,360,356,...,46,32,58,24,35,29.91,Fair / Windy,9,17,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28815,1,31,5,B6,ORH,2,57,150,1370,1372,...,39,38,96,6,0,30.18,Cloudy,20,32,19
28816,1,31,5,AA,BOS,2,75,187,1390,1392,...,39,38,96,6,0,30.18,Cloudy,19,23,22
28817,1,31,5,AS,SEA,283,392,2422,1125,1408,...,39,38,96,6,0,30.18,Cloudy,19,23,21
28818,1,31,5,B6,SJU,5,224,1598,1417,1422,...,39,38,96,6,0,30.18,Cloudy,19,23,13


In [29]:
# Summary statistics (count, mean, std, min, quartiles, max), transposed so
# that each described column of `data` becomes one row of the table.
data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MONTH,28820.0,7.89424,4.991723,1.0,1.0,11.0,12.0,12.0
DAY_OF_MONTH,28820.0,16.021096,8.750179,1.0,8.0,16.0,24.0,31.0
DAY_OF_WEEK,28820.0,4.008952,1.98523,1.0,2.0,4.0,6.0,7.0
DEP_DELAY,28820.0,6.374983,38.735144,-22.0,-6.0,-3.0,2.0,1276.0
CRS_ELAPSED_TIME,28820.0,225.288203,119.482417,57.0,124.0,188.0,365.0,697.0
DISTANCE,28820.0,1267.746079,889.343246,94.0,483.0,1029.0,2248.0,4983.0
CRS_DEP_M,28820.0,831.003851,299.398525,301.0,545.0,856.0,1095.0,1439.0
DEP_TIME_M,28820.0,828.934698,305.864103,1.0,542.0,854.0,1097.0,1440.0
CRS_ARR_M,28820.0,910.874289,345.411743,1.0,667.0,918.0,1193.0,1439.0
Temperature,28820.0,41.489833,8.043533,17.0,36.0,42.0,47.0,68.0


In [30]:
# Count the missing (NaN/None) entries in every column of `data`.
# Placeholder strings are not detected by this check.
data.isna().sum()

MONTH                0
DAY_OF_MONTH         0
DAY_OF_WEEK          0
OP_UNIQUE_CARRIER    0
DEST                 0
DEP_DELAY            0
CRS_ELAPSED_TIME     0
DISTANCE             0
CRS_DEP_M            0
DEP_TIME_M           0
CRS_ARR_M            0
Temperature          0
Dew Point            0
Humidity             0
Wind Speed           0
Wind Gust            0
Pressure             0
Condition            0
sch_dep              0
sch_arr              0
TAXI_OUT             0
dtype: int64

In [31]:
# Pairwise Pearson correlation between the numeric columns of `data`;
# non-numeric columns are skipped because of numeric_only=True.
# NOTE(review): the displayed matrix has no 'Dew Point' row/column even though
# the frame contains that column -- presumably it is parsed as a non-numeric
# dtype and therefore dropped here. Confirm and convert it if it should count.
corr_matrix = data.corr(method='pearson', numeric_only=True)
corr_matrix

Unnamed: 0,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,DEP_DELAY,CRS_ELAPSED_TIME,DISTANCE,CRS_DEP_M,DEP_TIME_M,CRS_ARR_M,Temperature,Humidity,Wind Speed,Wind Gust,Pressure,sch_dep,sch_arr,TAXI_OUT
MONTH,1.0,-0.007254,0.029365,0.046455,-0.016052,-0.000144,0.001824,0.002632,-0.012361,0.135682,0.012515,-0.035097,-0.051014,-0.085274,-0.006191,-0.019705,0.018505
DAY_OF_MONTH,-0.007254,1.0,-0.029732,-0.036618,0.002761,-0.002762,0.001754,0.004446,0.008519,-0.132457,0.096215,-0.093211,-0.088991,0.016871,-0.003417,0.002223,-0.02399
DAY_OF_WEEK,0.029365,-0.029732,1.0,-0.006203,0.000413,0.00245,0.005411,0.008256,-0.003339,0.013081,0.001627,0.095943,0.127689,0.102289,0.024415,0.009621,0.028388
DEP_DELAY,0.046455,-0.036618,-0.006203,1.0,-0.030871,-0.031507,0.102384,0.07234,0.043691,-0.032261,-0.011776,0.067718,0.04434,-0.064939,-0.065452,-0.006949,0.034881
CRS_ELAPSED_TIME,-0.016052,0.002761,0.000413,-0.030871,1.0,0.994465,-0.031332,-0.021391,0.035186,-0.01605,0.017198,-0.01344,-0.011934,0.010165,0.083426,-0.067669,0.070838
DISTANCE,-0.000144,-0.002762,0.00245,-0.031507,0.994465,1.0,-0.040963,-0.034604,0.00852,-0.009923,0.01511,-0.010734,-0.009522,0.005692,0.059174,-0.088793,0.059856
CRS_DEP_M,0.001824,0.001754,0.005411,0.102384,-0.031332,-0.040963,1.0,0.946023,0.452476,0.086371,0.003843,0.008063,-0.009887,-0.008285,-0.025621,0.495286,0.044864
DEP_TIME_M,0.002632,0.004446,0.008256,0.07234,-0.021391,-0.034604,0.946023,1.0,0.469758,0.098105,-0.005923,0.006033,-0.014336,-0.00941,0.025609,0.544197,0.057896
CRS_ARR_M,-0.012361,0.008519,-0.003339,0.043691,0.035186,0.00852,0.452476,0.469758,1.0,0.10162,-0.033163,0.01418,0.004486,-0.012682,0.149903,0.424209,0.071377
Temperature,0.135682,-0.132457,0.013081,-0.032261,-0.01605,-0.009923,0.086371,0.098105,0.10162,1.0,0.054401,0.010603,-0.039487,-0.37673,-0.022534,0.075958,-0.067574
