# Parameter Correlations and Scatter Plot Analysis

In [1]:
# Import standard-library and third-party libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import linear_model as lm
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from scipy.stats import circmean

This notebook explores correlations among the weather, air quality, drilling, and production data. The analysis covers both daily and monthly measurements.

## Exploring Parameter Correlations
The four data sets must be combined into one data frame. Because wind direction is an angle given in degrees, aggregating it from hourly to daily values requires a circular mean rather than an arithmetic mean: for example, readings of 350° and 10° should average to 0°, not 180°.

In [2]:
# Load the cleaned input tables: air quality, weather, drilling, and
# production (monthly and daily).
AQ_data = pd.read_csv('FarmingtonCommunityHall_AQ.csv')
weather_data = pd.read_csv('weather_data.csv')
drill_data = pd.read_csv('drill_final.csv')
production_data_monthly = pd.read_csv('production_monthly.csv')
production_data_daily = pd.read_csv('production_daily.csv')


# Inner-join weather with air quality. No key is given, so pandas joins on the
# columns the two tables have in common. The result is then indexed by the
# 'Datetime' column (converted to a DatetimeIndex) and that column is removed
# from the data.
AQ_weather_hourly = (
    pd.merge(weather_data, AQ_data, how='inner')
    .pipe(lambda merged: merged.set_index(pd.DatetimeIndex(merged['Datetime'])))
    .drop(columns='Datetime')
)

# Daily aggregate: arithmetic mean of every column within each day
AQ_weather_daily = AQ_weather_hourly.resample('D').mean()

FileNotFoundError: [Errno 2] No such file or directory: 'FarmingtonCommunityHall_AQ.csv'

In [None]:
# scipy.stats.circmean is used to compute the circular mean of the samples

# Keep only the wind-direction column (as a one-column frame) so a circular
# mean can be applied when resampling to daily
WDIR = AQ_weather_hourly.filter(items=['WDIR_VECT_(DEG)'], axis='columns')

def circular_mean(x):
    """Return the circular mean of the wind directions in ``x``, in degrees.

    Parameters
    ----------
    x : pandas.DataFrame or mapping
        Must provide a ``'WDIR_VECT_(DEG)'`` entry holding angles in degrees.

    Returns
    -------
    float
        Circular mean rounded to 2 decimals and wrapped into ``[0, 360)``;
        ``nan`` when there is no non-NaN sample.

    Raises
    ------
    KeyError
        If ``x`` has no ``'WDIR_VECT_(DEG)'`` entry.
    """
    angles = np.asarray(x['WDIR_VECT_(DEG)'], dtype=float)
    # Skip missing readings so a single NaN sample does not turn the whole
    # result into NaN.
    angles = angles[~np.isnan(angles)]
    if angles.size == 0:
        # Nothing to average (empty bin or all readings missing).
        return np.nan
    # Let circmean work directly in degrees instead of converting to radians
    # and back by hand.
    mean_deg = float(circmean(angles, high=360.0, low=0.0))
    # Rounding can land exactly on 360.0; wrap it so the value reports as 0.0.
    return round(mean_deg, 2) % 360.0

# Daily wind direction obtained by applying circular_mean to each day's samples
WDIR_daily = WDIR.resample('D').apply(circular_mean)

# Scatter the circular mean (x axis) against the plain resample mean (y axis)
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(WDIR_daily, AQ_weather_daily['WDIR_VECT_(DEG)'])
ax.set_title('Plot comparing the two mean values for wind direction')
ax.set_xlabel('circmean wind direction')
ax.set_ylabel('mean wind direction')
plt.show()