In [79]:
from __future__ import division
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import datetime

DATA_DIR = '//home//anaconda//Documents//PAQS/DATA_ANALYSIS//datasets//'

BNG_COLS = ['valid', 'tmpf', 'RH']
BNG = DATA_DIR + '10-Aug//bangalore.csv'

BNG_DATA = pd.read_csv(BNG, usecols=BNG_COLS)
BNG_DATA.head()

Unnamed: 0,valid,tmpf,RH
0,2016-01-01 00:00,66.2,100.0
1,2016-01-01 00:30,66.2,100.0
2,2016-01-01 01:00,66.2,100.0
3,2016-01-01 01:30,64.4,100.0
4,2016-01-01 02:00,66.2,100.0


In [80]:
# Replacing unprocessed results.
BNG_DATA.replace(to_replace='M', value=0, inplace=True)
BNG_DATA = BNG_DATA[BNG_DATA.tmpf != 0]
# Fahrenhiet to Celsius conversion
#T(Celsius) = (T(Fahrenheit) - 32) * (5/9)
BNG_DATA['tmpf'] = BNG_DATA['tmpf'].apply(lambda x: ((float(x) - 32) * (5/9)))
BNG_DATA['RH'] = BNG_DATA['RH'].apply(lambda x: float(x))
BNG_DATA['valid'] = pd.to_datetime(BNG_DATA['valid'])
BNG_DATA.head()

Unnamed: 0,valid,tmpf,RH
0,2016-01-01 00:00:00,19.0,100.0
1,2016-01-01 00:30:00,19.0,100.0
2,2016-01-01 01:00:00,19.0,100.0
3,2016-01-01 01:30:00,18.0,100.0
4,2016-01-01 02:00:00,19.0,100.0


In [81]:
BNG_DATA = BNG_DATA.set_index('valid')
BNG_DATA.index = BNG_DATA.index.tz_localize('Asia/Kolkata')
BNG_DATA = BNG_DATA.resample('1H').mean()

In [82]:
BNG_DATA = BNG_DATA.reset_index()
BNG_DATA.head()

Unnamed: 0,valid,tmpf,RH
0,2016-01-01 00:00:00+05:30,19.0,100.0
1,2016-01-01 01:00:00+05:30,18.5,100.0
2,2016-01-01 02:00:00+05:30,19.0,100.0
3,2016-01-01 03:00:00+05:30,22.0,100.0
4,2016-01-01 04:00:00+05:30,23.5,83.425


In [83]:
BNG_DATA.dtypes

valid    datetime64[ns, Asia/Kolkata]
tmpf                          float64
RH                            float64
dtype: object

In [84]:
# Load data for PAQS device
fields = ['ts', 'temp', 'rh']
LOC = DATA_DIR + '10-Aug//BANG_PAQS.csv'
BNG_PAQS = pd.read_csv(LOC, usecols=fields)
BNG_PAQS.head()

Unnamed: 0,ts,temp,rh
0,1464340928,31.4,43.96
1,1464340958,33.03,46.1
2,1464340987,33.08,46.43
3,1464341017,33.08,46.44
4,1464341047,33.08,46.44


In [85]:
BNG_PAQS['ts'] = pd.to_datetime(BNG_PAQS['ts'], unit='s')
BNG_PAQS = BNG_PAQS.set_index(['ts'])
BNG_PAQS.index = BNG_PAQS.index.tz_localize('UTC').tz_convert('Asia/Kolkata')
BNG_PAQS.head()

Unnamed: 0_level_0,temp,rh
ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-05-27 14:52:08+05:30,31.4,43.96
2016-05-27 14:52:38+05:30,33.03,46.1
2016-05-27 14:53:07+05:30,33.08,46.43
2016-05-27 14:53:37+05:30,33.08,46.44
2016-05-27 14:54:07+05:30,33.08,46.44


In [86]:
BNG_PAQS = BNG_PAQS.reset_index()
BNG_PAQS = BNG_PAQS.rename(columns={'ts': 'valid'})
BNG_PAQS.head()

Unnamed: 0,valid,temp,rh
0,2016-05-27 14:52:08+05:30,31.4,43.96
1,2016-05-27 14:52:38+05:30,33.03,46.1
2,2016-05-27 14:53:07+05:30,33.08,46.43
3,2016-05-27 14:53:37+05:30,33.08,46.44
4,2016-05-27 14:54:07+05:30,33.08,46.44


In [87]:
BNG_PAQS = BNG_PAQS.set_index('valid')
BNG_PAQS = BNG_PAQS.resample('1H').mean()
BNG_PAQS.head()

Unnamed: 0_level_0,temp,rh
valid,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-05-27 14:00:00+05:30,32.964667,46.251333
2016-05-27 15:00:00+05:30,33.295862,46.15069
2016-05-27 16:00:00+05:30,33.77,44.977049
2016-05-27 17:00:00+05:30,33.77,43.060083
2016-05-27 18:00:00+05:30,33.77,43.902131


In [88]:
BNG_PAQS = BNG_PAQS.reset_index()
BNG_PAQS.head()

Unnamed: 0,valid,temp,rh
0,2016-05-27 14:00:00+05:30,32.964667,46.251333
1,2016-05-27 15:00:00+05:30,33.295862,46.15069
2,2016-05-27 16:00:00+05:30,33.77,44.977049
3,2016-05-27 17:00:00+05:30,33.77,43.060083
4,2016-05-27 18:00:00+05:30,33.77,43.902131


In [89]:
merged = pd.merge(BNG_DATA, BNG_PAQS, on='valid', how='right')
merged.head()

Unnamed: 0,valid,tmpf,RH,temp,rh
0,2016-05-27 14:00:00+05:30,30.5,55.265,32.964667,46.251333
1,2016-05-27 15:00:00+05:30,28.5,65.965,33.295862,46.15069
2,2016-05-27 16:00:00+05:30,28.0,69.9,33.77,44.977049
3,2016-05-27 17:00:00+05:30,28.0,69.9,33.77,43.060083
4,2016-05-27 18:00:00+05:30,27.0,74.11,33.77,43.902131


In [90]:
merged = merged.dropna()
merged.head()

Unnamed: 0,valid,tmpf,RH,temp,rh
0,2016-05-27 14:00:00+05:30,30.5,55.265,32.964667,46.251333
1,2016-05-27 15:00:00+05:30,28.5,65.965,33.295862,46.15069
2,2016-05-27 16:00:00+05:30,28.0,69.9,33.77,44.977049
3,2016-05-27 17:00:00+05:30,28.0,69.9,33.77,43.060083
4,2016-05-27 18:00:00+05:30,27.0,74.11,33.77,43.902131


In [91]:
merged['temp_diff'] = merged['temp'].sub(merged['tmpf'], axis=0)
merged['rh_diff'] = merged['rh'].sub(merged['rh'], axis=0)
merged.head()

Unnamed: 0,valid,tmpf,RH,temp,rh,temp_diff,rh_diff
0,2016-05-27 14:00:00+05:30,30.5,55.265,32.964667,46.251333,2.464667,0.0
1,2016-05-27 15:00:00+05:30,28.5,65.965,33.295862,46.15069,4.795862,0.0
2,2016-05-27 16:00:00+05:30,28.0,69.9,33.77,44.977049,5.77,0.0
3,2016-05-27 17:00:00+05:30,28.0,69.9,33.77,43.060083,5.77,0.0
4,2016-05-27 18:00:00+05:30,27.0,74.11,33.77,43.902131,6.77,0.0


In [92]:
MERGE_LOC = DATA_DIR + '10-Aug//MERGED.csv'
merged.to_csv(MERGE_LOC)