In [1]:
import datetime
import sys
sys.path.append('/workspaces/docker_python')
from weather_model.src.data_loader import load_humidity_data, load_temperature_data
import pandas as pd

import numpy as np


In [2]:
# Util functions
def unix_timestamp_decode(timestamp):
    """Convert a millisecond UNIX timestamp into a naive UTC datetime.

    Args:
        timestamp (int | float): Milliseconds elapsed since the UNIX epoch.

    Returns:
        datetime.datetime: The same instant expressed in UTC. ``tzinfo`` is
        left unset (naive) so the result can be compared with and subtracted
        from other naive datetimes.
    """
    # The input is in milliseconds; datetime expects seconds.
    timestamp_s = timestamp / 1000
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build a
    # timezone-aware UTC value instead, then drop tzinfo to keep returning a
    # naive datetime as before.
    aware_utc = datetime.datetime.fromtimestamp(timestamp_s, tz=datetime.timezone.utc)
    return aware_utc.replace(tzinfo=None)

def compare_h_t_timestamp(h_timestamp, t_timestamp, tolerance_seconds=10):
    """Report whether a humidity and a temperature reading share a timestamp.

    Both timestamps are decoded with ``unix_timestamp_decode`` and printed.
    They count as "the same" when they fall in the same calendar date, hour
    and minute AND are no more than ``tolerance_seconds`` apart.

    Args:
        h_timestamp (int): Humidity timestamp, milliseconds since the epoch.
        t_timestamp (int): Temperature timestamp, milliseconds since the epoch.
        tolerance_seconds (int | float): Largest absolute gap, in seconds,
            that is still treated as a match. Defaults to 10.

    Returns:
        bool: True when the two readings match under the rule above,
        otherwise False.
    """
    h_tobject = unix_timestamp_decode(h_timestamp)
    t_tobject = unix_timestamp_decode(t_timestamp)

    print(f"Humidity Time stamp {h_tobject}")
    print(f"Temperature Time stamp {t_tobject}")

    # Absolute gap between the two readings, in seconds, used for the tolerance check.
    time_difference = abs((h_tobject - t_tobject).total_seconds())

    # Truncating both values to the minute compares date, hour and minute in
    # one step.
    # NOTE: because the same-minute rule is kept, two readings only a couple
    # of seconds apart that straddle a minute boundary are reported as
    # different.
    same_minute = (
        h_tobject.replace(second=0, microsecond=0)
        == t_tobject.replace(second=0, microsecond=0)
    )
    return same_minute and time_difference <= tolerance_seconds

    


In [3]:
# Load the humidity and temperature tables through the project's data loaders.
# Each object is tagged with an ad-hoc `Name` attribute; data_evaluation()
# reads `data.Name` to label what it prints.
# NOTE(review): `Name` is a plain attribute set on the object, not a column —
# presumably it is not carried over to frames derived from these; confirm
# before relying on it downstream.
humidity = load_humidity_data()
humidity.Name = 'humidity'
temperature = load_temperature_data()
temperature.Name = 'temperature'

In [4]:
# Quick-look report helper
def data_evaluation(data):
    """Print a short overview of a labelled DataFrame.

    The report lists, in order: the shape, the first rows (``head()``), the
    summary statistics (``describe()``), a blank separator and the column
    index. Every section is prefixed with the frame's ``Name`` attribute.

    Args:
        data (dataframe): Pandas DataFrame that carries a ``Name`` attribute.
    """
    label = data.Name

    dimensions = data.shape
    print(f"{label} Shape: {dimensions}")

    preview = data.head()
    print(f"{label} Head:\n{preview} ")

    statistics = data.describe()
    print(f"{label} Summary Statistics:\n{statistics}")

    print("\n")

    column_index = data.columns
    print(f"{label} Columns:\n{column_index}")

In [5]:
# Print the overview report for each loaded frame, humidity first
print("First Evaluation: ")
for frame in (humidity, temperature):
    data_evaluation(frame)


First Evaluation: 
humidity Shape: (1588, 4)
humidity Head:
       timestamp  humidity context_humidity               date (Asia/Kolkata)
0  1694955053748      92.0               {}  2023-09-17 18:20:53.748000+05:30
1  1694954292122      91.0               {}  2023-09-17 18:08:12.122000+05:30
2  1694935992451      69.0               {}  2023-09-17 13:03:12.451000+05:30
3  1694935611126      70.0               {}  2023-09-17 12:56:51.126000+05:30
4  1694935229794      70.0               {}  2023-09-17 12:50:29.794000+05:30 
humidity Summary Statistics:
          timestamp    humidity
count  1.588000e+03  1588.00000
mean   1.693464e+12    75.52330
std    6.966737e+08    18.93607
min    1.692674e+12     0.00000
25%    1.692877e+12    62.00000
50%    1.693243e+12    80.00000
75%    1.694081e+12    95.00000
max    1.694955e+12    95.00000


humidity Columns:
Index(['timestamp', 'humidity', 'context_humidity', 'date (Asia/Kolkata)'], dtype='object')
temperature Shape: (1652, 4)
temperature H

In [6]:
## Smoke test: run compare_h_t_timestamp on the first row of each frame
first_rows_match = compare_h_t_timestamp(
    humidity['timestamp'].iloc[0],
    temperature['timestamp'].iloc[0],
)
print("Same time stamp" if first_rows_match else "Different timestamp")

print("\n")


Humidity Time stamp 2023-09-17 12:50:53.748000
Temperature Time stamp 2023-09-17 12:50:52.848000
Same time stamp




In [7]:
# Persist both frames with IPython's %store magic so that another notebook or
# kernel session can reload them with `%store -r humidity temperature`.
%store humidity
%store temperature

Stored 'humidity' (DataFrame)
Stored 'temperature' (DataFrame)


  db[ 'autorestore/' + arg ] = obj
  db[ 'autorestore/' + arg ] = obj
